--- /dev/null
+wifi: ath11k: Fix DMA buffer allocation to resolve SWIOTLB issues
+
+Currently, the driver allocates cacheable DMA buffers for rings like
+HAL_REO_DST and HAL_WBM2SW_RELEASE with kzalloc() and maps them with
+dma_map_single(). The buffers for HAL_WBM2SW_RELEASE are large
+(1024 KiB), exceeding the SWIOTLB slot size of 256 KiB. On systems
+without an IOMMU that use SWIOTLB, mapping these buffers therefore fails
+with "swiotlb buffer is full" error messages, which results in ring
+initialization errors and, in turn, driver initialization failures.
+
+To address these issues, replace the flawed buffer allocation mechanism
+with the appropriate DMA API. Specifically, use dma_alloc_noncoherent()
+for cacheable DMA buffers, ensuring proper freeing of buffers with
+dma_free_noncoherent().
+
+Error log:
+[ 10.194343] ath11k_pci 0000:04:00.0: swiotlb buffer is full (sz:1048583 bytes), total 32768 (slots), used 2529 (slots)
+[ 10.194406] ath11k_pci 0000:04:00.0: failed to set up tcl_comp ring (0) :-12
+[ 10.194781] ath11k_pci 0000:04:00.0: failed to init DP: -12
+
+Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
+Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
+
+Reported-by: Tim Harvey <tharvey@gateworks.com>
+Closes: https://lore.kernel.org/all/20241210041133.GA17116@lst.de/
+Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
+Tested-by: Tim Harvey <tharvey@gateworks.com>
+Link: https://patch.msgid.link/20250119164219.647059-2-quic_ppranees@quicinc.com
+Signed-off-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
+--- a/drivers/net/wireless/ath/ath11k/dp.c
++++ b/drivers/net/wireless/ath/ath11k/dp.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: BSD-3-Clause-Clear
+ /*
+ * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
+- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
++ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+ #include <crypto/hash.h>
+@@ -104,14 +104,12 @@ void ath11k_dp_srng_cleanup(struct ath11
+ if (!ring->vaddr_unaligned)
+ return;
+
+- if (ring->cached) {
+- dma_unmap_single(ab->dev, ring->paddr_unaligned, ring->size,
+- DMA_FROM_DEVICE);
+- kfree(ring->vaddr_unaligned);
+- } else {
++ if (ring->cached)
++ dma_free_noncoherent(ab->dev, ring->size, ring->vaddr_unaligned,
++ ring->paddr_unaligned, DMA_FROM_DEVICE);
++ else
+ dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
+ ring->paddr_unaligned);
+- }
+
+ ring->vaddr_unaligned = NULL;
+ }
+@@ -249,25 +247,14 @@ int ath11k_dp_srng_setup(struct ath11k_b
+ default:
+ cached = false;
+ }
+-
+- if (cached) {
+- ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
+- if (!ring->vaddr_unaligned)
+- return -ENOMEM;
+-
+- ring->paddr_unaligned = dma_map_single(ab->dev,
+- ring->vaddr_unaligned,
+- ring->size,
+- DMA_FROM_DEVICE);
+- if (dma_mapping_error(ab->dev, ring->paddr_unaligned)) {
+- kfree(ring->vaddr_unaligned);
+- ring->vaddr_unaligned = NULL;
+- return -ENOMEM;
+- }
+- }
+ }
+
+- if (!cached)
++ if (cached)
++ ring->vaddr_unaligned = dma_alloc_noncoherent(ab->dev, ring->size,
++ &ring->paddr_unaligned,
++ DMA_FROM_DEVICE,
++ GFP_KERNEL);
++ else
+ ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
+ &ring->paddr_unaligned,
+ GFP_KERNEL);
--- /dev/null
+wifi: ath11k: Use dma_alloc_noncoherent for rx_tid buffer allocation
+
+Currently, the driver allocates cacheable DMA buffers for the rx_tid
+structure using kzalloc() and dma_map_single(). These buffers are
+long-lived and can persist for the lifetime of the peer, which is not
+advisable for memory mapped with dma_map_single(). Instead, use the
+dma_alloc_noncoherent() helper to allocate long-lived cacheable DMA
+buffers such as the peer's rx_tid. Since the addresses returned by
+dma_alloc_noncoherent() are not guaranteed to be aligned to
+HAL_LINK_DESC_ALIGN, over-allocate the buffer, align the virtual and
+physical addresses internally before use within the driver, and keep
+the unaligned values for freeing. This ensures proper allocation of
+non-coherent memory through the kernel helper. Because the CPU fills in
+the REO queue descriptor after the allocation, also call
+dma_sync_single_for_device() so that the device sees those writes.
+
+Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
+Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
+
+Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
+--- a/drivers/net/wireless/ath/ath11k/dp.h
++++ b/drivers/net/wireless/ath/ath11k/dp.h
+@@ -1,7 +1,7 @@
+ /* SPDX-License-Identifier: BSD-3-Clause-Clear */
+ /*
+ * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
+- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
++ * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+ #ifndef ATH11K_DP_H
+@@ -20,7 +20,6 @@ struct ath11k_ext_irq_grp;
+
+ struct dp_rx_tid {
+ u8 tid;
+- u32 *vaddr;
+ dma_addr_t paddr;
+ u32 size;
+ u32 ba_win_sz;
+@@ -37,6 +36,9 @@ struct dp_rx_tid {
+ /* Timer info related to fragments */
+ struct timer_list frag_timer;
+ struct ath11k_base *ab;
++ u32 *vaddr_unaligned;
++ dma_addr_t paddr_unaligned;
++ u32 unaligned_size;
+ };
+
+ #define DP_REO_DESC_FREE_THRESHOLD 64
+--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
++++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: BSD-3-Clause-Clear
+ /*
+ * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
+- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
++ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+ #include <linux/ieee80211.h>
+@@ -675,11 +675,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru
+ list_for_each_entry_safe(cmd, tmp, &dp->reo_cmd_list, list) {
+ list_del(&cmd->list);
+ rx_tid = &cmd->data;
+- if (rx_tid->vaddr) {
+- dma_unmap_single(ab->dev, rx_tid->paddr,
+- rx_tid->size, DMA_BIDIRECTIONAL);
+- kfree(rx_tid->vaddr);
+- rx_tid->vaddr = NULL;
++ if (rx_tid->vaddr_unaligned) {
++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
++ rx_tid->vaddr_unaligned,
++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++ rx_tid->vaddr_unaligned = NULL;
+ }
+ kfree(cmd);
+ }
+@@ -689,11 +689,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru
+ list_del(&cmd_cache->list);
+ dp->reo_cmd_cache_flush_count--;
+ rx_tid = &cmd_cache->data;
+- if (rx_tid->vaddr) {
+- dma_unmap_single(ab->dev, rx_tid->paddr,
+- rx_tid->size, DMA_BIDIRECTIONAL);
+- kfree(rx_tid->vaddr);
+- rx_tid->vaddr = NULL;
++ if (rx_tid->vaddr_unaligned) {
++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
++ rx_tid->vaddr_unaligned,
++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++ rx_tid->vaddr_unaligned = NULL;
+ }
+ kfree(cmd_cache);
+ }
+@@ -708,11 +708,11 @@ static void ath11k_dp_reo_cmd_free(struc
+ if (status != HAL_REO_CMD_SUCCESS)
+ ath11k_warn(dp->ab, "failed to flush rx tid hw desc, tid %d status %d\n",
+ rx_tid->tid, status);
+- if (rx_tid->vaddr) {
+- dma_unmap_single(dp->ab->dev, rx_tid->paddr, rx_tid->size,
+- DMA_BIDIRECTIONAL);
+- kfree(rx_tid->vaddr);
+- rx_tid->vaddr = NULL;
++ if (rx_tid->vaddr_unaligned) {
++ dma_free_noncoherent(dp->ab->dev, rx_tid->unaligned_size,
++ rx_tid->vaddr_unaligned,
++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++ rx_tid->vaddr_unaligned = NULL;
+ }
+ }
+
+@@ -749,10 +749,10 @@ static void ath11k_dp_reo_cache_flush(st
+ if (ret) {
+ ath11k_err(ab, "failed to send HAL_REO_CMD_FLUSH_CACHE cmd, tid %d (%d)\n",
+ rx_tid->tid, ret);
+- dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
+- DMA_BIDIRECTIONAL);
+- kfree(rx_tid->vaddr);
+- rx_tid->vaddr = NULL;
++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
++ rx_tid->vaddr_unaligned,
++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++ rx_tid->vaddr_unaligned = NULL;
+ }
+ }
+
+@@ -802,10 +802,10 @@ static void ath11k_dp_rx_tid_del_func(st
+
+ return;
+ free_desc:
+- dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
+- DMA_BIDIRECTIONAL);
+- kfree(rx_tid->vaddr);
+- rx_tid->vaddr = NULL;
++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
++ rx_tid->vaddr_unaligned,
++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++ rx_tid->vaddr_unaligned = NULL;
+ }
+
+ void ath11k_peer_rx_tid_delete(struct ath11k *ar,
+@@ -831,14 +831,16 @@ void ath11k_peer_rx_tid_delete(struct at
+ if (ret != -ESHUTDOWN)
+ ath11k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n",
+ tid, ret);
+- dma_unmap_single(ar->ab->dev, rx_tid->paddr, rx_tid->size,
+- DMA_BIDIRECTIONAL);
+- kfree(rx_tid->vaddr);
+- rx_tid->vaddr = NULL;
++ dma_free_noncoherent(ar->ab->dev, rx_tid->unaligned_size,
++ rx_tid->vaddr_unaligned,
++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++ rx_tid->vaddr_unaligned = NULL;
+ }
+
+ rx_tid->paddr = 0;
++ rx_tid->paddr_unaligned = 0;
+ rx_tid->size = 0;
++ rx_tid->unaligned_size = 0;
+ }
+
+ static int ath11k_dp_rx_link_desc_return(struct ath11k_base *ab,
+@@ -982,10 +984,9 @@ static void ath11k_dp_rx_tid_mem_free(st
+ if (!rx_tid->active)
+ goto unlock_exit;
+
+- dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
+- DMA_BIDIRECTIONAL);
+- kfree(rx_tid->vaddr);
+- rx_tid->vaddr = NULL;
++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, rx_tid->vaddr_unaligned,
++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
++ rx_tid->vaddr_unaligned = NULL;
+
+ rx_tid->active = false;
+
+@@ -1000,9 +1001,8 @@ int ath11k_peer_rx_tid_setup(struct ath1
+ struct ath11k_base *ab = ar->ab;
+ struct ath11k_peer *peer;
+ struct dp_rx_tid *rx_tid;
+- u32 hw_desc_sz;
+- u32 *addr_aligned;
+- void *vaddr;
++ u32 hw_desc_sz, *vaddr;
++ void *vaddr_unaligned;
+ dma_addr_t paddr;
+ int ret;
+
+@@ -1050,37 +1050,34 @@ int ath11k_peer_rx_tid_setup(struct ath1
+ else
+ hw_desc_sz = ath11k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid);
+
+- vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC);
+- if (!vaddr) {
++ rx_tid->unaligned_size = hw_desc_sz + HAL_LINK_DESC_ALIGN - 1;
++ vaddr_unaligned = dma_alloc_noncoherent(ab->dev, rx_tid->unaligned_size, &paddr,
++ DMA_BIDIRECTIONAL, GFP_ATOMIC);
++ if (!vaddr_unaligned) {
+ spin_unlock_bh(&ab->base_lock);
+ return -ENOMEM;
+ }
+
+- addr_aligned = PTR_ALIGN(vaddr, HAL_LINK_DESC_ALIGN);
+-
+- ath11k_hal_reo_qdesc_setup(addr_aligned, tid, ba_win_sz,
+- ssn, pn_type);
+-
+- paddr = dma_map_single(ab->dev, addr_aligned, hw_desc_sz,
+- DMA_BIDIRECTIONAL);
+-
+- ret = dma_mapping_error(ab->dev, paddr);
+- if (ret) {
+- spin_unlock_bh(&ab->base_lock);
+- ath11k_warn(ab, "failed to setup dma map for peer %pM rx tid %d: %d\n",
+- peer_mac, tid, ret);
+- goto err_mem_free;
+- }
+-
+- rx_tid->vaddr = vaddr;
+- rx_tid->paddr = paddr;
++ rx_tid->vaddr_unaligned = vaddr_unaligned;
++ vaddr = PTR_ALIGN(vaddr_unaligned, HAL_LINK_DESC_ALIGN);
++ rx_tid->paddr_unaligned = paddr;
++ rx_tid->paddr = rx_tid->paddr_unaligned + ((unsigned long)vaddr -
++ (unsigned long)rx_tid->vaddr_unaligned);
++ ath11k_hal_reo_qdesc_setup(vaddr, tid, ba_win_sz, ssn, pn_type);
+ rx_tid->size = hw_desc_sz;
+ rx_tid->active = true;
+
++ /* After dma_alloc_noncoherent, vaddr is being modified for reo qdesc setup.
++ * Since these changes are not reflected in the device, driver now needs to
++ * explicitly call dma_sync_single_for_device.
++ */
++ dma_sync_single_for_device(ab->dev, rx_tid->paddr,
++ rx_tid->size,
++ DMA_TO_DEVICE);
+ spin_unlock_bh(&ab->base_lock);
+
+- ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac,
+- paddr, tid, 1, ba_win_sz);
++ ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, rx_tid->paddr,
++ tid, 1, ba_win_sz);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to setup rx reorder queue for peer %pM tid %d: %d\n",
+ peer_mac, tid, ret);
+@@ -1088,12 +1085,6 @@ int ath11k_peer_rx_tid_setup(struct ath1
+ }
+
+ return ret;
+-
+-err_mem_free:
+- kfree(rx_tid->vaddr);
+- rx_tid->vaddr = NULL;
+-
+- return ret;
+ }
+
+ int ath11k_dp_rx_ampdu_start(struct ath11k *ar,