1 wifi: ath11k: Use dma_alloc_noncoherent for rx_tid buffer allocation
3 Currently, the driver allocates cacheable DMA buffers for the rx_tid
4 structure using kzalloc() and dma_map_single(). These buffers are
5 long-lived and can persist for the lifetime of the peer, which is not
6 advisable. Instead of using kzalloc() and dma_map_single() for allocating
7 cacheable DMA buffers, utilize the dma_alloc_noncoherent() helper for the
8 allocation of long-lived cacheable DMA buffers, such as the peer's rx_tid.
9 Since dma_alloc_noncoherent() returns unaligned physical and virtual
10 addresses, align them internally before use within the driver. This
11 ensures proper allocation of non-coherent memory through the kernel DMA subsystem.
14 Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
15 Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
17 Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
18 --- a/drivers/net/wireless/ath/ath11k/dp.h
19 +++ b/drivers/net/wireless/ath/ath11k/dp.h
21 /* SPDX-License-Identifier: BSD-3-Clause-Clear */
23 * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
24 - * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
25 + * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved.
29 @@ -20,7 +20,6 @@ struct ath11k_ext_irq_grp;
37 @@ -37,6 +36,9 @@ struct dp_rx_tid {
38 /* Timer info related to fragments */
39 struct timer_list frag_timer;
40 struct ath11k_base *ab;
41 + u32 *vaddr_unaligned;
42 + dma_addr_t paddr_unaligned;
43 + u32 unaligned_size;
46 #define DP_REO_DESC_FREE_THRESHOLD 64
47 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c
48 +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
50 // SPDX-License-Identifier: BSD-3-Clause-Clear
52 * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
53 - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
54 + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
57 #include <linux/ieee80211.h>
58 @@ -675,11 +675,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru
59 list_for_each_entry_safe(cmd, tmp, &dp->reo_cmd_list, list) {
62 - if (rx_tid->vaddr) {
63 - dma_unmap_single(ab->dev, rx_tid->paddr,
64 - rx_tid->size, DMA_BIDIRECTIONAL);
65 - kfree(rx_tid->vaddr);
66 - rx_tid->vaddr = NULL;
67 + if (rx_tid->vaddr_unaligned) {
68 + dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
69 + rx_tid->vaddr_unaligned,
70 + rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
71 + rx_tid->vaddr_unaligned = NULL;
75 @@ -689,11 +689,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru
76 list_del(&cmd_cache->list);
77 dp->reo_cmd_cache_flush_count--;
78 rx_tid = &cmd_cache->data;
79 - if (rx_tid->vaddr) {
80 - dma_unmap_single(ab->dev, rx_tid->paddr,
81 - rx_tid->size, DMA_BIDIRECTIONAL);
82 - kfree(rx_tid->vaddr);
83 - rx_tid->vaddr = NULL;
84 + if (rx_tid->vaddr_unaligned) {
85 + dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
86 + rx_tid->vaddr_unaligned,
87 + rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
88 + rx_tid->vaddr_unaligned = NULL;
92 @@ -708,11 +708,11 @@ static void ath11k_dp_reo_cmd_free(struc
93 if (status != HAL_REO_CMD_SUCCESS)
94 ath11k_warn(dp->ab, "failed to flush rx tid hw desc, tid %d status %d\n",
96 - if (rx_tid->vaddr) {
97 - dma_unmap_single(dp->ab->dev, rx_tid->paddr, rx_tid->size,
99 - kfree(rx_tid->vaddr);
100 - rx_tid->vaddr = NULL;
101 + if (rx_tid->vaddr_unaligned) {
102 + dma_free_noncoherent(dp->ab->dev, rx_tid->unaligned_size,
103 + rx_tid->vaddr_unaligned,
104 + rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
105 + rx_tid->vaddr_unaligned = NULL;
109 @@ -749,10 +749,10 @@ static void ath11k_dp_reo_cache_flush(st
111 ath11k_err(ab, "failed to send HAL_REO_CMD_FLUSH_CACHE cmd, tid %d (%d)\n",
113 - dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
114 - DMA_BIDIRECTIONAL);
115 - kfree(rx_tid->vaddr);
116 - rx_tid->vaddr = NULL;
117 + dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
118 + rx_tid->vaddr_unaligned,
119 + rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
120 + rx_tid->vaddr_unaligned = NULL;
124 @@ -802,10 +802,10 @@ static void ath11k_dp_rx_tid_del_func(st
128 - dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
129 - DMA_BIDIRECTIONAL);
130 - kfree(rx_tid->vaddr);
131 - rx_tid->vaddr = NULL;
132 + dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
133 + rx_tid->vaddr_unaligned,
134 + rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
135 + rx_tid->vaddr_unaligned = NULL;
138 void ath11k_peer_rx_tid_delete(struct ath11k *ar,
139 @@ -831,14 +831,16 @@ void ath11k_peer_rx_tid_delete(struct at
140 if (ret != -ESHUTDOWN)
141 ath11k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n",
143 - dma_unmap_single(ar->ab->dev, rx_tid->paddr, rx_tid->size,
144 - DMA_BIDIRECTIONAL);
145 - kfree(rx_tid->vaddr);
146 - rx_tid->vaddr = NULL;
147 + dma_free_noncoherent(ar->ab->dev, rx_tid->unaligned_size,
148 + rx_tid->vaddr_unaligned,
149 + rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
150 + rx_tid->vaddr_unaligned = NULL;
154 + rx_tid->paddr_unaligned = 0;
155 + rx_tid->size = 0;
156 + rx_tid->unaligned_size = 0;
159 static int ath11k_dp_rx_link_desc_return(struct ath11k_base *ab,
160 @@ -982,10 +984,9 @@ static void ath11k_dp_rx_tid_mem_free(st
164 - dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
165 - DMA_BIDIRECTIONAL);
166 - kfree(rx_tid->vaddr);
167 - rx_tid->vaddr = NULL;
168 + dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, rx_tid->vaddr_unaligned,
169 + rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
170 + rx_tid->vaddr_unaligned = NULL;
172 rx_tid->active = false;
174 @@ -1000,9 +1001,8 @@ int ath11k_peer_rx_tid_setup(struct ath1
175 struct ath11k_base *ab = ar->ab;
176 struct ath11k_peer *peer;
177 struct dp_rx_tid *rx_tid;
181 + u32 hw_desc_sz, *vaddr;
182 + void *vaddr_unaligned;
186 @@ -1050,37 +1050,34 @@ int ath11k_peer_rx_tid_setup(struct ath1
188 hw_desc_sz = ath11k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid);
190 - vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC);
192 + rx_tid->unaligned_size = hw_desc_sz + HAL_LINK_DESC_ALIGN - 1;
193 + vaddr_unaligned = dma_alloc_noncoherent(ab->dev, rx_tid->unaligned_size, &paddr,
194 + DMA_BIDIRECTIONAL, GFP_ATOMIC);
195 + if (!vaddr_unaligned) {
196 spin_unlock_bh(&ab->base_lock);
200 - addr_aligned = PTR_ALIGN(vaddr, HAL_LINK_DESC_ALIGN);
202 - ath11k_hal_reo_qdesc_setup(addr_aligned, tid, ba_win_sz,
205 - paddr = dma_map_single(ab->dev, addr_aligned, hw_desc_sz,
206 - DMA_BIDIRECTIONAL);
208 - ret = dma_mapping_error(ab->dev, paddr);
210 - spin_unlock_bh(&ab->base_lock);
211 - ath11k_warn(ab, "failed to setup dma map for peer %pM rx tid %d: %d\n",
212 - peer_mac, tid, ret);
216 - rx_tid->vaddr = vaddr;
217 - rx_tid->paddr = paddr;
218 + rx_tid->vaddr_unaligned = vaddr_unaligned;
219 + vaddr = PTR_ALIGN(vaddr_unaligned, HAL_LINK_DESC_ALIGN);
220 + rx_tid->paddr_unaligned = paddr;
221 + rx_tid->paddr = rx_tid->paddr_unaligned + ((unsigned long)vaddr -
222 + (unsigned long)rx_tid->vaddr_unaligned);
223 + ath11k_hal_reo_qdesc_setup(vaddr, tid, ba_win_sz, ssn, pn_type);
224 rx_tid->size = hw_desc_sz;
225 rx_tid->active = true;
227 + /* After dma_alloc_noncoherent, vaddr is being modified for reo qdesc setup.
228 + * Since these changes are not reflected in the device, driver now needs to
229 + * explicitly call dma_sync_single_for_device.
230 + */
231 + dma_sync_single_for_device(ab->dev, rx_tid->paddr,
232 + rx_tid->size,
233 + DMA_BIDIRECTIONAL);
234 spin_unlock_bh(&ab->base_lock);
236 - ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac,
237 - paddr, tid, 1, ba_win_sz);
238 + ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, rx_tid->paddr,
239 + tid, 1, ba_win_sz);
241 ath11k_warn(ar->ab, "failed to setup rx reorder queue for peer %pM tid %d: %d\n",
243 @@ -1088,12 +1085,6 @@ int ath11k_peer_rx_tid_setup(struct ath1
249 - kfree(rx_tid->vaddr);
250 - rx_tid->vaddr = NULL;
255 int ath11k_dp_rx_ampdu_start(struct ath11k *ar,