/*
 * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * GK20A does not have dedicated video memory, and to accurately represent
 * this fact Nouveau will not create a RAM device for it. Therefore its
 * instmem implementation must be done directly on top of system memory,
 * while preserving coherency for read and write operations.
 *
 * Instmem can be allocated through two means:
 * 1) If an IOMMU unit has been probed, the IOMMU API is used to make memory
 *    pages contiguous to the GPU. This is the preferred way.
 * 2) If no IOMMU unit is probed, the DMA API is used to allocate physically
 *    contiguous memory.
 *
 * In both cases CPU reads and writes are performed through a write-combined
 * mapping. The GPU L2 cache must thus be flushed/invalidated when required.
 * To be conservative we do this every time we acquire or release an instobj,
 * but ideally L2 management should be handled at a higher level.
 *
 * To improve performance, CPU mappings are not removed upon instobj release.
 * Instead they are placed on an LRU list and recycled when the total mapped
 * space exceeds a threshold, currently 1 MB.
 */
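
/*
 * Typical usage, as a sketch: callers go through the nvkm_memory accessor
 * macros from <core/memory.h>, which dispatch to the acquire/wr32/release
 * hooks implemented below:
 *
 *	nvkm_kmap(memory);            - gk20a_instobj_acquire()
 *	nvkm_wo32(memory, 0x0, data); - gk20a_instobj_wr32()
 *	nvkm_done(memory);            - gk20a_instobj_release()
 */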
#include "priv.h"

#include <core/memory.h>
#include <core/mm.h>
#include <core/tegra.h>
#include <subdev/fb.h>
#include <subdev/ltc.h>

struct gk20a_instobj {
	struct nvkm_memory memory;
	struct nvkm_mem mem;
	struct gk20a_instmem *imem;

	/* CPU mapping */
	u32 *vaddr;
	struct list_head vaddr_node;
	/* How many clients are using vaddr? */
	u32 use_cpt;
};
#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)

/*
 * Used for objects allocated using the DMA API
 */
struct gk20a_instobj_dma {
	struct gk20a_instobj base;

	u32 *cpuaddr;
	dma_addr_t handle;
	struct nvkm_mm_node r;
};
#define gk20a_instobj_dma(p) \
	container_of(gk20a_instobj(p), struct gk20a_instobj_dma, base)

/*
 * Used for objects flattened using the IOMMU API
 */
struct gk20a_instobj_iommu {
	struct gk20a_instobj base;

	/* points right after pages[], within the same allocation */
	dma_addr_t *dma_addrs;
	/* array of base.mem->size pages (followed by as many dma_addr_ts) */
	struct page *pages[];
};
#define gk20a_instobj_iommu(p) \
	container_of(gk20a_instobj(p), struct gk20a_instobj_iommu, base)

struct gk20a_instmem {
	struct nvkm_instmem base;

	/* protects vaddr_* and gk20a_instobj::vaddr* */
	spinlock_t lock;

	/* CPU mappings LRU */
	unsigned int vaddr_use;
	unsigned int vaddr_max;
	struct list_head vaddr_lru;

	/* Only used if an IOMMU is present */
	struct mutex *mm_mutex;
	struct nvkm_mm *mm;
	struct iommu_domain *domain;
	unsigned long iommu_pgshift;
	u16 iommu_bit;

	/* Only used by DMA API */
	struct dma_attrs attrs;

	void __iomem * (*cpu_map)(struct nvkm_memory *);
};
#define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base)

static enum nvkm_memory_target
gk20a_instobj_target(struct nvkm_memory *memory)
{
	return NVKM_MEM_TARGET_HOST;
}

static u64
gk20a_instobj_addr(struct nvkm_memory *memory)
{
	return gk20a_instobj(memory)->mem.offset;
}

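/* mem.size is stored in units of 4 KiB GPU pages, hence the shift by 12 */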
static u64
gk20a_instobj_size(struct nvkm_memory *memory)
{
	return (u64)gk20a_instobj(memory)->mem.size << 12;
}

static void __iomem *
gk20a_instobj_cpu_map_dma(struct nvkm_memory *memory)
{
#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
	struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
	struct device *dev = node->base.imem->base.subdev.device->dev;
	int npages = nvkm_memory_size(memory) >> 12;
	struct page *pages[npages];
	int i;

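	/*
	 * The DMA allocation is physically contiguous, so the backing
	 * struct pages are consecutive too; derive them all from the first
	 * one and vmap() the whole range write-combined.
	 */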
	/* we shouldn't see a gk20a on anything but arm/arm64 anyway */
	/* phys_to_page does not exist on all platforms... */
	pages[0] = pfn_to_page(dma_to_phys(dev, node->handle) >> PAGE_SHIFT);
	for (i = 1; i < npages; i++)
		pages[i] = pages[0] + i;

	return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
#else
	BUG();
	return NULL;
#endif
}

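/*
 * Unlike the DMA path, the pages backing an IOMMU object may be scattered
 * in physical memory; vmap() gives the CPU a contiguous view of them,
 * while the IOMMU already provides the contiguous GPU view.
 */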
static void __iomem *
gk20a_instobj_cpu_map_iommu(struct nvkm_memory *memory)
{
	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
	int npages = nvkm_memory_size(memory) >> 12;

	return vmap(node->pages, npages, VM_MAP,
		    pgprot_writecombine(PAGE_KERNEL));
}

/*
 * Recycle the vaddr of obj. Must be called with gk20a_instmem::lock held.
 */
static void
gk20a_instobj_recycle_vaddr(struct gk20a_instobj *obj)
{
	struct gk20a_instmem *imem = obj->imem;

	/* there should not be any user left... */
	WARN_ON(obj->use_cpt);
	list_del(&obj->vaddr_node);
	vunmap(obj->vaddr);
	obj->vaddr = NULL;
	imem->vaddr_use -= nvkm_memory_size(&obj->memory);
	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use,
		   imem->vaddr_max);
}

/*
 * Must be called while holding gk20a_instmem::lock.
 */
static void
gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size)
{
	while (imem->vaddr_use + size > imem->vaddr_max) {
		/* no candidate that can be unmapped, abort... */
		if (list_empty(&imem->vaddr_lru))
			break;

		gk20a_instobj_recycle_vaddr(list_first_entry(&imem->vaddr_lru,
				struct gk20a_instobj, vaddr_node));
	}
}

static void __iomem *
gk20a_instobj_acquire(struct nvkm_memory *memory)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);
	struct gk20a_instmem *imem = node->imem;
	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
	const u64 size = nvkm_memory_size(memory);
	unsigned long flags;

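	/*
	 * Flush the GPU L2 so any lines it still holds for this object
	 * reach memory before the CPU reads through the write-combined
	 * mapping.
	 */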
	nvkm_ltc_flush(ltc);

	spin_lock_irqsave(&imem->lock, flags);

	if (node->vaddr) {
		if (!node->use_cpt) {
			/* remove from LRU list since mapping in use again */
			list_del(&node->vaddr_node);
		}
		goto out;
	}

	/* try to free some address space if we reached the limit */
	gk20a_instmem_vaddr_gc(imem, size);

	node->vaddr = imem->cpu_map(memory);

	if (!node->vaddr) {
		nvkm_error(&imem->base.subdev, "cannot map instobj - "
			   "this is not going to end well...\n");
		goto out;
	}

	imem->vaddr_use += size;
	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n",
		   imem->vaddr_use, imem->vaddr_max);

out:
	node->use_cpt++;
	spin_unlock_irqrestore(&imem->lock, flags);

	return node->vaddr;
}

static void
gk20a_instobj_release(struct nvkm_memory *memory)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);
	struct gk20a_instmem *imem = node->imem;
	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
	unsigned long flags;

	spin_lock_irqsave(&imem->lock, flags);

	/* we should at least have one user to release... */
	if (WARN_ON(node->use_cpt == 0))
		goto out;

	/* add unused objs to the LRU list to recycle their mapping */
	if (--node->use_cpt == 0)
		list_add_tail(&node->vaddr_node, &imem->vaddr_lru);

out:
	spin_unlock_irqrestore(&imem->lock, flags);

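	/*
	 * Order the CPU's write-combined stores before invalidating the
	 * GPU L2, so the GPU re-fetches up-to-date data from memory.
	 */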
	wmb();
	nvkm_ltc_invalidate(ltc);
}

static u32
gk20a_instobj_rd32(struct nvkm_memory *memory, u64 offset)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);

	return node->vaddr[offset / 4];
}

static void
gk20a_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);

	node->vaddr[offset / 4] = data;
}

static void
gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);

	nvkm_vm_map_at(vma, offset, &node->mem);
}

/*
 * Clear the CPU mapping of an instobj if it exists
 */
static void
gk20a_instobj_dtor(struct gk20a_instobj *node)
{
	struct gk20a_instmem *imem = node->imem;
	unsigned long flags;

	spin_lock_irqsave(&imem->lock, flags);

	/* vaddr has already been recycled */
	if (!node->vaddr)
		goto out;

	gk20a_instobj_recycle_vaddr(node);

out:
	spin_unlock_irqrestore(&imem->lock, flags);
}

static void *
gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
{
	struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
	struct gk20a_instmem *imem = node->base.imem;
	struct device *dev = imem->base.subdev.device->dev;

	gk20a_instobj_dtor(&node->base);

	if (unlikely(!node->cpuaddr))
		goto out;

	dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->cpuaddr,
		       node->handle, &imem->attrs);

out:
	return node;
}

static void *
gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
{
	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
	struct gk20a_instmem *imem = node->base.imem;
	struct device *dev = imem->base.subdev.device->dev;
	struct nvkm_mm_node *r;
	int i;

	gk20a_instobj_dtor(&node->base);

	if (unlikely(list_empty(&node->base.mem.regions)))
		goto out;

	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
			     rl_entry);

	/* clear IOMMU bit to unmap pages */
	r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift);

	/* Unmap pages from GPU address space and free them */
	for (i = 0; i < node->base.mem.size; i++) {
		iommu_unmap(imem->domain,
			    (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE);
		dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE,
			       DMA_BIDIRECTIONAL);
		__free_page(node->pages[i]);
	}

	/* Release area from GPU address space */
	mutex_lock(imem->mm_mutex);
	nvkm_mm_free(imem->mm, &r);
	mutex_unlock(imem->mm_mutex);

out:
	return node;
}
d8e83994 | 371 | static const struct nvkm_memory_func |
69c49382 AC |
372 | gk20a_instobj_func_dma = { |
373 | .dtor = gk20a_instobj_dtor_dma, | |
374 | .target = gk20a_instobj_target, | |
375 | .addr = gk20a_instobj_addr, | |
376 | .size = gk20a_instobj_size, | |
377 | .acquire = gk20a_instobj_acquire, | |
378 | .release = gk20a_instobj_release, | |
379 | .rd32 = gk20a_instobj_rd32, | |
380 | .wr32 = gk20a_instobj_wr32, | |
381 | .map = gk20a_instobj_map, | |
382 | }; | |
383 | ||
384 | static const struct nvkm_memory_func | |
385 | gk20a_instobj_func_iommu = { | |
386 | .dtor = gk20a_instobj_dtor_iommu, | |
d8e83994 BS |
387 | .target = gk20a_instobj_target, |
388 | .addr = gk20a_instobj_addr, | |
389 | .size = gk20a_instobj_size, | |
390 | .acquire = gk20a_instobj_acquire, | |
391 | .release = gk20a_instobj_release, | |
392 | .rd32 = gk20a_instobj_rd32, | |
393 | .wr32 = gk20a_instobj_wr32, | |
394 | .map = gk20a_instobj_map, | |
395 | }; | |
396 | ||
a6ff85d3 | 397 | static int |
d8e83994 | 398 | gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, |
c44c06ae | 399 | struct gk20a_instobj **_node) |
a6ff85d3 | 400 | { |
a7f6da6e | 401 | struct gk20a_instobj_dma *node; |
00c55507 | 402 | struct nvkm_subdev *subdev = &imem->base.subdev; |
d8e83994 | 403 | struct device *dev = subdev->device->dev; |
a6ff85d3 | 404 | |
d8e83994 BS |
405 | if (!(node = kzalloc(sizeof(*node), GFP_KERNEL))) |
406 | return -ENOMEM; | |
a7f6da6e | 407 | *_node = &node->base; |
a6ff85d3 | 408 | |
69c49382 AC |
409 | nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); |
410 | ||
5dc240bc AC |
411 | node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, |
412 | &node->handle, GFP_KERNEL, | |
c44c06ae | 413 | &imem->attrs); |
a6ff85d3 | 414 | if (!node->cpuaddr) { |
00c55507 | 415 | nvkm_error(subdev, "cannot allocate DMA memory\n"); |
a6ff85d3 AC |
416 | return -ENOMEM; |
417 | } | |
418 | ||
419 | /* alignment check */ | |
420 | if (unlikely(node->handle & (align - 1))) | |
00c55507 BS |
421 | nvkm_warn(subdev, |
422 | "memory not aligned as requested: %pad (0x%x)\n", | |
423 | &node->handle, align); | |
a6ff85d3 | 424 | |
a7f6da6e AC |
425 | /* present memory for being mapped using small pages */ |
426 | node->r.type = 12; | |
427 | node->r.offset = node->handle >> 12; | |
428 | node->r.length = (npages << PAGE_SHIFT) >> 12; | |
429 | ||
d8e83994 | 430 | node->base.mem.offset = node->handle; |
a7f6da6e | 431 | |
d8e83994 BS |
432 | INIT_LIST_HEAD(&node->base.mem.regions); |
433 | list_add_tail(&node->r.rl_entry, &node->base.mem.regions); | |
a7f6da6e AC |
434 | |
435 | return 0; | |
436 | } | |
437 | ||
438 | static int | |
d8e83994 | 439 | gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, |
c44c06ae | 440 | struct gk20a_instobj **_node) |
a7f6da6e AC |
441 | { |
442 | struct gk20a_instobj_iommu *node; | |
00c55507 | 443 | struct nvkm_subdev *subdev = &imem->base.subdev; |
69c49382 | 444 | struct device *dev = subdev->device->dev; |
a7f6da6e AC |
445 | struct nvkm_mm_node *r; |
446 | int ret; | |
447 | int i; | |
448 | ||
69c49382 AC |
449 | /* |
450 | * despite their variable size, instmem allocations are small enough | |
451 | * (< 1 page) to be handled by kzalloc | |
452 | */ | |
453 | if (!(node = kzalloc(sizeof(*node) + ((sizeof(node->pages[0]) + | |
454 | sizeof(*node->dma_addrs)) * npages), GFP_KERNEL))) | |
d8e83994 | 455 | return -ENOMEM; |
a7f6da6e | 456 | *_node = &node->base; |
69c49382 AC |
457 | node->dma_addrs = (void *)(node->pages + npages); |
458 | ||
459 | nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory); | |
a7f6da6e AC |
460 | |
461 | /* Allocate backing memory */ | |
462 | for (i = 0; i < npages; i++) { | |
463 | struct page *p = alloc_page(GFP_KERNEL); | |
69c49382 | 464 | dma_addr_t dma_adr; |
a7f6da6e AC |
465 | |
466 | if (p == NULL) { | |
467 | ret = -ENOMEM; | |
468 | goto free_pages; | |
469 | } | |
470 | node->pages[i] = p; | |
69c49382 AC |
471 | dma_adr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); |
472 | if (dma_mapping_error(dev, dma_adr)) { | |
473 | nvkm_error(subdev, "DMA mapping error!\n"); | |
474 | ret = -ENOMEM; | |
475 | goto free_pages; | |
476 | } | |
477 | node->dma_addrs[i] = dma_adr; | |
a7f6da6e AC |
478 | } |
479 | ||
c44c06ae | 480 | mutex_lock(imem->mm_mutex); |
a7f6da6e | 481 | /* Reserve area from GPU address space */ |
c44c06ae BS |
482 | ret = nvkm_mm_head(imem->mm, 0, 1, npages, npages, |
483 | align >> imem->iommu_pgshift, &r); | |
484 | mutex_unlock(imem->mm_mutex); | |
a7f6da6e | 485 | if (ret) { |
69c49382 | 486 | nvkm_error(subdev, "IOMMU space is full!\n"); |
a7f6da6e AC |
487 | goto free_pages; |
488 | } | |
489 | ||
490 | /* Map into GPU address space */ | |
491 | for (i = 0; i < npages; i++) { | |
c44c06ae | 492 | u32 offset = (r->offset + i) << imem->iommu_pgshift; |
a7f6da6e | 493 | |
69c49382 | 494 | ret = iommu_map(imem->domain, offset, node->dma_addrs[i], |
a7f6da6e AC |
495 | PAGE_SIZE, IOMMU_READ | IOMMU_WRITE); |
496 | if (ret < 0) { | |
00c55507 | 497 | nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret); |
a7f6da6e AC |
498 | |
499 | while (i-- > 0) { | |
500 | offset -= PAGE_SIZE; | |
c44c06ae | 501 | iommu_unmap(imem->domain, offset, PAGE_SIZE); |
a7f6da6e AC |
502 | } |
503 | goto release_area; | |
504 | } | |
505 | } | |
506 | ||
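	/*
	 * Worked example (illustrative values): with iommu_bit = 34 and
	 * iommu_pgshift = 12, this sets bit 22 of the page-granular offset,
	 * i.e. bit 34 of the final GPU address computed below.
	 */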
	/*
	 * The IOMMU bit indicates that an address must be resolved through
	 * the IOMMU
	 */
	r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift);

	node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift;

	INIT_LIST_HEAD(&node->base.mem.regions);
	list_add_tail(&r->rl_entry, &node->base.mem.regions);

	return 0;

release_area:
	mutex_lock(imem->mm_mutex);
	nvkm_mm_free(imem->mm, &r);
	mutex_unlock(imem->mm_mutex);

free_pages:
	for (i = 0; i < npages && node->pages[i] != NULL; i++) {
		dma_addr_t dma_addr = node->dma_addrs[i];

		if (dma_addr)
			dma_unmap_page(dev, dma_addr, PAGE_SIZE,
				       DMA_BIDIRECTIONAL);
		__free_page(node->pages[i]);
	}

	return ret;
}

static int
gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
		  struct nvkm_memory **pmemory)
{
	struct gk20a_instmem *imem = gk20a_instmem(base);
	struct nvkm_subdev *subdev = &imem->base.subdev;
	struct gk20a_instobj *node = NULL;
	int ret;

	nvkm_debug(subdev, "%s (%s): size: %x align: %x\n", __func__,
		   imem->domain ? "IOMMU" : "DMA", size, align);

	/* Round size and align to page bounds */
	size = max(roundup(size, PAGE_SIZE), PAGE_SIZE);
	align = max(roundup(align, PAGE_SIZE), PAGE_SIZE);

	if (imem->domain)
		ret = gk20a_instobj_ctor_iommu(imem, size >> PAGE_SHIFT,
					       align, &node);
	else
		ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT,
					     align, &node);
	*pmemory = node ? &node->memory : NULL;
	if (ret)
		return ret;

	node->imem = imem;

	/* present memory for being mapped using small pages */
	node->mem.size = size >> 12;
	node->mem.memtype = 0;
	node->mem.page_shift = 12;

	nvkm_debug(subdev, "alloc size: 0x%x, align: 0x%x, gaddr: 0x%llx\n",
		   size, align, node->mem.offset);

	return 0;
}

static void *
gk20a_instmem_dtor(struct nvkm_instmem *base)
{
	struct gk20a_instmem *imem = gk20a_instmem(base);

	/* perform some sanity checks... */
	if (!list_empty(&imem->vaddr_lru))
		nvkm_warn(&base->subdev, "instobj LRU not empty!\n");

	if (imem->vaddr_use != 0)
		nvkm_warn(&base->subdev, "instobj vmap area not empty! "
			  "0x%x bytes still mapped\n", imem->vaddr_use);

	return imem;
}

static const struct nvkm_instmem_func
gk20a_instmem = {
	.dtor = gk20a_instmem_dtor,
	.memory_new = gk20a_instobj_new,
	.persistent = true,
	.zero = false,
};

int
gk20a_instmem_new(struct nvkm_device *device, int index,
		  struct nvkm_instmem **pimem)
{
	struct nvkm_device_tegra *tdev = device->func->tegra(device);
	struct gk20a_instmem *imem;

	if (!(imem = kzalloc(sizeof(*imem), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_instmem_ctor(&gk20a_instmem, device, index, &imem->base);
	spin_lock_init(&imem->lock);
	*pimem = &imem->base;

	/* do not allow more than 1MB of CPU-mapped instmem */
	imem->vaddr_use = 0;
	imem->vaddr_max = 0x100000;
	INIT_LIST_HEAD(&imem->vaddr_lru);

	if (tdev->iommu.domain) {
		imem->mm_mutex = &tdev->iommu.mutex;
		imem->mm = &tdev->iommu.mm;
		imem->domain = tdev->iommu.domain;
		imem->iommu_pgshift = tdev->iommu.pgshift;
		imem->cpu_map = gk20a_instobj_cpu_map_iommu;
		imem->iommu_bit = tdev->func->iommu_bit;

		nvkm_info(&imem->base.subdev, "using IOMMU\n");
	} else {
		init_dma_attrs(&imem->attrs);
		/* We will access the memory through our own mapping */
		dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs);
		dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs);
		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs);
		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &imem->attrs);
		imem->cpu_map = gk20a_instobj_cpu_map_dma;

		nvkm_info(&imem->base.subdev, "using DMA API\n");
	}

	return 0;
}