2 * Tegra host1x Command DMA
4 * Copyright (c) 2010-2013, NVIDIA Corporation.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include <asm/cacheflush.h>
21 #include <linux/device.h>
22 #include <linux/dma-mapping.h>
23 #include <linux/host1x.h>
24 #include <linux/interrupt.h>
25 #include <linux/kernel.h>
26 #include <linux/kfifo.h>
27 #include <linux/slab.h>
28 #include <trace/events/host1x.h>
/*
 * The push buffer is a circular array of words to be fetched by command DMA.
 * Note that it works slightly differently to the sync queue; fence == pos
 * means that the push buffer is full, not empty.
 */
/*
 * Typically the commands written into the push buffer are a pair of words. We
 * use slots to represent each of these pairs and to simplify things. Note the
 * strange number of slots allocated here. 512 slots will fit exactly within a
 * single memory page. We also need one additional word at the end of the push
 * buffer for the RESTART opcode that will instruct the CDMA to jump back to
 * the beginning of the push buffer. With 512 slots, this means that we'll use
 * 2 memory pages and waste 4092 bytes of the second page that will never be
 * used.
 */
54 #define HOST1X_PUSHBUFFER_SLOTS 511
57 * Clean up push buffer resources
59 static void host1x_pushbuffer_destroy(struct push_buffer
*pb
)
61 struct host1x_cdma
*cdma
= pb_to_cdma(pb
);
62 struct host1x
*host1x
= cdma_to_host1x(cdma
);
68 iommu_unmap(host1x
->domain
, pb
->dma
, pb
->alloc_size
);
69 free_iova(&host1x
->iova
, iova_pfn(&host1x
->iova
, pb
->dma
));
72 dma_free_wc(host1x
->dev
, pb
->alloc_size
, pb
->mapped
, pb
->phys
);
79 * Init push buffer resources
81 static int host1x_pushbuffer_init(struct push_buffer
*pb
)
83 struct host1x_cdma
*cdma
= pb_to_cdma(pb
);
84 struct host1x
*host1x
= cdma_to_host1x(cdma
);
91 pb
->size
= HOST1X_PUSHBUFFER_SLOTS
* 8;
95 /* initialize buffer pointers */
96 pb
->fence
= pb
->size
- 8;
102 size
= iova_align(&host1x
->iova
, size
);
104 pb
->mapped
= dma_alloc_wc(host1x
->dev
, size
, &pb
->phys
,
109 shift
= iova_shift(&host1x
->iova
);
110 alloc
= alloc_iova(&host1x
->iova
, size
>> shift
,
111 host1x
->iova_end
>> shift
, true);
117 pb
->dma
= iova_dma_addr(&host1x
->iova
, alloc
);
118 err
= iommu_map(host1x
->domain
, pb
->dma
, pb
->phys
, size
,
121 goto iommu_free_iova
;
123 pb
->mapped
= dma_alloc_wc(host1x
->dev
, size
, &pb
->phys
,
131 pb
->alloc_size
= size
;
133 host1x_hw_pushbuffer_init(host1x
, pb
);
138 __free_iova(&host1x
->iova
, alloc
);
140 dma_free_wc(host1x
->dev
, size
, pb
->mapped
, pb
->phys
);
/*
 * Push two words to the push buffer
 * Caller must ensure push buffer is not full
 */
149 static void host1x_pushbuffer_push(struct push_buffer
*pb
, u32 op1
, u32 op2
)
151 u32
*p
= (u32
*)((void *)pb
->mapped
+ pb
->pos
);
153 WARN_ON(pb
->pos
== pb
->fence
);
158 if (pb
->pos
>= pb
->size
)
163 * Pop a number of two word slots from the push buffer
164 * Caller must ensure push buffer is not empty
166 static void host1x_pushbuffer_pop(struct push_buffer
*pb
, unsigned int slots
)
168 /* Advance the next write position */
169 pb
->fence
+= slots
* 8;
171 if (pb
->fence
>= pb
->size
)
172 pb
->fence
-= pb
->size
;
/*
 * Return the number of two word slots free in the push buffer
 */
178 static u32
host1x_pushbuffer_space(struct push_buffer
*pb
)
180 unsigned int fence
= pb
->fence
;
182 if (pb
->fence
< pb
->pos
)
185 return (fence
- pb
->pos
) / 8;
189 * Sleep (if necessary) until the requested event happens
190 * - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty.
192 * - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer
193 * - Return the amount of space (> 0)
194 * Must be called with the cdma lock held.
196 unsigned int host1x_cdma_wait_locked(struct host1x_cdma
*cdma
,
197 enum cdma_event event
)
200 struct push_buffer
*pb
= &cdma
->push_buffer
;
204 case CDMA_EVENT_SYNC_QUEUE_EMPTY
:
205 space
= list_empty(&cdma
->sync_queue
) ? 1 : 0;
208 case CDMA_EVENT_PUSH_BUFFER_SPACE
:
209 space
= host1x_pushbuffer_space(pb
);
220 trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma
)->dev
),
223 /* If somebody has managed to already start waiting, yield */
224 if (cdma
->event
!= CDMA_EVENT_NONE
) {
225 mutex_unlock(&cdma
->lock
);
227 mutex_lock(&cdma
->lock
);
233 mutex_unlock(&cdma
->lock
);
234 wait_for_completion(&cdma
->complete
);
235 mutex_lock(&cdma
->lock
);
242 * Sleep (if necessary) until the push buffer has enough free space.
244 * Must be called with the cdma lock held.
246 int host1x_cdma_wait_pushbuffer_space(struct host1x
*host1x
,
247 struct host1x_cdma
*cdma
,
251 struct push_buffer
*pb
= &cdma
->push_buffer
;
254 space
= host1x_pushbuffer_space(pb
);
258 trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma
)->dev
),
259 CDMA_EVENT_PUSH_BUFFER_SPACE
);
261 host1x_hw_cdma_flush(host1x
, cdma
);
263 /* If somebody has managed to already start waiting, yield */
264 if (cdma
->event
!= CDMA_EVENT_NONE
) {
265 mutex_unlock(&cdma
->lock
);
267 mutex_lock(&cdma
->lock
);
271 cdma
->event
= CDMA_EVENT_PUSH_BUFFER_SPACE
;
273 mutex_unlock(&cdma
->lock
);
274 wait_for_completion(&cdma
->complete
);
275 mutex_lock(&cdma
->lock
);
/*
 * Start timer that tracks the time spent by the job.
 * Must be called with the cdma lock held.
 */
284 static void cdma_start_timer_locked(struct host1x_cdma
*cdma
,
285 struct host1x_job
*job
)
287 struct host1x
*host
= cdma_to_host1x(cdma
);
289 if (cdma
->timeout
.client
) {
290 /* timer already started */
294 cdma
->timeout
.client
= job
->client
;
295 cdma
->timeout
.syncpt
= host1x_syncpt_get(host
, job
->syncpt_id
);
296 cdma
->timeout
.syncpt_val
= job
->syncpt_end
;
297 cdma
->timeout
.start_ktime
= ktime_get();
299 schedule_delayed_work(&cdma
->timeout
.wq
,
300 msecs_to_jiffies(job
->timeout
));
304 * Stop timer when a buffer submission completes.
305 * Must be called with the cdma lock held.
307 static void stop_cdma_timer_locked(struct host1x_cdma
*cdma
)
309 cancel_delayed_work(&cdma
->timeout
.wq
);
310 cdma
->timeout
.client
= NULL
;
/*
 * For all sync queue entries that have already finished according to the
 * current sync point registers:
 *  - unpin & unref their mems
 *  - pop their push buffer slots
 *  - remove them from the sync queue
 * This is normally called from the host code's worker thread, but can be
 * called manually if necessary.
 * Must be called with the cdma lock held.
 */
323 static void update_cdma_locked(struct host1x_cdma
*cdma
)
326 struct host1x
*host1x
= cdma_to_host1x(cdma
);
327 struct host1x_job
*job
, *n
;
329 /* If CDMA is stopped, queue is cleared and we can return */
334 * Walk the sync queue, reading the sync point registers as necessary,
335 * to consume as many sync queue entries as possible without blocking
337 list_for_each_entry_safe(job
, n
, &cdma
->sync_queue
, list
) {
338 struct host1x_syncpt
*sp
=
339 host1x_syncpt_get(host1x
, job
->syncpt_id
);
341 /* Check whether this syncpt has completed, and bail if not */
342 if (!host1x_syncpt_is_expired(sp
, job
->syncpt_end
)) {
343 /* Start timer on next pending syncpt */
345 cdma_start_timer_locked(cdma
, job
);
350 /* Cancel timeout, when a buffer completes */
351 if (cdma
->timeout
.client
)
352 stop_cdma_timer_locked(cdma
);
354 /* Unpin the memory */
355 host1x_job_unpin(job
);
357 /* Pop push buffer slots */
358 if (job
->num_slots
) {
359 struct push_buffer
*pb
= &cdma
->push_buffer
;
361 host1x_pushbuffer_pop(pb
, job
->num_slots
);
363 if (cdma
->event
== CDMA_EVENT_PUSH_BUFFER_SPACE
)
367 list_del(&job
->list
);
371 if (cdma
->event
== CDMA_EVENT_SYNC_QUEUE_EMPTY
&&
372 list_empty(&cdma
->sync_queue
))
376 cdma
->event
= CDMA_EVENT_NONE
;
377 complete(&cdma
->complete
);
381 void host1x_cdma_update_sync_queue(struct host1x_cdma
*cdma
,
384 struct host1x
*host1x
= cdma_to_host1x(cdma
);
385 u32 restart_addr
, syncpt_incrs
, syncpt_val
;
386 struct host1x_job
*job
, *next_job
= NULL
;
388 syncpt_val
= host1x_syncpt_load(cdma
->timeout
.syncpt
);
390 dev_dbg(dev
, "%s: starting cleanup (thresh %d)\n",
391 __func__
, syncpt_val
);
394 * Move the sync_queue read pointer to the first entry that hasn't
395 * completed based on the current HW syncpt value. It's likely there
396 * won't be any (i.e. we're still at the head), but covers the case
397 * where a syncpt incr happens just prior/during the teardown.
400 dev_dbg(dev
, "%s: skip completed buffers still in sync_queue\n",
403 list_for_each_entry(job
, &cdma
->sync_queue
, list
) {
404 if (syncpt_val
< job
->syncpt_end
) {
406 if (!list_is_last(&job
->list
, &cdma
->sync_queue
))
407 next_job
= list_next_entry(job
, list
);
412 host1x_job_dump(dev
, job
);
415 /* all jobs have been completed */
421 * Increment with CPU the remaining syncpts of a partially executed job.
423 * CDMA will continue execution starting with the next job or will get
427 restart_addr
= next_job
->first_get
;
429 restart_addr
= cdma
->last_pos
;
431 /* do CPU increments for the remaining syncpts */
433 dev_dbg(dev
, "%s: perform CPU incr on pending buffers\n",
436 /* won't need a timeout when replayed */
439 syncpt_incrs
= job
->syncpt_end
- syncpt_val
;
440 dev_dbg(dev
, "%s: CPU incr (%d)\n", __func__
, syncpt_incrs
);
442 host1x_job_dump(dev
, job
);
444 /* safe to use CPU to incr syncpts */
445 host1x_hw_cdma_timeout_cpu_incr(host1x
, cdma
, job
->first_get
,
446 syncpt_incrs
, job
->syncpt_end
,
449 dev_dbg(dev
, "%s: finished sync_queue modification\n",
453 /* roll back DMAGET and start up channel again */
454 host1x_hw_cdma_resume(host1x
, cdma
, restart_addr
);
460 int host1x_cdma_init(struct host1x_cdma
*cdma
)
464 mutex_init(&cdma
->lock
);
465 init_completion(&cdma
->complete
);
467 INIT_LIST_HEAD(&cdma
->sync_queue
);
469 cdma
->event
= CDMA_EVENT_NONE
;
470 cdma
->running
= false;
471 cdma
->torndown
= false;
473 err
= host1x_pushbuffer_init(&cdma
->push_buffer
);
483 int host1x_cdma_deinit(struct host1x_cdma
*cdma
)
485 struct push_buffer
*pb
= &cdma
->push_buffer
;
486 struct host1x
*host1x
= cdma_to_host1x(cdma
);
489 pr_warn("%s: CDMA still running\n", __func__
);
493 host1x_pushbuffer_destroy(pb
);
494 host1x_hw_cdma_timeout_destroy(host1x
, cdma
);
/*
 * Begin a cdma submit
 */
502 int host1x_cdma_begin(struct host1x_cdma
*cdma
, struct host1x_job
*job
)
504 struct host1x
*host1x
= cdma_to_host1x(cdma
);
506 mutex_lock(&cdma
->lock
);
509 /* init state on first submit with timeout value */
510 if (!cdma
->timeout
.initialized
) {
513 err
= host1x_hw_cdma_timeout_init(host1x
, cdma
,
516 mutex_unlock(&cdma
->lock
);
523 host1x_hw_cdma_start(host1x
, cdma
);
525 cdma
->slots_free
= 0;
526 cdma
->slots_used
= 0;
527 cdma
->first_get
= cdma
->push_buffer
.pos
;
529 trace_host1x_cdma_begin(dev_name(job
->channel
->dev
));
/*
 * Push two words into a push buffer slot
 * Blocks as necessary if the push buffer is full.
 */
537 void host1x_cdma_push(struct host1x_cdma
*cdma
, u32 op1
, u32 op2
)
539 struct host1x
*host1x
= cdma_to_host1x(cdma
);
540 struct push_buffer
*pb
= &cdma
->push_buffer
;
541 u32 slots_free
= cdma
->slots_free
;
543 if (host1x_debug_trace_cmdbuf
)
544 trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma
)->dev
),
547 if (slots_free
== 0) {
548 host1x_hw_cdma_flush(host1x
, cdma
);
549 slots_free
= host1x_cdma_wait_locked(cdma
,
550 CDMA_EVENT_PUSH_BUFFER_SPACE
);
553 cdma
->slots_free
= slots_free
- 1;
555 host1x_pushbuffer_push(pb
, op1
, op2
);
559 * Push four words into two consecutive push buffer slots. Note that extra
560 * care needs to be taken not to split the two slots across the end of the
561 * push buffer. Otherwise the RESTART opcode at the end of the push buffer
562 * that ensures processing will restart at the beginning will break up the
565 * Blocks as necessary if the push buffer is full.
567 void host1x_cdma_push_wide(struct host1x_cdma
*cdma
, u32 op1
, u32 op2
,
570 struct host1x_channel
*channel
= cdma_to_channel(cdma
);
571 struct host1x
*host1x
= cdma_to_host1x(cdma
);
572 struct push_buffer
*pb
= &cdma
->push_buffer
;
573 unsigned int needed
= 2, extra
= 0, i
;
574 unsigned int space
= cdma
->slots_free
;
576 if (host1x_debug_trace_cmdbuf
)
577 trace_host1x_cdma_push_wide(dev_name(channel
->dev
), op1
, op2
,
580 /* compute number of extra slots needed for padding */
581 if (pb
->pos
+ 16 > pb
->size
) {
582 extra
= (pb
->size
- pb
->pos
) / 8;
586 host1x_cdma_wait_pushbuffer_space(host1x
, cdma
, needed
);
587 space
= host1x_pushbuffer_space(pb
);
589 cdma
->slots_free
= space
- needed
;
590 cdma
->slots_used
+= needed
;
593 * Note that we rely on the fact that this is only used to submit wide
594 * gather opcodes, which consist of 3 words, and they are padded with
595 * a NOP to avoid having to deal with fractional slots (a slot always
596 * represents 2 words). The fourth opcode passed to this function will
597 * therefore always be a NOP.
599 * This works around a slight ambiguity when it comes to opcodes. For
600 * all current host1x incarnations the NOP opcode uses the exact same
601 * encoding (0x20000000), so we could hard-code the value here, but a
602 * new incarnation may change it and break that assumption.
604 for (i
= 0; i
< extra
; i
++)
605 host1x_pushbuffer_push(pb
, op4
, op4
);
607 host1x_pushbuffer_push(pb
, op1
, op2
);
608 host1x_pushbuffer_push(pb
, op3
, op4
);
/*
 * Kick off DMA, add job to the sync queue, and a number of slots to be freed
 * from the pushbuffer. The handles for a submit must all be pinned at the same
 * time, but they can be unpinned in smaller chunks.
 */
617 void host1x_cdma_end(struct host1x_cdma
*cdma
,
618 struct host1x_job
*job
)
620 struct host1x
*host1x
= cdma_to_host1x(cdma
);
621 bool idle
= list_empty(&cdma
->sync_queue
);
623 host1x_hw_cdma_flush(host1x
, cdma
);
625 job
->first_get
= cdma
->first_get
;
626 job
->num_slots
= cdma
->slots_used
;
628 list_add_tail(&job
->list
, &cdma
->sync_queue
);
630 /* start timer on idle -> active transitions */
631 if (job
->timeout
&& idle
)
632 cdma_start_timer_locked(cdma
, job
);
634 trace_host1x_cdma_end(dev_name(job
->channel
->dev
));
635 mutex_unlock(&cdma
->lock
);
/*
 * Update cdma state according to current sync point values
 */
641 void host1x_cdma_update(struct host1x_cdma
*cdma
)
643 mutex_lock(&cdma
->lock
);
644 update_cdma_locked(cdma
);
645 mutex_unlock(&cdma
->lock
);