1 From a73f2a05b8c2a221a5ccdf674cd58ef3ae4508de Mon Sep 17 00:00:00 2001
2 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= <noralf@tronnes.org>
3 Date: Sat, 3 Oct 2015 22:22:55 +0200
4 Subject: [PATCH 0071/1085] dmaengine: bcm2835: Load driver early and support
5 legacy API
6 MIME-Version: 1.0
7 Content-Type: text/plain; charset=UTF-8
8 Content-Transfer-Encoding: 8bit
9
10 Load the driver early since at least bcm2708_fb doesn't support deferred
11 probing, and even if it did, we don't want the video driver deferred.
12 Support the legacy DMA API which is needed by bcm2708_fb.
13 Don't mask out channel 2.
14
15 Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
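
For context, the early load is achieved further down in the diff by replacing
module_platform_driver() with an explicit registration; a minimal sketch of
that pattern:

    static int bcm2835_dma_init(void)
    {
            return platform_driver_register(&bcm2835_dma_driver);
    }
    /* after the serial driver (arch_initcall), before module_init drivers */
    subsys_initcall(bcm2835_dma_init);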
16
17 bcm2835-dma: Add support for per-channel flags
18
19 Add the ability to interpret the high bits of the dreq specifier as
20 flags to be included in the DMA_CS register. The motivation for this
21 change is the ability to set the DISDEBUG flag for SD card transfers
22 to avoid corruption when using the VPU debugger.
23
24 Signed-off-by: Phil Elwell <phil@raspberrypi.org>
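
Concretely, the extra dreq bits are filtered into the CS value when a
transfer is started; an abbreviated extract of the mechanism added below
(DISDEBUG is simply one of the bits a client can set in the second DT cell):

    #define BCM2835_DMA_CS_FLAGS(x) (x & (BCM2835_DMA_PRIORITY(15) | \
                                          BCM2835_DMA_PANIC_PRIORITY(15) | \
                                          BCM2835_DMA_WAIT_FOR_WRITES | \
                                          BCM2835_DMA_DIS_DEBUG))

    writel(BCM2835_DMA_ACTIVE | BCM2835_DMA_CS_FLAGS(c->dreq),
           c->chan_base + BCM2835_DMA_CS);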
25
26 bcm2835-dma: Add proper 40-bit DMA support
27
28 BCM2711 has 4 DMA channels with a 40-bit address range, allowing them
29 to access the full 4GB of memory on a Pi 4.
30
31 Signed-off-by: Phil Elwell <phil@raspberrypi.org>
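
The 40-bit channels are identified through per-SoC configuration data added
below; for BCM2711 the four high channels are flagged and a wider DMA mask is
advertised:

    static const struct bcm2835_dma_cfg_data bcm2711_dma_cfg = {
            .chan_40bit_mask = BIT(11) | BIT(12) | BIT(13) | BIT(14),
            .dma_mask = DMA_BIT_MASK(36),
    };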
32
33 bcm2835-dma: Derive slave DMA addresses correctly
34
35 Slave addresses for DMA are meant to be supplied as physical addresses
36 (contrary to what struct snd_dmaengine_dai_dma_data does). It is up to
37 the DMA controller driver to perform the translation based on its own
38 view of the world, as described in Device Tree.
39
40 Now that the Pi Device Trees have the correct peripheral mappings,
41 replace the hacky address munging with phys_to_dma().
42
43 Signed-off-by: Phil Elwell <phil@raspberrypi.com>
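
In practice the slave-prep paths now translate the configured physical
address through the controller's view of the bus, as in the hunks below:

    if (direction == DMA_DEV_TO_MEM)
            src = phys_to_dma(chan->device->dev, c->cfg.src_addr);
    else
            dst = phys_to_dma(chan->device->dev, c->cfg.dst_addr);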
44
45 bcm2835-dma: Add NO_WAIT_RESP flag
46
47 Use bit 27 of the dreq value (the second cell of the DT DMA descriptor)
48 to request that the WAIT_RESP bit is not set.
49
50 Signed-off-by: Phil Elwell <phil@raspberrypi.com>
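
The bit is decoded by a small helper so that WAIT_RESP is set unless the
client opted out (macros added below):

    /* A fake bit to request that the driver doesn't set the WAIT_RESP bit. */
    #define BCM2835_DMA_NO_WAIT_RESP BIT(27)
    #define WAIT_RESP(x) ((x & BCM2835_DMA_NO_WAIT_RESP) ? \
                          0 : BCM2835_DMA_WAIT_RESP)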
51
52 bcm2835-dma: Advertise the full DMA range
53
54 Unless the DMA mask is set wider than 32 bits, DMA mapping will use a
55 bounce buffer.
56
57 Signed-off-by: Phil Elwell <phil@raspberrypi.com>
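
The mask is therefore taken from the per-SoC config data instead of being
hard-coded to 32 bits (probe hunk below):

    rc = dma_set_mask_and_coherent(&pdev->dev, cfg_data->dma_mask);
    /* DMA_BIT_MASK(36) on BCM2711, DMA_BIT_MASK(32) on BCM2835 */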
58
59 bcm2835-dma: only reserve channel 0 if legacy dma driver is enabled
60
61 If CONFIG_DMA_BCM2708 isn't enabled, there's no need to mask out
62 one of the already scarce DMA channels.
63
64 Signed-off-by: Matthias Reichl <hias@horus.com>
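
The reservation is now compiled in only when the legacy driver is, roughly as
in the probe hunk below (error handling elided):

    #ifdef CONFIG_DMA_BCM2708
            /* One channel is reserved for the legacy API */
            if (chans_available & BCM2835_DMA_BULK_MASK) {
                    rc = bcm_dmaman_probe(pdev, base,
                                          chans_available & BCM2835_DMA_BULK_MASK);
                    chans_available &= ~BCM2835_DMA_BULK_MASK;
            }
    #endif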
65
66 bcm2835-dma: Avoid losing CS flags after interrupt
67
68 Signed-off-by: Dom Cobley <popcornmix@gmail.com>
69
70 bcm2835-dma: Add DMA_WIDE_SOURCE and DMA_WIDE_DEST flags
71
72 Use (reserved) bits 24 and 25 of the dreq value
73 (the second cell of the DT DMA descriptor) to request
74 that wide source reads or wide dest writes are required.
75
76 Signed-off-by: Dom Cobley <popcornmix@gmail.com>
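
As with the other per-channel flags, the bits are mapped onto the 128-bit
access bits in the transfer info (macros added below):

    /* Fake bits to request wide (128-bit) reads/writes */
    #define BCM2835_DMA_WIDE_SOURCE BIT(24)
    #define WIDE_SOURCE(x) ((x & BCM2835_DMA_WIDE_SOURCE) ? \
                            BCM2835_DMA_S_WIDTH : 0)
    #define BCM2835_DMA_WIDE_DEST BIT(25)
    #define WIDE_DEST(x) ((x & BCM2835_DMA_WIDE_DEST) ? \
                          BCM2835_DMA_D_WIDTH : 0)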
77
78 dmaengine: bcm2835: Fix position reporting for 40-bit channels
79
80 For 40-bit channels, the position is reported by reading the upper byte
81 in the SRCI/DESTI registers. However, the driver adds that upper byte
82 with an 8-bit left shift, while it should be a 32-bit shift.
83
84 Fixes: 9a52a9918306 ("bcm2835-dma: Add proper 40-bit DMA support")
85 Signed-off-by: Maxime Ripard <maxime@cerno.tech>
86
87 dmaengine: bcm2835: Use to_bcm2711_cbaddr where relevant
88
89 bcm2711_dma40_memcpy has some code strictly equivalent to the
90 to_bcm2711_cbaddr() function. Let's use it instead.
91
92 Signed-off-by: Maxime Ripard <maxime@cerno.tech>
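
The helper simply converts a 32-byte-aligned bus address into the shifted
form the 40-bit CB registers expect:

    static inline uint32_t to_bcm2711_cbaddr(dma_addr_t addr)
    {
            BUG_ON(addr & 0x1f);
            return (addr >> 5);
    }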
93
94 dmaengine: bcm2835: Fix descriptor usage for 40-bit channels
95
96 The bcm2835_dma_create_cb_chain() function is in charge of building up
97 the descriptors chain for a given transfer.
98
99 It initially supported only the BCM2835-style DMA controller, and
100 was later expanded to support controllers with 40-bit channels that use
101 a different descriptor layout.
102
103 However, some parts of the function still used the old-style descriptor,
104 even when building a chain of new-style descriptors, resulting in weird
105 bugs.
106
107 Fixes: 9a52a9918306 ("bcm2835-dma: Add proper 40-bit DMA support")
108 Signed-off-by: Maxime Ripard <maxime@cerno.tech>
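
With the fix, the chain-building code distinguishes the two layouts when
filling each control block, along the lines of the hunk below:

    if (c->is_40bit_channel) {
            struct bcm2711_dma40_scb *scb =
                    (struct bcm2711_dma40_scb *)control_block;

            scb->ti = to_bcm2711_ti(info);
            scb->src = lower_32_bits(src);
            scb->srci = upper_32_bits(src) | to_bcm2711_srci(info);
            scb->dst = lower_32_bits(dst);
            scb->dsti = upper_32_bits(dst) | to_bcm2711_dsti(info);
            scb->next_cb = 0;
    } else {
            control_block->info = info;
            control_block->src = src;
            control_block->dst = dst;
            control_block->stride = 0;
            control_block->next = 0;
    }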
109
110 bcm2835-dma: Fix WAIT_RESP on memcpy
111
112 The WAIT_RESP flag belongs in info, not in extra.
113
114 Signed-off-by: Dom Cobley <popcornmix@gmail.com>
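
That is, in the memcpy prep path the flag is folded into the per-control-block
info word rather than the final-frame extra bits:

    u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC |
               WAIT_RESP(c->dreq) | WIDE_SOURCE(c->dreq) |
               WIDE_DEST(c->dreq) | BURST_LENGTH(c->dreq);
    u32 extra = BCM2835_DMA_INT_EN;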
115
116 bcm2835-dma: Fix dma_abort for 40-bit channels
117
118 The old sequence wasn't aborting the transfer, which made stop/start
119 of HDMI audio DMA unreliable.
120
121 New sequence approved by Broadcom.
122
123 Signed-off-by: Dom Cobley <popcornmix@gmail.com>
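
The new 40-bit abort sequence pauses the channel, drains outstanding
transactions and then resets it (abbreviated from the hunk below):

    /* Pause the current DMA */
    writel(readl(chan_base + BCM2711_DMA40_CS) & ~BCM2711_DMA40_ACTIVE,
           chan_base + BCM2711_DMA40_CS);

    /* wait for outstanding transactions to complete */
    while ((readl(chan_base + BCM2711_DMA40_CS) & BCM2711_DMA40_TRANSACTIONS) &&
           --timeout)
            cpu_relax();

    /* Set CS back to default state, then reset the channel */
    writel(BCM2711_DMA40_PROT, chan_base + BCM2711_DMA40_CS);
    writel(readl(chan_base + BCM2711_DMA40_DEBUG) | BCM2711_DMA40_DEBUG_RESET,
           chan_base + BCM2711_DMA40_DEBUG);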
124
125 bcm2835-dma: Fix dma_abort for non-40bit channels
126
127 The sequence we were doing was not safe.
128
129 Clearing CS meant BCM2835_DMA_WAIT_FOR_WRITES was cleared
130 and so polling BCM2835_DMA_WAITING_FOR_WRITES had no benefit.
131
132 Broadcom have provided a recommended sequence to abort
133 a DMA lite channel, so switch to that.
134
135 Signed-off-by: Dom Cobley <popcornmix@gmail.com>
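
The recommended sequence clears the pending next block, requests an abort
with the channel still active, and only then pauses and resets it
(abbreviated from the hunk below):

    writel(0, chan_base + BCM2835_DMA_NEXTCB);

    /* Abort the DMA, which needs to be enabled to complete */
    writel(readl(chan_base + BCM2835_DMA_CS) | BCM2835_DMA_ABORT | BCM2835_DMA_ACTIVE,
           chan_base + BCM2835_DMA_CS);

    while ((readl(chan_base + BCM2835_DMA_CS) & BCM2835_DMA_ABORT) && --timeout)
            cpu_relax();

    writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS);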
136
137 bcm2835-dma: Support dma flags for multi-beat burst
138
139 Add a control bit to enable a multi-beat burst on a DMA.
140 This improves DMA performance and is required for HDMI audio.
141
142 Signed-off-by: Dom Cobley <popcornmix@gmail.com>
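
The control bit is another fake dreq flag, expanded into a fixed burst length
in the transfer info (macros added below):

    /* A fake bit to request a multi-beat burst */
    #define BCM2835_DMA_BURST BIT(30)
    #define BURST_LENGTH(x) ((x & BCM2835_DMA_BURST) ? \
                             BCM2835_DMA_BURST_LENGTH(3) : 0)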
143
144 bcm2835-dma: Need to keep PROT bits set in CS on 40-bit controller
145
146 Resetting them to zero puts the DMA channel into secure mode,
147 which makes further accesses impossible.
148
149 Signed-off-by: Dom Cobley <popcornmix@gmail.com>
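
Whenever the driver rewrites CS on a 40-bit channel it therefore keeps the
PROT field set, e.g. when returning the channel to its default state:

    /* we always want to run in supervisor mode */
    #define BCM2711_DMA40_PROT (BIT(8)|BIT(9))

    writel(BCM2711_DMA40_PROT, chan_base + BCM2711_DMA40_CS);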
150 ---
151 drivers/dma/Kconfig | 2 +-
152 drivers/dma/bcm2835-dma.c | 714 ++++++++++++++++++++++++++++++++------
153 2 files changed, 602 insertions(+), 114 deletions(-)
154
155 --- a/drivers/dma/Kconfig
156 +++ b/drivers/dma/Kconfig
157 @@ -136,7 +136,7 @@ config BCM_SBA_RAID
158
159 config DMA_BCM2835
160 tristate "BCM2835 DMA engine support"
161 - depends on ARCH_BCM2835
162 + depends on ARCH_BCM2835 || ARCH_BCM2708 || ARCH_BCM2709
163 select DMA_ENGINE
164 select DMA_VIRTUAL_CHANNELS
165
166 --- a/drivers/dma/bcm2835-dma.c
167 +++ b/drivers/dma/bcm2835-dma.c
168 @@ -18,6 +18,7 @@
169 * Copyright 2012 Marvell International Ltd.
170 */
171 #include <linux/dmaengine.h>
172 +#include <linux/dma-direct.h>
173 #include <linux/dma-mapping.h>
174 #include <linux/dmapool.h>
175 #include <linux/err.h>
176 @@ -25,6 +26,7 @@
177 #include <linux/interrupt.h>
178 #include <linux/list.h>
179 #include <linux/module.h>
180 +#include <linux/platform_data/dma-bcm2708.h>
181 #include <linux/platform_device.h>
182 #include <linux/slab.h>
183 #include <linux/io.h>
184 @@ -36,6 +38,13 @@
185
186 #define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14
187 #define BCM2835_DMA_CHAN_NAME_SIZE 8
188 +#define BCM2835_DMA_BULK_MASK BIT(0)
189 +#define BCM2711_DMA_MEMCPY_CHAN 14
190 +
191 +struct bcm2835_dma_cfg_data {
192 + u64 dma_mask;
193 + u32 chan_40bit_mask;
194 +};
195
196 /**
197 * struct bcm2835_dmadev - BCM2835 DMA controller
198 @@ -48,6 +57,7 @@ struct bcm2835_dmadev {
199 struct dma_device ddev;
200 void __iomem *base;
201 dma_addr_t zero_page;
202 + const struct bcm2835_dma_cfg_data *cfg_data;
203 };
204
205 struct bcm2835_dma_cb {
206 @@ -60,6 +70,17 @@ struct bcm2835_dma_cb {
207 uint32_t pad[2];
208 };
209
210 +struct bcm2711_dma40_scb {
211 + uint32_t ti;
212 + uint32_t src;
213 + uint32_t srci;
214 + uint32_t dst;
215 + uint32_t dsti;
216 + uint32_t len;
217 + uint32_t next_cb;
218 + uint32_t rsvd;
219 +};
220 +
221 struct bcm2835_cb_entry {
222 struct bcm2835_dma_cb *cb;
223 dma_addr_t paddr;
224 @@ -80,6 +101,7 @@ struct bcm2835_chan {
225 unsigned int irq_flags;
226
227 bool is_lite_channel;
228 + bool is_40bit_channel;
229 };
230
231 struct bcm2835_desc {
232 @@ -136,11 +158,37 @@ struct bcm2835_desc {
233 #define BCM2835_DMA_S_WIDTH BIT(9) /* 128bit writes if set */
234 #define BCM2835_DMA_S_DREQ BIT(10) /* enable SREQ for source */
235 #define BCM2835_DMA_S_IGNORE BIT(11) /* ignore source reads - read 0 */
236 -#define BCM2835_DMA_BURST_LENGTH(x) ((x & 15) << 12)
237 +#define BCM2835_DMA_BURST_LENGTH(x) (((x) & 15) << 12)
238 +#define BCM2835_DMA_GET_BURST_LENGTH(x) (((x) >> 12) & 15)
239 +#define BCM2835_DMA_CS_FLAGS(x) (x & (BCM2835_DMA_PRIORITY(15) | \
240 + BCM2835_DMA_PANIC_PRIORITY(15) | \
241 + BCM2835_DMA_WAIT_FOR_WRITES | \
242 + BCM2835_DMA_DIS_DEBUG))
243 #define BCM2835_DMA_PER_MAP(x) ((x & 31) << 16) /* REQ source */
244 #define BCM2835_DMA_WAIT(x) ((x & 31) << 21) /* add DMA-wait cycles */
245 #define BCM2835_DMA_NO_WIDE_BURSTS BIT(26) /* no 2 beat write bursts */
246
247 +/* A fake bit to request that the driver doesn't set the WAIT_RESP bit. */
248 +#define BCM2835_DMA_NO_WAIT_RESP BIT(27)
249 +#define WAIT_RESP(x) ((x & BCM2835_DMA_NO_WAIT_RESP) ? \
250 + 0 : BCM2835_DMA_WAIT_RESP)
251 +
252 +/* A fake bit to request that the driver requires wide reads */
253 +#define BCM2835_DMA_WIDE_SOURCE BIT(24)
254 +#define WIDE_SOURCE(x) ((x & BCM2835_DMA_WIDE_SOURCE) ? \
255 + BCM2835_DMA_S_WIDTH : 0)
256 +
257 +/* A fake bit to request that the driver requires wide writes */
258 +#define BCM2835_DMA_WIDE_DEST BIT(25)
259 +#define WIDE_DEST(x) ((x & BCM2835_DMA_WIDE_DEST) ? \
260 + BCM2835_DMA_D_WIDTH : 0)
261 +
262 +/* A fake bit to request that the driver requires multi-beat burst */
263 +#define BCM2835_DMA_BURST BIT(30)
264 +#define BURST_LENGTH(x) ((x & BCM2835_DMA_BURST) ? \
265 + BCM2835_DMA_BURST_LENGTH(3) : 0)
266 +
267 +
268 /* debug register bits */
269 #define BCM2835_DMA_DEBUG_LAST_NOT_SET_ERR BIT(0)
270 #define BCM2835_DMA_DEBUG_FIFO_ERR BIT(1)
271 @@ -165,13 +213,124 @@ struct bcm2835_desc {
272 #define BCM2835_DMA_DATA_TYPE_S128 16
273
274 /* Valid only for channels 0 - 14, 15 has its own base address */
275 -#define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */
276 +#define BCM2835_DMA_CHAN_SIZE 0x100
277 +#define BCM2835_DMA_CHAN(n) ((n) * BCM2835_DMA_CHAN_SIZE) /* Base address */
278 #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n))
279
280 /* the max dma length for different channels */
281 #define MAX_DMA_LEN SZ_1G
282 #define MAX_LITE_DMA_LEN (SZ_64K - 4)
283
284 +/* 40-bit DMA support */
285 +#define BCM2711_DMA40_CS 0x00
286 +#define BCM2711_DMA40_CB 0x04
287 +#define BCM2711_DMA40_DEBUG 0x0c
288 +#define BCM2711_DMA40_TI 0x10
289 +#define BCM2711_DMA40_SRC 0x14
290 +#define BCM2711_DMA40_SRCI 0x18
291 +#define BCM2711_DMA40_DEST 0x1c
292 +#define BCM2711_DMA40_DESTI 0x20
293 +#define BCM2711_DMA40_LEN 0x24
294 +#define BCM2711_DMA40_NEXT_CB 0x28
295 +#define BCM2711_DMA40_DEBUG2 0x2c
296 +
297 +#define BCM2711_DMA40_ACTIVE BIT(0)
298 +#define BCM2711_DMA40_END BIT(1)
299 +#define BCM2711_DMA40_INT BIT(2)
300 +#define BCM2711_DMA40_DREQ BIT(3) /* DREQ state */
301 +#define BCM2711_DMA40_RD_PAUSED BIT(4) /* Reading is paused */
302 +#define BCM2711_DMA40_WR_PAUSED BIT(5) /* Writing is paused */
303 +#define BCM2711_DMA40_DREQ_PAUSED BIT(6) /* Is paused by DREQ flow control */
304 +#define BCM2711_DMA40_WAITING_FOR_WRITES BIT(7) /* Waiting for last write */
305 +// we always want to run in supervisor mode
306 +#define BCM2711_DMA40_PROT (BIT(8)|BIT(9))
307 +#define BCM2711_DMA40_ERR BIT(10)
308 +#define BCM2711_DMA40_QOS(x) (((x) & 0x1f) << 16)
309 +#define BCM2711_DMA40_PANIC_QOS(x) (((x) & 0x1f) << 20)
310 +#define BCM2711_DMA40_TRANSACTIONS BIT(25)
311 +#define BCM2711_DMA40_WAIT_FOR_WRITES BIT(28)
312 +#define BCM2711_DMA40_DISDEBUG BIT(29)
313 +#define BCM2711_DMA40_ABORT BIT(30)
314 +#define BCM2711_DMA40_HALT BIT(31)
315 +
316 +#define BCM2711_DMA40_CS_FLAGS(x) (x & (BCM2711_DMA40_QOS(15) | \
317 + BCM2711_DMA40_PANIC_QOS(15) | \
318 + BCM2711_DMA40_WAIT_FOR_WRITES | \
319 + BCM2711_DMA40_DISDEBUG))
320 +
321 +/* Transfer information bits */
322 +#define BCM2711_DMA40_INTEN BIT(0)
323 +#define BCM2711_DMA40_TDMODE BIT(1) /* 2D-Mode */
324 +#define BCM2711_DMA40_WAIT_RESP BIT(2) /* wait for AXI write to be acked */
325 +#define BCM2711_DMA40_WAIT_RD_RESP BIT(3) /* wait for AXI read to complete */
326 +#define BCM2711_DMA40_PER_MAP(x) ((x & 31) << 9) /* REQ source */
327 +#define BCM2711_DMA40_S_DREQ BIT(14) /* enable SREQ for source */
328 +#define BCM2711_DMA40_D_DREQ BIT(15) /* enable DREQ for destination */
329 +#define BCM2711_DMA40_S_WAIT(x) ((x & 0xff) << 16) /* add DMA read-wait cycles */
330 +#define BCM2711_DMA40_D_WAIT(x) ((x & 0xff) << 24) /* add DMA write-wait cycles */
331 +
332 +/* debug register bits */
333 +#define BCM2711_DMA40_DEBUG_WRITE_ERR BIT(0)
334 +#define BCM2711_DMA40_DEBUG_FIFO_ERR BIT(1)
335 +#define BCM2711_DMA40_DEBUG_READ_ERR BIT(2)
336 +#define BCM2711_DMA40_DEBUG_READ_CB_ERR BIT(3)
337 +#define BCM2711_DMA40_DEBUG_IN_ON_ERR BIT(8)
338 +#define BCM2711_DMA40_DEBUG_ABORT_ON_ERR BIT(9)
339 +#define BCM2711_DMA40_DEBUG_HALT_ON_ERR BIT(10)
340 +#define BCM2711_DMA40_DEBUG_DISABLE_CLK_GATE BIT(11)
341 +#define BCM2711_DMA40_DEBUG_RSTATE_SHIFT 14
342 +#define BCM2711_DMA40_DEBUG_RSTATE_BITS 4
343 +#define BCM2711_DMA40_DEBUG_WSTATE_SHIFT 18
344 +#define BCM2711_DMA40_DEBUG_WSTATE_BITS 4
345 +#define BCM2711_DMA40_DEBUG_RESET BIT(23)
346 +#define BCM2711_DMA40_DEBUG_ID_SHIFT 24
347 +#define BCM2711_DMA40_DEBUG_ID_BITS 4
348 +#define BCM2711_DMA40_DEBUG_VERSION_SHIFT 28
349 +#define BCM2711_DMA40_DEBUG_VERSION_BITS 4
350 +
351 +/* Valid only for channels 0 - 3 (11 - 14) */
352 +#define BCM2711_DMA40_CHAN(n) (((n) + 11) << 8) /* Base address */
353 +#define BCM2711_DMA40_CHANIO(base, n) ((base) + BCM2711_DMA_CHAN(n))
354 +
355 +/* the max dma length for different channels */
356 +#define MAX_DMA40_LEN SZ_1G
357 +
358 +#define BCM2711_DMA40_BURST_LEN(x) (((x) & 15) << 8)
359 +#define BCM2711_DMA40_INC BIT(12)
360 +#define BCM2711_DMA40_SIZE_32 (0 << 13)
361 +#define BCM2711_DMA40_SIZE_64 (1 << 13)
362 +#define BCM2711_DMA40_SIZE_128 (2 << 13)
363 +#define BCM2711_DMA40_SIZE_256 (3 << 13)
364 +#define BCM2711_DMA40_IGNORE BIT(15)
365 +#define BCM2711_DMA40_STRIDE(x) ((x) << 16) /* For 2D mode */
366 +
367 +#define BCM2711_DMA40_MEMCPY_FLAGS \
368 + (BCM2711_DMA40_QOS(0) | \
369 + BCM2711_DMA40_PANIC_QOS(0) | \
370 + BCM2711_DMA40_WAIT_FOR_WRITES | \
371 + BCM2711_DMA40_DISDEBUG)
372 +
373 +#define BCM2711_DMA40_MEMCPY_XFER_INFO \
374 + (BCM2711_DMA40_SIZE_128 | \
375 + BCM2711_DMA40_INC | \
376 + BCM2711_DMA40_BURST_LEN(16))
377 +
378 +struct bcm2835_dmadev *memcpy_parent;
379 +static void __iomem *memcpy_chan;
380 +static struct bcm2711_dma40_scb *memcpy_scb;
381 +static dma_addr_t memcpy_scb_dma;
382 +DEFINE_SPINLOCK(memcpy_lock);
383 +
384 +static const struct bcm2835_dma_cfg_data bcm2835_dma_cfg = {
385 + .chan_40bit_mask = 0,
386 + .dma_mask = DMA_BIT_MASK(32),
387 +};
388 +
389 +static const struct bcm2835_dma_cfg_data bcm2711_dma_cfg = {
390 + .chan_40bit_mask = BIT(11) | BIT(12) | BIT(13) | BIT(14),
391 + .dma_mask = DMA_BIT_MASK(36),
392 +};
393 +
394 static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c)
395 {
396 /* lite and normal channels have different max frame length */
397 @@ -201,6 +360,36 @@ static inline struct bcm2835_desc *to_bc
398 return container_of(t, struct bcm2835_desc, vd.tx);
399 }
400
401 +static inline uint32_t to_bcm2711_ti(uint32_t info)
402 +{
403 + return ((info & BCM2835_DMA_INT_EN) ? BCM2711_DMA40_INTEN : 0) |
404 + ((info & BCM2835_DMA_WAIT_RESP) ? BCM2711_DMA40_WAIT_RESP : 0) |
405 + ((info & BCM2835_DMA_S_DREQ) ?
406 + (BCM2711_DMA40_S_DREQ | BCM2711_DMA40_WAIT_RD_RESP) : 0) |
407 + ((info & BCM2835_DMA_D_DREQ) ? BCM2711_DMA40_D_DREQ : 0) |
408 + BCM2711_DMA40_PER_MAP((info >> 16) & 0x1f);
409 +}
410 +
411 +static inline uint32_t to_bcm2711_srci(uint32_t info)
412 +{
413 + return ((info & BCM2835_DMA_S_INC) ? BCM2711_DMA40_INC : 0) |
414 + ((info & BCM2835_DMA_S_WIDTH) ? BCM2711_DMA40_SIZE_128 : 0) |
415 + BCM2711_DMA40_BURST_LEN(BCM2835_DMA_GET_BURST_LENGTH(info));
416 +}
417 +
418 +static inline uint32_t to_bcm2711_dsti(uint32_t info)
419 +{
420 + return ((info & BCM2835_DMA_D_INC) ? BCM2711_DMA40_INC : 0) |
421 + ((info & BCM2835_DMA_D_WIDTH) ? BCM2711_DMA40_SIZE_128 : 0) |
422 + BCM2711_DMA40_BURST_LEN(BCM2835_DMA_GET_BURST_LENGTH(info));
423 +}
424 +
425 +static inline uint32_t to_bcm2711_cbaddr(dma_addr_t addr)
426 +{
427 + BUG_ON(addr & 0x1f);
428 + return (addr >> 5);
429 +}
430 +
431 static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc)
432 {
433 size_t i;
434 @@ -219,45 +408,53 @@ static void bcm2835_dma_desc_free(struct
435 }
436
437 static void bcm2835_dma_create_cb_set_length(
438 - struct bcm2835_chan *chan,
439 + struct bcm2835_chan *c,
440 struct bcm2835_dma_cb *control_block,
441 size_t len,
442 size_t period_len,
443 size_t *total_len,
444 u32 finalextrainfo)
445 {
446 - size_t max_len = bcm2835_dma_max_frame_length(chan);
447 + size_t max_len = bcm2835_dma_max_frame_length(c);
448 + uint32_t cb_len;
449
450 /* set the length taking lite-channel limitations into account */
451 - control_block->length = min_t(u32, len, max_len);
452 + cb_len = min_t(u32, len, max_len);
453
454 - /* finished if we have no period_length */
455 - if (!period_len)
456 - return;
457 + if (period_len) {
458 + /*
459 + * period_len means: that we need to generate
460 + * transfers that are terminating at every
461 + * multiple of period_len - this is typically
462 + * used to set the interrupt flag in info
463 + * which is required during cyclic transfers
464 + */
465
466 - /*
467 - * period_len means: that we need to generate
468 - * transfers that are terminating at every
469 - * multiple of period_len - this is typically
470 - * used to set the interrupt flag in info
471 - * which is required during cyclic transfers
472 - */
473 + /* have we filled in period_length yet? */
474 + if (*total_len + cb_len < period_len) {
475 + /* update number of bytes in this period so far */
476 + *total_len += cb_len;
477 + } else {
478 + /* calculate the length that remains to reach period_len */
479 + cb_len = period_len - *total_len;
480
481 - /* have we filled in period_length yet? */
482 - if (*total_len + control_block->length < period_len) {
483 - /* update number of bytes in this period so far */
484 - *total_len += control_block->length;
485 - return;
486 + /* reset total_length for next period */
487 + *total_len = 0;
488 + }
489 }
490
491 - /* calculate the length that remains to reach period_length */
492 - control_block->length = period_len - *total_len;
493 -
494 - /* reset total_length for next period */
495 - *total_len = 0;
496 -
497 - /* add extrainfo bits in info */
498 - control_block->info |= finalextrainfo;
499 + if (c->is_40bit_channel) {
500 + struct bcm2711_dma40_scb *scb =
501 + (struct bcm2711_dma40_scb *)control_block;
502 +
503 + scb->len = cb_len;
504 + /* add extrainfo bits to ti */
505 + scb->ti |= to_bcm2711_ti(finalextrainfo);
506 + } else {
507 + control_block->length = cb_len;
508 + /* add extrainfo bits to info */
509 + control_block->info |= finalextrainfo;
510 + }
511 }
512
513 static inline size_t bcm2835_dma_count_frames_for_sg(
514 @@ -280,7 +477,7 @@ static inline size_t bcm2835_dma_count_f
515 /**
516 * bcm2835_dma_create_cb_chain - create a control block and fills data in
517 *
518 - * @chan: the @dma_chan for which we run this
519 + * @c: the @bcm2835_chan for which we run this
520 * @direction: the direction in which we transfer
521 * @cyclic: it is a cyclic transfer
522 * @info: the default info bits to apply per controlblock
523 @@ -298,12 +495,11 @@ static inline size_t bcm2835_dma_count_f
524 * @gfp: the GFP flag to use for allocation
525 */
526 static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
527 - struct dma_chan *chan, enum dma_transfer_direction direction,
528 + struct bcm2835_chan *c, enum dma_transfer_direction direction,
529 bool cyclic, u32 info, u32 finalextrainfo, size_t frames,
530 dma_addr_t src, dma_addr_t dst, size_t buf_len,
531 size_t period_len, gfp_t gfp)
532 {
533 - struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
534 size_t len = buf_len, total_len;
535 size_t frame;
536 struct bcm2835_desc *d;
537 @@ -335,11 +531,23 @@ static struct bcm2835_desc *bcm2835_dma_
538
539 /* fill in the control block */
540 control_block = cb_entry->cb;
541 - control_block->info = info;
542 - control_block->src = src;
543 - control_block->dst = dst;
544 - control_block->stride = 0;
545 - control_block->next = 0;
546 + if (c->is_40bit_channel) {
547 + struct bcm2711_dma40_scb *scb =
548 + (struct bcm2711_dma40_scb *)control_block;
549 + scb->ti = to_bcm2711_ti(info);
550 + scb->src = lower_32_bits(src);
551 + scb->srci= upper_32_bits(src) | to_bcm2711_srci(info);
552 + scb->dst = lower_32_bits(dst);
553 + scb->dsti = upper_32_bits(dst) | to_bcm2711_dsti(info);
554 + scb->next_cb = 0;
555 + } else {
556 + control_block->info = info;
557 + control_block->src = src;
558 + control_block->dst = dst;
559 + control_block->stride = 0;
560 + control_block->next = 0;
561 + }
562 +
563 /* set up length in control_block if requested */
564 if (buf_len) {
565 /* calculate length honoring period_length */
566 @@ -349,25 +557,51 @@ static struct bcm2835_desc *bcm2835_dma_
567 cyclic ? finalextrainfo : 0);
568
569 /* calculate new remaining length */
570 - len -= control_block->length;
571 + if (c->is_40bit_channel)
572 + len -= ((struct bcm2711_dma40_scb *)control_block)->len;
573 + else
574 + len -= control_block->length;
575 }
576
577 /* link this the last controlblock */
578 - if (frame)
579 + if (frame && c->is_40bit_channel)
580 + ((struct bcm2711_dma40_scb *)
581 + d->cb_list[frame - 1].cb)->next_cb =
582 + to_bcm2711_cbaddr(cb_entry->paddr);
583 + if (frame && !c->is_40bit_channel)
584 d->cb_list[frame - 1].cb->next = cb_entry->paddr;
585
586 /* update src and dst and length */
587 - if (src && (info & BCM2835_DMA_S_INC))
588 - src += control_block->length;
589 - if (dst && (info & BCM2835_DMA_D_INC))
590 - dst += control_block->length;
591 + if (src && (info & BCM2835_DMA_S_INC)) {
592 + if (c->is_40bit_channel)
593 + src += ((struct bcm2711_dma40_scb *)control_block)->len;
594 + else
595 + src += control_block->length;
596 + }
597 +
598 + if (dst && (info & BCM2835_DMA_D_INC)) {
599 + if (c->is_40bit_channel)
600 + dst += ((struct bcm2711_dma40_scb *)control_block)->len;
601 + else
602 + dst += control_block->length;
603 + }
604
605 /* Length of total transfer */
606 - d->size += control_block->length;
607 + if (c->is_40bit_channel)
608 + d->size += ((struct bcm2711_dma40_scb *)control_block)->len;
609 + else
610 + d->size += control_block->length;
611 }
612
613 /* the last frame requires extra flags */
614 - d->cb_list[d->frames - 1].cb->info |= finalextrainfo;
615 + if (c->is_40bit_channel) {
616 + struct bcm2711_dma40_scb *scb =
617 + (struct bcm2711_dma40_scb *)d->cb_list[d->frames-1].cb;
618 +
619 + scb->ti |= to_bcm2711_ti(finalextrainfo);
620 + } else {
621 + d->cb_list[d->frames - 1].cb->info |= finalextrainfo;
622 + }
623
624 /* detect a size missmatch */
625 if (buf_len && (d->size != buf_len))
626 @@ -381,13 +615,12 @@ error_cb:
627 }
628
629 static void bcm2835_dma_fill_cb_chain_with_sg(
630 - struct dma_chan *chan,
631 + struct bcm2835_chan *c,
632 enum dma_transfer_direction direction,
633 struct bcm2835_cb_entry *cb,
634 struct scatterlist *sgl,
635 unsigned int sg_len)
636 {
637 - struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
638 size_t len, max_len;
639 unsigned int i;
640 dma_addr_t addr;
641 @@ -395,14 +628,35 @@ static void bcm2835_dma_fill_cb_chain_wi
642
643 max_len = bcm2835_dma_max_frame_length(c);
644 for_each_sg(sgl, sgent, sg_len, i) {
645 - for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent);
646 - len > 0;
647 - addr += cb->cb->length, len -= cb->cb->length, cb++) {
648 - if (direction == DMA_DEV_TO_MEM)
649 - cb->cb->dst = addr;
650 - else
651 - cb->cb->src = addr;
652 - cb->cb->length = min(len, max_len);
653 + if (c->is_40bit_channel) {
654 + struct bcm2711_dma40_scb *scb;
655 +
656 + for (addr = sg_dma_address(sgent),
657 + len = sg_dma_len(sgent);
658 + len > 0;
659 + addr += scb->len, len -= scb->len, cb++) {
660 + scb = (struct bcm2711_dma40_scb *)cb->cb;
661 + if (direction == DMA_DEV_TO_MEM) {
662 + scb->dst = lower_32_bits(addr);
663 + scb->dsti = upper_32_bits(addr) | BCM2711_DMA40_INC;
664 + } else {
665 + scb->src = lower_32_bits(addr);
666 + scb->srci = upper_32_bits(addr) | BCM2711_DMA40_INC;
667 + }
668 + scb->len = min(len, max_len);
669 + }
670 + } else {
671 + for (addr = sg_dma_address(sgent),
672 + len = sg_dma_len(sgent);
673 + len > 0;
674 + addr += cb->cb->length, len -= cb->cb->length,
675 + cb++) {
676 + if (direction == DMA_DEV_TO_MEM)
677 + cb->cb->dst = addr;
678 + else
679 + cb->cb->src = addr;
680 + cb->cb->length = min(len, max_len);
681 + }
682 }
683 }
684 }
685 @@ -410,29 +664,74 @@ static void bcm2835_dma_fill_cb_chain_wi
686 static void bcm2835_dma_abort(struct bcm2835_chan *c)
687 {
688 void __iomem *chan_base = c->chan_base;
689 - long int timeout = 10000;
690 + long timeout = 100;
691
692 - /*
693 - * A zero control block address means the channel is idle.
694 - * (The ACTIVE flag in the CS register is not a reliable indicator.)
695 - */
696 - if (!readl(chan_base + BCM2835_DMA_ADDR))
697 - return;
698 + if (c->is_40bit_channel) {
699 + /*
700 + * A zero control block address means the channel is idle.
701 + * (The ACTIVE flag in the CS register is not a reliable indicator.)
702 + */
703 + if (!readl(chan_base + BCM2711_DMA40_CB))
704 + return;
705 +
706 + /* Pause the current DMA */
707 + writel(readl(chan_base + BCM2711_DMA40_CS) & ~BCM2711_DMA40_ACTIVE,
708 + chan_base + BCM2711_DMA40_CS);
709 +
710 + /* wait for outstanding transactions to complete */
711 + while ((readl(chan_base + BCM2711_DMA40_CS) & BCM2711_DMA40_TRANSACTIONS) &&
712 + --timeout)
713 + cpu_relax();
714 +
715 + /* Peripheral might be stuck and fail to complete */
716 + if (!timeout)
717 + dev_err(c->vc.chan.device->dev,
718 + "failed to complete pause on dma %d (CS:%08x)\n", c->ch,
719 + readl(chan_base + BCM2711_DMA40_CS));
720 +
721 + /* Set CS back to default state */
722 + writel(BCM2711_DMA40_PROT, chan_base + BCM2711_DMA40_CS);
723 +
724 + /* Reset the DMA */
725 + writel(readl(chan_base + BCM2711_DMA40_DEBUG) | BCM2711_DMA40_DEBUG_RESET,
726 + chan_base + BCM2711_DMA40_DEBUG);
727 + } else {
728 + /*
729 + * A zero control block address means the channel is idle.
730 + * (The ACTIVE flag in the CS register is not a reliable indicator.)
731 + */
732 + if (!readl(chan_base + BCM2835_DMA_ADDR))
733 + return;
734
735 - /* Write 0 to the active bit - Pause the DMA */
736 - writel(0, chan_base + BCM2835_DMA_CS);
737 + /* We need to clear the next DMA block pending */
738 + writel(0, chan_base + BCM2835_DMA_NEXTCB);
739
740 - /* Wait for any current AXI transfer to complete */
741 - while ((readl(chan_base + BCM2835_DMA_CS) &
742 - BCM2835_DMA_WAITING_FOR_WRITES) && --timeout)
743 - cpu_relax();
744 + /* Abort the DMA, which needs to be enabled to complete */
745 + writel(readl(chan_base + BCM2835_DMA_CS) | BCM2835_DMA_ABORT | BCM2835_DMA_ACTIVE,
746 + chan_base + BCM2835_DMA_CS);
747 +
748 + /* wait for DMA to be aborted */
749 + while ((readl(chan_base + BCM2835_DMA_CS) & BCM2835_DMA_ABORT) && --timeout)
750 + cpu_relax();
751 +
752 + /* Write 0 to the active bit - Pause the DMA */
753 + writel(readl(chan_base + BCM2835_DMA_CS) & ~BCM2835_DMA_ACTIVE,
754 + chan_base + BCM2835_DMA_CS);
755
756 - /* Peripheral might be stuck and fail to signal AXI write responses */
757 - if (!timeout)
758 - dev_err(c->vc.chan.device->dev,
759 - "failed to complete outstanding writes\n");
760 + /*
761 + * Peripheral might be stuck and fail to complete
762 + * This is expected when dreqs are enabled but not asserted
763 + * so only report error in non dreq case
764 + */
765 + if (!timeout && !(readl(chan_base + BCM2835_DMA_TI) &
766 + (BCM2835_DMA_S_DREQ | BCM2835_DMA_D_DREQ)))
767 + dev_err(c->vc.chan.device->dev,
768 + "failed to complete pause on dma %d (CS:%08x)\n", c->ch,
769 + readl(chan_base + BCM2835_DMA_CS));
770
771 - writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS);
772 + /* Set CS back to default state and reset the DMA */
773 + writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS);
774 + }
775 }
776
777 static void bcm2835_dma_start_desc(struct bcm2835_chan *c)
778 @@ -449,8 +748,16 @@ static void bcm2835_dma_start_desc(struc
779
780 c->desc = d = to_bcm2835_dma_desc(&vd->tx);
781
782 - writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR);
783 - writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS);
784 + if (c->is_40bit_channel) {
785 + writel(to_bcm2711_cbaddr(d->cb_list[0].paddr),
786 + c->chan_base + BCM2711_DMA40_CB);
787 + writel(BCM2711_DMA40_ACTIVE | BCM2711_DMA40_PROT | BCM2711_DMA40_CS_FLAGS(c->dreq),
788 + c->chan_base + BCM2711_DMA40_CS);
789 + } else {
790 + writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR);
791 + writel(BCM2835_DMA_ACTIVE | BCM2835_DMA_CS_FLAGS(c->dreq),
792 + c->chan_base + BCM2835_DMA_CS);
793 + }
794 }
795
796 static irqreturn_t bcm2835_dma_callback(int irq, void *data)
797 @@ -477,8 +784,13 @@ static irqreturn_t bcm2835_dma_callback(
798 * if this IRQ handler is threaded.) If the channel is finished, it
799 * will remain idle despite the ACTIVE flag being set.
800 */
801 - writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE,
802 - c->chan_base + BCM2835_DMA_CS);
803 + if (c->is_40bit_channel)
804 + writel(BCM2835_DMA_INT | BCM2711_DMA40_ACTIVE | BCM2711_DMA40_PROT |
805 + BCM2711_DMA40_CS_FLAGS(c->dreq),
806 + c->chan_base + BCM2711_DMA40_CS);
807 + else
808 + writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE | BCM2835_DMA_CS_FLAGS(c->dreq),
809 + c->chan_base + BCM2835_DMA_CS);
810
811 d = c->desc;
812
813 @@ -540,20 +852,39 @@ static size_t bcm2835_dma_desc_size_pos(
814 unsigned int i;
815 size_t size;
816
817 - for (size = i = 0; i < d->frames; i++) {
818 - struct bcm2835_dma_cb *control_block = d->cb_list[i].cb;
819 - size_t this_size = control_block->length;
820 - dma_addr_t dma;
821 + if (d->c->is_40bit_channel) {
822 + for (size = i = 0; i < d->frames; i++) {
823 + struct bcm2711_dma40_scb *control_block =
824 + (struct bcm2711_dma40_scb *)d->cb_list[i].cb;
825 + size_t this_size = control_block->len;
826 + dma_addr_t dma;
827
828 - if (d->dir == DMA_DEV_TO_MEM)
829 - dma = control_block->dst;
830 - else
831 - dma = control_block->src;
832 + if (d->dir == DMA_DEV_TO_MEM)
833 + dma = control_block->dst;
834 + else
835 + dma = control_block->src;
836 +
837 + if (size)
838 + size += this_size;
839 + else if (addr >= dma && addr < dma + this_size)
840 + size += dma + this_size - addr;
841 + }
842 + } else {
843 + for (size = i = 0; i < d->frames; i++) {
844 + struct bcm2835_dma_cb *control_block = d->cb_list[i].cb;
845 + size_t this_size = control_block->length;
846 + dma_addr_t dma;
847 +
848 + if (d->dir == DMA_DEV_TO_MEM)
849 + dma = control_block->dst;
850 + else
851 + dma = control_block->src;
852
853 - if (size)
854 - size += this_size;
855 - else if (addr >= dma && addr < dma + this_size)
856 - size += dma + this_size - addr;
857 + if (size)
858 + size += this_size;
859 + else if (addr >= dma && addr < dma + this_size)
860 + size += dma + this_size - addr;
861 + }
862 }
863
864 return size;
865 @@ -580,12 +911,25 @@ static enum dma_status bcm2835_dma_tx_st
866 struct bcm2835_desc *d = c->desc;
867 dma_addr_t pos;
868
869 - if (d->dir == DMA_MEM_TO_DEV)
870 + if (d->dir == DMA_MEM_TO_DEV && c->is_40bit_channel) {
871 + u64 lo_bits, hi_bits;
872 +
873 + lo_bits = readl(c->chan_base + BCM2711_DMA40_SRC);
874 + hi_bits = readl(c->chan_base + BCM2711_DMA40_SRCI) & 0xff;
875 + pos = (hi_bits << 32) | lo_bits;
876 + } else if (d->dir == DMA_MEM_TO_DEV && !c->is_40bit_channel) {
877 pos = readl(c->chan_base + BCM2835_DMA_SOURCE_AD);
878 - else if (d->dir == DMA_DEV_TO_MEM)
879 + } else if (d->dir == DMA_DEV_TO_MEM && c->is_40bit_channel) {
880 + u64 lo_bits, hi_bits;
881 +
882 + lo_bits = readl(c->chan_base + BCM2711_DMA40_DEST);
883 + hi_bits = readl(c->chan_base + BCM2711_DMA40_DESTI) & 0xff;
884 + pos = (hi_bits << 32) | lo_bits;
885 + } else if (d->dir == DMA_DEV_TO_MEM && !c->is_40bit_channel) {
886 pos = readl(c->chan_base + BCM2835_DMA_DEST_AD);
887 - else
888 + } else {
889 pos = 0;
890 + }
891
892 txstate->residue = bcm2835_dma_desc_size_pos(d, pos);
893 } else {
894 @@ -615,8 +959,10 @@ static struct dma_async_tx_descriptor *b
895 {
896 struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
897 struct bcm2835_desc *d;
898 - u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC;
899 - u32 extra = BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP;
900 + u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC |
901 + WAIT_RESP(c->dreq) | WIDE_SOURCE(c->dreq) |
902 + WIDE_DEST(c->dreq) | BURST_LENGTH(c->dreq);
903 + u32 extra = BCM2835_DMA_INT_EN;
904 size_t max_len = bcm2835_dma_max_frame_length(c);
905 size_t frames;
906
907 @@ -628,7 +974,7 @@ static struct dma_async_tx_descriptor *b
908 frames = bcm2835_dma_frames_for_length(len, max_len);
909
910 /* allocate the CB chain - this also fills in the pointers */
911 - d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false,
912 + d = bcm2835_dma_create_cb_chain(c, DMA_MEM_TO_MEM, false,
913 info, extra, frames,
914 src, dst, len, 0, GFP_KERNEL);
915 if (!d)
916 @@ -646,7 +992,8 @@ static struct dma_async_tx_descriptor *b
917 struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
918 struct bcm2835_desc *d;
919 dma_addr_t src = 0, dst = 0;
920 - u32 info = BCM2835_DMA_WAIT_RESP;
921 + u32 info = WAIT_RESP(c->dreq) | WIDE_SOURCE(c->dreq) |
922 + WIDE_DEST(c->dreq) | BURST_LENGTH(c->dreq);
923 u32 extra = BCM2835_DMA_INT_EN;
924 size_t frames;
925
926 @@ -662,12 +1009,12 @@ static struct dma_async_tx_descriptor *b
927 if (direction == DMA_DEV_TO_MEM) {
928 if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
929 return NULL;
930 - src = c->cfg.src_addr;
931 + src = phys_to_dma(chan->device->dev, c->cfg.src_addr);
932 info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
933 } else {
934 if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
935 return NULL;
936 - dst = c->cfg.dst_addr;
937 + dst = phys_to_dma(chan->device->dev, c->cfg.dst_addr);
938 info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
939 }
940
941 @@ -675,7 +1022,7 @@ static struct dma_async_tx_descriptor *b
942 frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len);
943
944 /* allocate the CB chain */
945 - d = bcm2835_dma_create_cb_chain(chan, direction, false,
946 + d = bcm2835_dma_create_cb_chain(c, direction, false,
947 info, extra,
948 frames, src, dst, 0, 0,
949 GFP_NOWAIT);
950 @@ -683,7 +1030,7 @@ static struct dma_async_tx_descriptor *b
951 return NULL;
952
953 /* fill in frames with scatterlist pointers */
954 - bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list,
955 + bcm2835_dma_fill_cb_chain_with_sg(c, direction, d->cb_list,
956 sgl, sg_len);
957
958 return vchan_tx_prep(&c->vc, &d->vd, flags);
959 @@ -698,7 +1045,8 @@ static struct dma_async_tx_descriptor *b
960 struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
961 struct bcm2835_desc *d;
962 dma_addr_t src, dst;
963 - u32 info = BCM2835_DMA_WAIT_RESP;
964 + u32 info = WAIT_RESP(c->dreq) | WIDE_SOURCE(c->dreq) |
965 + WIDE_DEST(c->dreq) | BURST_LENGTH(c->dreq);
966 u32 extra = 0;
967 size_t max_len = bcm2835_dma_max_frame_length(c);
968 size_t frames;
969 @@ -736,13 +1084,13 @@ static struct dma_async_tx_descriptor *b
970 if (direction == DMA_DEV_TO_MEM) {
971 if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
972 return NULL;
973 - src = c->cfg.src_addr;
974 + src = phys_to_dma(chan->device->dev, c->cfg.src_addr);
975 dst = buf_addr;
976 info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
977 } else {
978 if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
979 return NULL;
980 - dst = c->cfg.dst_addr;
981 + dst = phys_to_dma(chan->device->dev, c->cfg.dst_addr);
982 src = buf_addr;
983 info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
984
985 @@ -762,7 +1110,7 @@ static struct dma_async_tx_descriptor *b
986 * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine
987 * implementation calls prep_dma_cyclic with interrupts disabled.
988 */
989 - d = bcm2835_dma_create_cb_chain(chan, direction, true,
990 + d = bcm2835_dma_create_cb_chain(c, direction, true,
991 info, extra,
992 frames, src, dst, buf_len,
993 period_len, GFP_NOWAIT);
994 @@ -770,7 +1118,12 @@ static struct dma_async_tx_descriptor *b
995 return NULL;
996
997 /* wrap around into a loop */
998 - d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr;
999 + if (c->is_40bit_channel)
1000 + ((struct bcm2711_dma40_scb *)
1001 + d->cb_list[frames - 1].cb)->next_cb =
1002 + to_bcm2711_cbaddr(d->cb_list[0].paddr);
1003 + else
1004 + d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr;
1005
1006 return vchan_tx_prep(&c->vc, &d->vd, flags);
1007 }
1008 @@ -831,9 +1184,11 @@ static int bcm2835_dma_chan_init(struct
1009 c->irq_number = irq;
1010 c->irq_flags = irq_flags;
1011
1012 - /* check in DEBUG register if this is a LITE channel */
1013 - if (readl(c->chan_base + BCM2835_DMA_DEBUG) &
1014 - BCM2835_DMA_DEBUG_LITE)
1015 + /* check for 40bit and lite channels */
1016 + if (d->cfg_data->chan_40bit_mask & BIT(chan_id))
1017 + c->is_40bit_channel = true;
1018 + else if (readl(c->chan_base + BCM2835_DMA_DEBUG) &
1019 + BCM2835_DMA_DEBUG_LITE)
1020 c->is_lite_channel = true;
1021
1022 return 0;
1023 @@ -853,8 +1208,58 @@ static void bcm2835_dma_free(struct bcm2
1024 DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
1025 }
1026
1027 +int bcm2711_dma40_memcpy_init(void)
1028 +{
1029 + if (!memcpy_parent)
1030 + return -EPROBE_DEFER;
1031 +
1032 + if (!memcpy_chan)
1033 + return -EINVAL;
1034 +
1035 + if (!memcpy_scb)
1036 + return -ENOMEM;
1037 +
1038 + return 0;
1039 +}
1040 +EXPORT_SYMBOL(bcm2711_dma40_memcpy_init);
1041 +
1042 +void bcm2711_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size)
1043 +{
1044 + struct bcm2711_dma40_scb *scb = memcpy_scb;
1045 + unsigned long flags;
1046 +
1047 + if (!scb) {
1048 + pr_err("bcm2711_dma40_memcpy not initialised!\n");
1049 + return;
1050 + }
1051 +
1052 + spin_lock_irqsave(&memcpy_lock, flags);
1053 +
1054 + scb->ti = 0;
1055 + scb->src = lower_32_bits(src);
1056 + scb->srci = upper_32_bits(src) | BCM2711_DMA40_MEMCPY_XFER_INFO;
1057 + scb->dst = lower_32_bits(dst);
1058 + scb->dsti = upper_32_bits(dst) | BCM2711_DMA40_MEMCPY_XFER_INFO;
1059 + scb->len = size;
1060 + scb->next_cb = 0;
1061 +
1062 + writel(to_bcm2711_cbaddr(memcpy_scb_dma), memcpy_chan + BCM2711_DMA40_CB);
1063 + writel(BCM2711_DMA40_MEMCPY_FLAGS | BCM2711_DMA40_ACTIVE | BCM2711_DMA40_PROT,
1064 + memcpy_chan + BCM2711_DMA40_CS);
1065 +
1066 + /* Poll for completion */
1067 + while (!(readl(memcpy_chan + BCM2711_DMA40_CS) & BCM2711_DMA40_END))
1068 + cpu_relax();
1069 +
1070 + writel(BCM2711_DMA40_END | BCM2711_DMA40_PROT, memcpy_chan + BCM2711_DMA40_CS);
1071 +
1072 + spin_unlock_irqrestore(&memcpy_lock, flags);
1073 +}
1074 +EXPORT_SYMBOL(bcm2711_dma40_memcpy);
1075 +
1076 static const struct of_device_id bcm2835_dma_of_match[] = {
1077 - { .compatible = "brcm,bcm2835-dma", },
1078 + { .compatible = "brcm,bcm2835-dma", .data = &bcm2835_dma_cfg },
1079 + { .compatible = "brcm,bcm2711-dma", .data = &bcm2711_dma_cfg },
1080 {},
1081 };
1082 MODULE_DEVICE_TABLE(of, bcm2835_dma_of_match);
1083 @@ -877,7 +1282,10 @@ static struct dma_chan *bcm2835_dma_xlat
1084
1085 static int bcm2835_dma_probe(struct platform_device *pdev)
1086 {
1087 + const struct bcm2835_dma_cfg_data *cfg_data;
1088 + const struct of_device_id *of_id;
1089 struct bcm2835_dmadev *od;
1090 + struct resource *res;
1091 void __iomem *base;
1092 int rc;
1093 int i, j;
1094 @@ -885,11 +1293,20 @@ static int bcm2835_dma_probe(struct plat
1095 int irq_flags;
1096 uint32_t chans_available;
1097 char chan_name[BCM2835_DMA_CHAN_NAME_SIZE];
1098 + int chan_count, chan_start, chan_end;
1099 +
1100 + of_id = of_match_node(bcm2835_dma_of_match, pdev->dev.of_node);
1101 + if (!of_id) {
1102 + dev_err(&pdev->dev, "Failed to match compatible string\n");
1103 + return -EINVAL;
1104 + }
1105 +
1106 + cfg_data = of_id->data;
1107
1108 if (!pdev->dev.dma_mask)
1109 pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
1110
1111 - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
1112 + rc = dma_set_mask_and_coherent(&pdev->dev, cfg_data->dma_mask);
1113 if (rc) {
1114 dev_err(&pdev->dev, "Unable to set DMA mask\n");
1115 return rc;
1116 @@ -901,10 +1318,17 @@ static int bcm2835_dma_probe(struct plat
1117
1118 dma_set_max_seg_size(&pdev->dev, 0x3FFFFFFF);
1119
1120 - base = devm_platform_ioremap_resource(pdev, 0);
1121 + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
1122 if (IS_ERR(base))
1123 return PTR_ERR(base);
1124
1125 + /* The set of channels can be split across multiple instances. */
1126 + chan_start = ((u32)(uintptr_t)base / BCM2835_DMA_CHAN_SIZE) & 0xf;
1127 + base -= BCM2835_DMA_CHAN(chan_start);
1128 + chan_count = resource_size(res) / BCM2835_DMA_CHAN_SIZE;
1129 + chan_end = min(chan_start + chan_count,
1130 + BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1);
1131 +
1132 od->base = base;
1133
1134 dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
1135 @@ -940,6 +1364,14 @@ static int bcm2835_dma_probe(struct plat
1136 return -ENOMEM;
1137 }
1138
1139 + of_id = of_match_node(bcm2835_dma_of_match, pdev->dev.of_node);
1140 + if (!of_id) {
1141 + dev_err(&pdev->dev, "Failed to match compatible string\n");
1142 + return -EINVAL;
1143 + }
1144 +
1145 + od->cfg_data = cfg_data;
1146 +
1147 /* Request DMA channel mask from device tree */
1148 if (of_property_read_u32(pdev->dev.of_node,
1149 "brcm,dma-channel-mask",
1150 @@ -949,8 +1381,36 @@ static int bcm2835_dma_probe(struct plat
1151 goto err_no_dma;
1152 }
1153
1154 +#ifdef CONFIG_DMA_BCM2708
1155 + /* One channel is reserved for the legacy API */
1156 + if (chans_available & BCM2835_DMA_BULK_MASK) {
1157 + rc = bcm_dmaman_probe(pdev, base,
1158 + chans_available & BCM2835_DMA_BULK_MASK);
1159 + if (rc)
1160 + dev_err(&pdev->dev,
1161 + "Failed to initialize the legacy API\n");
1162 +
1163 + chans_available &= ~BCM2835_DMA_BULK_MASK;
1164 + }
1165 +#endif
1166 +
1167 + /* And possibly one for the 40-bit DMA memcpy API */
1168 + if (chans_available & od->cfg_data->chan_40bit_mask &
1169 + BIT(BCM2711_DMA_MEMCPY_CHAN)) {
1170 + memcpy_parent = od;
1171 + memcpy_chan = BCM2835_DMA_CHANIO(base, BCM2711_DMA_MEMCPY_CHAN);
1172 + memcpy_scb = dma_alloc_coherent(memcpy_parent->ddev.dev,
1173 + sizeof(*memcpy_scb),
1174 + &memcpy_scb_dma, GFP_KERNEL);
1175 + if (!memcpy_scb)
1176 + dev_warn(&pdev->dev,
1177 + "Failed to allocated memcpy scb\n");
1178 +
1179 + chans_available &= ~BIT(BCM2711_DMA_MEMCPY_CHAN);
1180 + }
1181 +
1182 /* get irqs for each channel that we support */
1183 - for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
1184 + for (i = chan_start; i < chan_end; i++) {
1185 /* skip masked out channels */
1186 if (!(chans_available & (1 << i))) {
1187 irq[i] = -1;
1188 @@ -973,13 +1433,17 @@ static int bcm2835_dma_probe(struct plat
1189 irq[i] = platform_get_irq(pdev, i < 11 ? i : 11);
1190 }
1191
1192 + chan_count = 0;
1193 +
1194 /* get irqs for each channel */
1195 - for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
1196 + for (i = chan_start; i < chan_end; i++) {
1197 /* skip channels without irq */
1198 if (irq[i] < 0)
1199 continue;
1200
1201 /* check if there are other channels that also use this irq */
1202 + /* FIXME: This will fail if interrupts are shared across
1203 + instances */
1204 irq_flags = 0;
1205 for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++)
1206 if ((i != j) && (irq[j] == irq[i])) {
1207 @@ -991,9 +1455,10 @@ static int bcm2835_dma_probe(struct plat
1208 rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags);
1209 if (rc)
1210 goto err_no_dma;
1211 + chan_count++;
1212 }
1213
1214 - dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i);
1215 + dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", chan_count);
1216
1217 /* Device-tree DMA controller registration */
1218 rc = of_dma_controller_register(pdev->dev.of_node,
1219 @@ -1023,7 +1488,15 @@ static int bcm2835_dma_remove(struct pla
1220 {
1221 struct bcm2835_dmadev *od = platform_get_drvdata(pdev);
1222
1223 + bcm_dmaman_remove(pdev);
1224 dma_async_device_unregister(&od->ddev);
1225 + if (memcpy_parent == od) {
1226 + dma_free_coherent(&pdev->dev, sizeof(*memcpy_scb), memcpy_scb,
1227 + memcpy_scb_dma);
1228 + memcpy_parent = NULL;
1229 + memcpy_scb = NULL;
1230 + memcpy_chan = NULL;
1231 + }
1232 bcm2835_dma_free(od);
1233
1234 return 0;
1235 @@ -1038,7 +1511,22 @@ static struct platform_driver bcm2835_dm
1236 },
1237 };
1238
1239 -module_platform_driver(bcm2835_dma_driver);
1240 +static int bcm2835_dma_init(void)
1241 +{
1242 + return platform_driver_register(&bcm2835_dma_driver);
1243 +}
1244 +
1245 +static void bcm2835_dma_exit(void)
1246 +{
1247 + platform_driver_unregister(&bcm2835_dma_driver);
1248 +}
1249 +
1250 +/*
1251 + * Load after serial driver (arch_initcall) so we see the messages if it fails,
1252 + * but before drivers (module_init) that need a DMA channel.
1253 + */
1254 +subsys_initcall(bcm2835_dma_init);
1255 +module_exit(bcm2835_dma_exit);
1256
1257 MODULE_ALIAS("platform:bcm2835-dma");
1258 MODULE_DESCRIPTION("BCM2835 DMA engine driver");