/*
 * Copyright (C) 2005-2006 by Texas Instruments
 *
 * This file implements a DMA interface using TI's CPPI DMA.
 * For now it's DaVinci-only, but CPPI isn't specific to DaVinci or USB.
 * TUSB 6010 over VLYNQ has CPPI that looks much like DaVinci.
 */

#include "musb_core.h"
/* CPPI DMA status 7-mar:
 *
 * - See musb_{host,gadget}.c for more info
 *
 * - Correct RX DMA generally forces the engine into irq-per-packet mode,
 *   which can easily saturate the CPU under non-mass-storage loads.
 *
 * NOTES 24-aug (2.6.18-rc4):
 *
 * - peripheral RXDMA wedged in a test with packets of length 512/512/1.
 *   Evidently after the 1 byte packet was received and acked, the queue
 *   of BDs got garbaged so it wouldn't empty the fifo.  (rxcsr 0x2003,
 *   and RX DMA0: 4 left, 80000000 8feff880, 8feff860 8feff860; 8f321401
 *   004001ff 00000001 ... 8feff860)  Host was just getting NAKed on tx
 *   of its next (512 byte) packet.  IRQ issues?
 *
 * REVISIT: the "transfer DMA" glue between CPPI and USB fifos will
 * evidently also directly update the RX and TX CSRs ... so audit all
 * host and peripheral side DMA code to avoid CSR access after DMA has
 * been started.
 */
/* REVISIT now we can avoid preallocating these descriptors; or
 * more simply, switch to a global freelist not per-channel ones.
 * Note: at full speed, 64 descriptors == 4K bulk data.
 */
#define NUM_TXCHAN_BD	64
#define NUM_RXCHAN_BD	64
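
/* Working out the note above: a full speed bulk endpoint's maxpacket is
 * 64 bytes, so 64 BDs x 64 bytes each == 4096 bytes of queued bulk data.
 */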
static inline void cpu_drain_writebuffer(void)
{
#ifdef CONFIG_CPU_ARM926T
	/* REVISIT this "should not be needed",
	 * but lack of it sure seemed to hurt ...
	 */
	asm("mcr p15, 0, r0, c7, c10, 4 @ drain write buffer\n");
#endif
}
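
/* The mcr above is the ARM926EJ-S CP15 c7/c10/4 "drain write buffer"
 * operation: it stalls until posted writes have reached memory, so CPPI
 * never reads a stale BD.  (A generic barrier such as wmb() would
 * presumably be the portable way to express this, but that is untested
 * here.)
 */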
static inline struct cppi_descriptor *cppi_bd_alloc(struct cppi_channel *c)
{
	struct cppi_descriptor	*bd = c->bdPoolHead;

	if (bd)
		c->bdPoolHead = bd->next;
	return bd;
}

static void
cppi_bd_free(struct cppi_channel *c, struct cppi_descriptor *bd)
{
	if (!bd)
		return;
	bd->next = c->bdPoolHead;
	c->bdPoolHead = bd;
}
/*
 * Start DMA controller.
 *
 * Initialize the DMA controller as necessary.
 */

/* sparse annotation: state RAM field addresses are passed to
 * musb_readl()/musb_writel() as __iomem pointers
 */
#define CAST	(__force void __iomem *)
/* zero out entire rx state RAM entry for the channel */
static void cppi_reset_rx(struct cppi_rx_stateram __iomem *rx)
{
	musb_writel(CAST &rx->buffOffset, 0, 0);
	musb_writel(CAST &rx->headPtr, 0, 0);
	musb_writel(CAST &rx->sopDescPtr, 0, 0);
	musb_writel(CAST &rx->currDescPtr, 0, 0);
	musb_writel(CAST &rx->currBuffPtr, 0, 0);
	musb_writel(CAST &rx->pktLength, 0, 0);
	musb_writel(CAST &rx->byteCount, 0, 0);
}
static void __init cppi_pool_init(struct cppi *cppi, struct cppi_channel *c)
{
	int	j;

	/* initialize channel fields */
	c->activeQueueHead = NULL;
	c->activeQueueTail = NULL;
	c->lastHwBDProcessed = NULL;
	c->Channel.status = MUSB_DMA_STATUS_UNKNOWN;
	c->controller = cppi;
	c->bLastModeRndis = 0;
	c->Channel.private_data = c;
	c->bdPoolHead = NULL;

	/* build the BD free list for the channel */
	for (j = 0; j < NUM_TXCHAN_BD + 1; j++) {
		struct cppi_descriptor	*bd;
		dma_addr_t		dma;

		bd = dma_pool_alloc(cppi->pool, GFP_KERNEL, &dma);
		bd->dma = dma;
		cppi_bd_free(c, bd);
	}
}
static int cppi_channel_abort(struct dma_channel *);

static void cppi_pool_free(struct cppi_channel *c)
{
	struct cppi		*cppi = c->controller;
	struct cppi_descriptor	*bd;

	(void) cppi_channel_abort(&c->Channel);
	c->Channel.status = MUSB_DMA_STATUS_UNKNOWN;
	c->controller = NULL;

	/* free all its bds */
	bd = c->lastHwBDProcessed;
	do {
		if (bd)
			dma_pool_free(cppi->pool, bd, bd->dma);
		bd = cppi_bd_alloc(c);
	} while (bd);
	c->lastHwBDProcessed = NULL;
}
static int __init cppi_controller_start(struct dma_controller *c)
{
	struct cppi	*controller;
	void __iomem	*regBase;
	int		i;

	controller = container_of(c, struct cppi, Controller);

	/* do whatever is necessary to start controller */
	for (i = 0; i < ARRAY_SIZE(controller->txCppi); i++) {
		controller->txCppi[i].transmit = true;
		controller->txCppi[i].chNo = i;
	}
	for (i = 0; i < ARRAY_SIZE(controller->rxCppi); i++) {
		controller->rxCppi[i].transmit = false;
		controller->rxCppi[i].chNo = i;
	}

	/* setup BD list on a per channel basis */
	for (i = 0; i < ARRAY_SIZE(controller->txCppi); i++)
		cppi_pool_init(controller, controller->txCppi + i);
	for (i = 0; i < ARRAY_SIZE(controller->rxCppi); i++)
		cppi_pool_init(controller, controller->rxCppi + i);

	/* do necessary configuration in h/w to get started */
	regBase = controller->pCoreBase - DAVINCI_BASE_OFFSET;

	INIT_LIST_HEAD(&controller->tx_complete);

	/* initialise tx/rx channel head pointers to zero */
	for (i = 0; i < ARRAY_SIZE(controller->txCppi); i++) {
		struct cppi_channel	*txChannel = controller->txCppi + i;
		struct cppi_tx_stateram __iomem *txState;

		INIT_LIST_HEAD(&txChannel->tx_complete);

		txState = regBase + DAVINCI_TXCPPI_STATERAM_OFFSET(i);
		txChannel->stateRam = txState;
		/* zero out entire state RAM entry for the channel */
		txState->headPtr = 0;
		txState->sopDescPtr = 0;
		txState->currDescPtr = 0;
		txState->currBuffPtr = 0;
		txState->flags = 0;
		txState->remLength = 0;
		/* txState->dummy = 0; */
		txState->completionPtr = 0;
	}
	for (i = 0; i < ARRAY_SIZE(controller->rxCppi); i++) {
		struct cppi_channel	*rxChannel = controller->rxCppi + i;
		struct cppi_rx_stateram __iomem *rxState;

		INIT_LIST_HEAD(&rxChannel->tx_complete);

		rxState = regBase + DAVINCI_RXCPPI_STATERAM_OFFSET(i);
		rxChannel->stateRam = rxState;
		cppi_reset_rx(rxChannel->stateRam);
	}

	/* enable individual cppi channels */
	musb_writel(regBase, DAVINCI_TXCPPI_INTENAB_REG,
			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
	musb_writel(regBase, DAVINCI_RXCPPI_INTENAB_REG,
			DAVINCI_DMA_ALL_CHANNELS_ENABLE);

	/* enable tx/rx CPPI control */
	musb_writel(regBase, DAVINCI_TXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE);
	musb_writel(regBase, DAVINCI_RXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE);

	/* disable RNDIS mode, also host rx RNDIS autorequest */
	musb_writel(regBase, DAVINCI_RNDIS_REG, 0);
	musb_writel(regBase, DAVINCI_AUTOREQ_REG, 0);

	return 0;
}
/*
 * Stop DMA controller.
 *
 * De-initialize the DMA controller as necessary.
 */
static int cppi_controller_stop(struct dma_controller *c)
{
	struct cppi	*controller;
	void __iomem	*regBase;
	int		i;

	controller = container_of(c, struct cppi, Controller);

	regBase = controller->pCoreBase - DAVINCI_BASE_OFFSET;
	/* disable individual channel interrupts */
	musb_writel(regBase, DAVINCI_TXCPPI_INTCLR_REG,
			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
	musb_writel(regBase, DAVINCI_RXCPPI_INTCLR_REG,
			DAVINCI_DMA_ALL_CHANNELS_ENABLE);

	DBG(1, "Tearing down RX and TX Channels\n");
	for (i = 0; i < ARRAY_SIZE(controller->txCppi); i++) {
		/* FIXME restructure of txdma to use bds like rxdma */
		controller->txCppi[i].lastHwBDProcessed = NULL;
		cppi_pool_free(controller->txCppi + i);
	}
	for (i = 0; i < ARRAY_SIZE(controller->rxCppi); i++)
		cppi_pool_free(controller->rxCppi + i);

	/* In the TX case proper teardown is supported; we resort to
	 * disabling TX/RX CPPI after cleanup of the TX channels, since
	 * TX CPPI cannot be disabled before TX teardown is complete.
	 */
	/* disable tx/rx cppi */
	musb_writel(regBase, DAVINCI_TXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_DISABLE);
	musb_writel(regBase, DAVINCI_RXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_DISABLE);

	return 0;
}
/* While dma channel is allocated, we only want the core irqs active
 * for fault reports, otherwise we'd get irqs that we don't care about.
 * Except for TX irqs, where dma done != fifo empty and reusable ...
 *
 * NOTE: docs don't say either way, but irq masking **enables** irqs.
 *
 * REVISIT same issue applies to pure PIO usage too, and non-cppi dma...
 */
static inline void core_rxirq_disable(void __iomem *tibase, unsigned epnum)
{
	musb_writel(tibase, DAVINCI_USB_INT_MASK_CLR_REG, 1 << (epnum + 8));
}

static inline void core_rxirq_enable(void __iomem *tibase, unsigned epnum)
{
	musb_writel(tibase, DAVINCI_USB_INT_MASK_SET_REG, 1 << (epnum + 8));
}
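
/* Per the shifts above, the RX irq for (1-based) endpoint N lives at bit
 * N + 8 of the DaVinci USB interrupt mask.  Callers pass the endpoint
 * number, not the 0-based CPPI channel number -- note the "chNo + 1" at
 * the call sites elsewhere in this file.
 */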
/*
 * Allocate a CPPI Channel for DMA.  With CPPI, channels are bound to
 * each transfer direction of a non-control endpoint, so allocating
 * (and deallocating) is mostly a way to notice bad housekeeping on
 * the software side.  We assume the irqs are always active.
 */
static struct dma_channel *
cppi_channel_allocate(struct dma_controller *c,
		struct musb_hw_ep *ep,
		u8 transmit)
{
	struct cppi		*controller;
	u8			chNum;
	struct cppi_channel	*otgCh;
	void __iomem		*tibase;
	int			local_end = ep->epnum;

	controller = container_of(c, struct cppi, Controller);
	tibase = controller->pCoreBase - DAVINCI_BASE_OFFSET;

	/* remember local_end: 1..Max_EndPt, and cppi ChNum: 0..Max_EndPt-1 */
	chNum = local_end - 1;

	/* return the corresponding CPPI Channel Handle, and
	 * probably disable the non-CPPI irq until we need it.
	 */
	if (transmit) {
		if (local_end > ARRAY_SIZE(controller->txCppi)) {
			DBG(1, "no %cX DMA channel for ep%d\n", 'T', local_end);
			return NULL;
		}
		otgCh = controller->txCppi + chNum;
	} else {
		if (local_end > ARRAY_SIZE(controller->rxCppi)) {
			DBG(1, "no %cX DMA channel for ep%d\n", 'R', local_end);
			return NULL;
		}
		otgCh = controller->rxCppi + chNum;
		core_rxirq_disable(tibase, local_end);
	}

	/* REVISIT make this an error later once the same driver code works
	 * with the Mentor DMA engine too
	 */
	if (otgCh->hw_ep)
		DBG(1, "re-allocating DMA%d %cX channel %p\n",
				chNum, transmit ? 'T' : 'R', otgCh);
	otgCh->hw_ep = ep;
	otgCh->Channel.status = MUSB_DMA_STATUS_FREE;

	DBG(4, "Allocate CPPI%d %cX\n", chNum, transmit ? 'T' : 'R');
	otgCh->Channel.private_data = otgCh;
	return &otgCh->Channel;
}
/* Release a CPPI Channel.  */
static void cppi_channel_release(struct dma_channel *channel)
{
	struct cppi_channel	*c;
	void __iomem		*tibase;
	unsigned		epnum;

	/* REVISIT: for paranoia, check state and abort if needed... */

	c = container_of(channel, struct cppi_channel, Channel);
	epnum = c->chNo + 1;
	tibase = c->controller->pCoreBase - DAVINCI_BASE_OFFSET;
	if (!c->hw_ep)
		DBG(1, "releasing idle DMA channel %p\n", c);
	else if (!c->transmit)
		core_rxirq_enable(tibase, epnum);

	/* for now, leave its cppi IRQ enabled (we won't trigger it) */
	c->hw_ep = NULL;
	channel->status = MUSB_DMA_STATUS_UNKNOWN;
}
/* Context: controller irqlocked */
static void
cppi_dump_rx(int level, struct cppi_channel *c, const char *tag)
{
	void __iomem	*base = c->controller->pCoreBase;

	musb_ep_select(base, c->chNo + 1);

	DBG(level, "RX DMA%d%s: %d left, csr %04x, "
			"%08x H%08x S%08x C%08x, "
			"B%08x L%08x %08x .. %08x\n",
		c->chNo, tag,
		musb_readl(base - DAVINCI_BASE_OFFSET,
			DAVINCI_RXCPPI_BUFCNT0_REG + 4 * c->chNo),
		musb_readw(c->hw_ep->regs, MUSB_RXCSR),

		musb_readl(c->stateRam, 0 * 4),	/* buf offset */
		musb_readl(c->stateRam, 1 * 4),	/* head ptr */
		musb_readl(c->stateRam, 2 * 4),	/* sop bd */
		musb_readl(c->stateRam, 3 * 4),	/* current bd */

		musb_readl(c->stateRam, 4 * 4),	/* current buf */
		musb_readl(c->stateRam, 5 * 4),	/* pkt len */
		musb_readl(c->stateRam, 6 * 4),	/* byte cnt */
		musb_readl(c->stateRam, 7 * 4)	/* completion */
		);
}
/* Context: controller irqlocked */
static void
cppi_dump_tx(int level, struct cppi_channel *c, const char *tag)
{
	void __iomem	*base = c->controller->pCoreBase;

	musb_ep_select(base, c->chNo + 1);

	DBG(level, "TX DMA%d%s: csr %04x, "
			"H%08x S%08x C%08x %08x, "
			"F%08x L%08x .. %08x\n",
		c->chNo, tag,
		musb_readw(c->hw_ep->regs, MUSB_TXCSR),

		musb_readl(c->stateRam, 0 * 4),	/* head ptr */
		musb_readl(c->stateRam, 1 * 4),	/* sop bd */
		musb_readl(c->stateRam, 2 * 4),	/* current bd */
		musb_readl(c->stateRam, 3 * 4),	/* buf offset */

		musb_readl(c->stateRam, 4 * 4),	/* flags */
		musb_readl(c->stateRam, 5 * 4),	/* len */
		/* dummy/unused word 6 */
		musb_readl(c->stateRam, 7 * 4)	/* completion */
		);
}
/* Context: controller irqlocked */
static void
cppi_rndis_update(struct cppi_channel *c, int is_rx,
		void __iomem *tibase, int is_rndis)
{
	/* we may need to change the rndis flag for this cppi channel */
	if (c->bLastModeRndis != is_rndis) {
		u32	regVal = musb_readl(tibase, DAVINCI_RNDIS_REG);
		u32	temp = 1 << (c->chNo);

		/* RX channels use the upper half of the register */
		if (is_rx)
			temp <<= 16;
		if (is_rndis)
			regVal |= temp;
		else
			regVal &= ~temp;
		musb_writel(tibase, DAVINCI_RNDIS_REG, regVal);
		c->bLastModeRndis = is_rndis;
	}
}
static void cppi_dump_rxbd(const char *tag, struct cppi_descriptor *bd)
{
	pr_debug("RXBD/%s %08x: "
			"nxt %08x buf %08x off.blen %08x opt.plen %08x\n",
			tag, bd->dma,
			bd->hNext, bd->buffPtr, bd->bOffBLen, bd->hOptions);
}
static void cppi_dump_rxq(int level, const char *tag, struct cppi_channel *rx)
{
	struct cppi_descriptor	*bd;

	if (!_dbg_level(level))
		return;
	cppi_dump_rx(level, rx, tag);
	if (rx->lastHwBDProcessed)
		cppi_dump_rxbd("last", rx->lastHwBDProcessed);
	for (bd = rx->activeQueueHead; bd; bd = bd->next)
		cppi_dump_rxbd("active", bd);
}
/* NOTE: DaVinci autoreq is ignored except for host side "RNDIS" mode RX;
 * so we won't ever use it (see "CPPI RX Woes" below).
 */
static inline int cppi_autoreq_update(struct cppi_channel *rx,
		void __iomem *tibase, int onepacket, unsigned n_bds)
{
	u32	val;

#ifdef RNDIS_RX_IS_USABLE
	u32	tmp;
	/* assert(is_host_active(musb)) */

	/* start from "AutoReq never" */
	tmp = musb_readl(tibase, DAVINCI_AUTOREQ_REG);
	val = tmp & ~((0x3) << (rx->chNo * 2));

	/* HCD arranged reqpkt for packet #1.  we arrange int
	 * for all but the last one, maybe in two segments.
	 */
	if (!onepacket) {
#if 0
		/* use two segments, autoreq "all" then the last "never" */
		val |= ((0x3) << (rx->chNo * 2));
		n_bds--;
#else
		/* one segment, autoreq "all-but-last" */
		val |= ((0x1) << (rx->chNo * 2));
#endif
	}

	if (val != tmp) {
		/* make sure that autoreq is updated before continuing */
		musb_writel(tibase, DAVINCI_AUTOREQ_REG, val);
		do {
			tmp = musb_readl(tibase, DAVINCI_AUTOREQ_REG);
		} while (tmp != val);
	}
#endif

	/* REQPKT is turned off after each segment */
	if (n_bds && rx->actualLen) {
		void __iomem	*regs = rx->hw_ep->regs;

		val = musb_readw(regs, MUSB_RXCSR);
		if (!(val & MUSB_RXCSR_H_REQPKT)) {
			val |= MUSB_RXCSR_H_REQPKT | MUSB_RXCSR_H_WZC_BITS;
			musb_writew(regs, MUSB_RXCSR, val);
			/* flush write buffer */
			val = musb_readw(regs, MUSB_RXCSR);
		}
	}
	return n_bds;
}
/* Buffer enqueuing Logic:
 *
 * - RX builds new queues each time, to help handle routine "early
 *   termination" cases (faults, including errors and short reads)
 *
 * - for now, TX reuses the same queue of BDs every time
 *
 * REVISIT long term, we want a normal dynamic model: the goal will be
 * to append to the existing queue, processing completed "dma buffers"
 * (segments) on the fly.
 *
 * Otherwise we force an IRQ latency between requests, which slows us a lot
 * (especially in "transparent" dma).  Unfortunately that model seems to be
 * inherent in the DMA model from the Mentor code, except in the rare case
 * of transfers big enough (~128+ KB) that we could append "middle" segments
 * in the TX paths.  (RX can't do this, see below.)
 *
 * That's true even in the CPPI-friendly iso case, where most urbs have
 * several small segments provided in a group and where the "packet at a time"
 * "transparent" DMA model is always correct, even on the RX side.
 */
/*
 * TX is a lot more reasonable than RX; it doesn't need to run in
 * irq-per-packet mode very often.  RNDIS mode seems to behave too
 * (other than how it handles the exactly-N-packets case).  Building a
 * txdma queue with multiple requests (urb or usb_request) looks
 * like it would work ... but fault handling would need much testing.
 *
 * The main issue with TX mode RNDIS relates to transfer lengths that
 * are an exact multiple of the packet length.  It appears that there's
 * a hiccup in that case (maybe the DMA completes before the ZLP gets
 * written?) boiling down to not being able to rely on CPPI writing any
 * terminating zero length packet before the next transfer is written.
 * So that's punted to PIO; better yet, gadget drivers can avoid it.
 *
 * Plus, there's allegedly an undocumented constraint that rndis transfer
 * length be a multiple of 64 bytes ... but the chip doesn't act that
 * way, and we really don't _want_ that behavior anyway.
 *
 * On TX, "transparent" mode works ... although experiments have shown
 * problems trying to use the SOP/EOP bits in different USB packets.
 *
 * REVISIT try to handle terminating zero length packets using CPPI
 * instead of doing it by PIO after an IRQ.  (Meanwhile, make Ethernet
 * links avoid that issue by forcing them to avoid zlps.)
 */
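
/* Concrete example of the tradeoff above, matching the rndis test in
 * cppi_next_tx_segment() below: with a 512 byte maxpacket, a 1200 byte
 * transfer may use RNDIS mode (one BD, one IRQ) since 1200 % 512 != 0;
 * a 1024 byte transfer is an exact multiple, so it goes out in
 * "transparent" mode and any terminating ZLP is handled by PIO.
 */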
static void
cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx)
{
	unsigned		maxpacket = tx->pktSize;
	dma_addr_t		addr = tx->startAddr + tx->currOffset;
	size_t			length = tx->transferSize - tx->currOffset;
	struct cppi_descriptor	*bd;
	unsigned		n_bds;
	unsigned		i;
	struct cppi_tx_stateram	*txState = tx->stateRam;
	int			rndis;

	/* TX can use the CPPI "rndis" mode, where we can probably fit this
	 * transfer in one BD and one IRQ.  The only time we would NOT want
	 * to use it is when hardware constraints prevent it, or if we'd
	 * trigger the "send a ZLP?" confusion.
	 */
	rndis = (maxpacket & 0x3f) == 0
		&& (length % maxpacket) != 0;

	if (rndis) {
		maxpacket = length;
		n_bds = 1;
	} else {
		n_bds = length / maxpacket;
		if (!length || (length % maxpacket))
			n_bds++;
		n_bds = min(n_bds, (unsigned) NUM_TXCHAN_BD);
		length = min(n_bds * maxpacket, length);
	}

	DBG(4, "TX DMA%d, pktSz %d %s bds %d dma 0x%x len %u\n",
			tx->chNo, maxpacket,
			rndis ? "rndis" : "transparent", n_bds,
			addr, length);

	cppi_rndis_update(tx, 0, musb->ctrl_base, rndis);

	/* assuming here that channel_program is called during
	 * transfer initiation ... current code maintains state
	 * for one outstanding request only (no queues, not even
	 * the implicit ones of an iso urb).
	 */

	bd = tx->bdPoolHead;
	tx->activeQueueHead = tx->bdPoolHead;
	tx->lastHwBDProcessed = NULL;

	/* Prepare queue of BDs first, then hand it to hardware.
	 * All BDs except maybe the last should be of full packet
	 * size; for RNDIS there _is_ only that last packet.
	 */
	for (i = 0; i < n_bds; ) {
		if (++i < n_bds && bd->next)
			bd->hNext = bd->next->dma;
		else
			bd->hNext = 0;

		bd->buffPtr = tx->startAddr
			+ tx->currOffset;

		/* FIXME set EOP only on the last packet,
		 * SOP only on the first ... avoid IRQs
		 */
		if ((tx->currOffset + maxpacket)
				<= tx->transferSize) {
			tx->currOffset += maxpacket;
			bd->bOffBLen = maxpacket;
			bd->hOptions = CPPI_SOP_SET | CPPI_EOP_SET
				| CPPI_OWN_SET | maxpacket;
		} else {
			/* only this one may be a partial USB Packet */
			u32	buffSz;

			buffSz = tx->transferSize - tx->currOffset;
			tx->currOffset = tx->transferSize;
			bd->bOffBLen = buffSz;

			bd->hOptions = CPPI_SOP_SET | CPPI_EOP_SET
				| CPPI_OWN_SET | buffSz;
			if (buffSz == 0)
				bd->hOptions |= CPPI_ZERO_SET;
		}

		DBG(5, "TXBD %p: nxt %08x buf %08x len %04x opt %08x\n",
				bd, bd->hNext, bd->buffPtr,
				bd->bOffBLen, bd->hOptions);

		/* update the last BD enqueued to the list */
		tx->activeQueueTail = bd;
		bd = bd->next;
	}

	/* BDs live in DMA-coherent memory, but writes might be pending */
	cpu_drain_writebuffer();

	/* Write to the HeadPtr in StateRam to trigger */
	txState->headPtr = (u32)tx->bdPoolHead->dma;

	cppi_dump_tx(5, tx, "/S");
}
/*
 * CPPI RX Woes:
 *
 * Consider a 1KB bulk RX buffer in two scenarios: (a) it's fed two 300 byte
 * packets back-to-back, and (b) it's fed two 512 byte packets back-to-back.
 * (Full speed transfers have similar scenarios.)
 *
 * The correct behavior for Linux is that (a) fills the buffer with 300 bytes,
 * and the next packet goes into a buffer that's queued later; while (b) fills
 * the buffer with 1024 bytes.  How to do that with CPPI?
 *
 * - RX queues in "rndis" mode -- one single BD -- handle (a) correctly, but
 *   (b) loses **BADLY** because nothing (!) happens when that second packet
 *   fills the buffer, much less when a third one arrives.  (Which makes this
 *   not a "true" RNDIS mode.  In the RNDIS protocol short-packet termination
 *   is optional, and it's fine if peripherals -- not hosts! -- pad messages
 *   out to end-of-buffer.  Standard PCI host controller DMA descriptors
 *   implement that mode by default ... which is no accident.)
 *
 * - RX queues in "transparent" mode -- two BDs with 512 bytes each -- have
 *   converse problems: (b) is handled right, but (a) loses badly.  CPPI RX
 *   ignores SOP/EOP markings and processes both of those BDs; so both
 *   packets are loaded into the buffer (with a 212 byte gap between them),
 *   and the next buffer queued will NOT get its 300 bytes of data.  (It
 *   seems like SOP/EOP are intended as outputs for RX queues, not inputs...)
 *
 * - A variant of "transparent" mode -- one BD at a time -- is the only way
 *   to reliably make both cases work, with software handling both cases
 *   correctly and at the significant penalty of needing an IRQ per packet.
 *   (The lack of I/O overlap can be slightly ameliorated by enabling double
 *   buffering.)
 *
 * So how to get rid of IRQ-per-packet?  The transparent multi-BD case could
 * be used in special cases like mass storage, which sets URB_SHORT_NOT_OK
 * (or maybe its peripheral side counterpart) to flag (a) scenarios as errors
 * with guaranteed driver level fault recovery and scrubbing out what's left
 * of that garbaged datastream.
 *
 * But there seems to be no way to identify the cases where CPPI RNDIS mode
 * is appropriate -- which do NOT include RNDIS host drivers, but do include
 * the CDC Ethernet driver! -- and the documentation is incomplete/wrong.
 * So we can't _ever_ use RX RNDIS mode ... except by using a heuristic
 * that applies best on the peripheral side (and which could fail rudely).
 *
 * That leaves only "transparent" mode; we avoid multi-bd modes in almost all
 * cases other than mass storage class.  Otherwise we're correct but slow,
 * since CPPI penalizes our need for a "true RNDIS" default mode.
 */
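
/* Summarizing the 1KB buffer example above (512 byte maxpacket):
 *
 *	queue mode		(a) 300 + 300 in	(b) 512 + 512 in
 *	rndis (one BD)		300 bytes, correct	wedges after 512
 *	transparent (two BDs)	600 bytes + gap, WRONG	1024 bytes, correct
 *	one BD per packet	300 bytes, correct	1024 bytes, correct
 *							(but irq-per-packet)
 */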
/* Heuristic, intended to kick in for ethernet/rndis peripheral ONLY:
 *
 * (a) peripheral mode ... since rndis peripherals could pad their
 *     writes to hosts, causing i/o failure; or we'd have to cope with
 *     a largely unknowable variety of host side protocol variants
 * (b) and short reads are NOT errors ... since full reads would
 *     cause those same i/o failures
 * (c) and read length is
 *     - less than 64KB (max per cppi descriptor)
 *     - not a multiple of 4096 (g_zero default, full reads typical)
 *     - N (>1) packets long, ditto (full reads not EXPECTED)
 *
 * Cost of heuristic failing: RXDMA wedges at the end of transfers that
 * fill out the whole buffer.  Buggy host side usb network drivers could
 * trigger that, but "in the field" such bugs seem to be all but unknown.
 *
 * So this module parameter lets the heuristic be disabled.  When using
 * gadgetfs, the heuristic will probably need to be disabled.
 */
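
/* For illustration only -- not used by this driver -- the heuristic
 * above restated as a predicate.  It mirrors the inline test in
 * cppi_next_rx_segment(); this helper itself is hypothetical, and it
 * assumes maxpacket is a power of two (true for bulk endpoints).
 */
static inline int cppi_rx_rndis_heuristic(size_t length, unsigned maxpacket)
{
	return length > maxpacket		/* (c) N > 1 packets */
		&& (length & ~0xffff) == 0	/* (c) less than 64KB */
		&& (length & 0x0fff) != 0	/* (c) not a 4KB multiple */
		&& (length & (maxpacket - 1)) == 0;	/* whole packets */
}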
static int cppi_rx_rndis = 1;

module_param(cppi_rx_rndis, bool, 0);
MODULE_PARM_DESC(cppi_rx_rndis, "enable/disable RX RNDIS heuristic");
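
/* Usage note: with the usual module name for this driver (an assumption;
 * check your build), something like "modprobe musb_hdrc cppi_rx_rndis=0"
 * disables the heuristic and forces the always-safe irq-per-packet mode.
 */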
/**
 * cppi_next_rx_segment - dma read for the next chunk of a buffer
 * @musb: the controller
 * @rx: dma channel
 * @onepacket: true unless caller treats short reads as errors, and
 *	performs fault recovery above usbcore.
 * Context: controller irqlocked
 *
 * See above notes about why we can't use multi-BD RX queues except in
 * rare cases (mass storage class), and can never use the hardware "rndis"
 * mode (since it's not a "true" RNDIS mode) with complete safety.
 *
 * It's ESSENTIAL that callers specify "onepacket" mode unless they kick in
 * code to recover from corrupted datastreams after each short transfer.
 */
static void
cppi_next_rx_segment(struct musb *musb, struct cppi_channel *rx, int onepacket)
{
	unsigned		maxpacket = rx->pktSize;
	dma_addr_t		addr = rx->startAddr + rx->currOffset;
	size_t			length = rx->transferSize - rx->currOffset;
	struct cppi_descriptor	*bd, *tail;
	unsigned		n_bds;
	unsigned		i;
	void __iomem		*tibase = musb->ctrl_base;
	int			is_rndis = 0;

	if (onepacket) {
		/* almost every USB driver, host or peripheral side */
		n_bds = 1;

		/* maybe apply the heuristic above */
		if (cppi_rx_rndis
				&& is_peripheral_active(musb)
				&& length > maxpacket
				&& (length & ~0xffff) == 0
				&& (length & 0x0fff) != 0
				&& (length & (maxpacket - 1)) == 0) {
			maxpacket = length;
			is_rndis = 1;
		}
	} else {
		/* virtually nothing except mass storage class */
		if (length > 0xffff) {
			n_bds = 0xffff / maxpacket;
			length = n_bds * maxpacket;
		} else {
			n_bds = length / maxpacket;
			if (length % maxpacket)
				n_bds++;
		}
		if (n_bds == 1)
			onepacket = 1;
		else
			n_bds = min(n_bds, (unsigned) NUM_RXCHAN_BD);
	}

	/* In host mode, autorequest logic can generate some IN tokens; it's
	 * tricky since we can't leave REQPKT set in RXCSR after the transfer
	 * finishes.  So: multipacket transfers involve two or more segments.
	 * And always at least two IRQs ... RNDIS mode is not an option.
	 */
	if (is_host_active(musb))
		n_bds = cppi_autoreq_update(rx, tibase, onepacket, n_bds);

	cppi_rndis_update(rx, 1, musb->ctrl_base, is_rndis);

	length = min(n_bds * maxpacket, length);

	DBG(4, "RX DMA%d seg, maxp %d %s bds %d (cnt %d) "
			"dma 0x%x len %u %u/%u\n",
			rx->chNo, maxpacket,
			onepacket
				? (is_rndis ? "rndis" : "onepacket")
				: "multipacket",
			n_bds,
			musb_readl(tibase,
				DAVINCI_RXCPPI_BUFCNT0_REG + (rx->chNo * 4))
				& 0xffff,
			addr, length, rx->actualLen, rx->transferSize);

	/* only queue one segment at a time, since the hardware prevents
	 * correct queue shutdown after unexpected short packets
	 */
	bd = cppi_bd_alloc(rx);
	rx->activeQueueHead = bd;

	/* Build BDs for all packets in this segment */
	for (i = 0, tail = NULL; bd && i < n_bds; i++, tail = bd) {
		u32	buffSz;

		if (i) {
			bd = cppi_bd_alloc(rx);
			if (!bd)
				break;
			tail->next = bd;
			tail->hNext = bd->dma;
		}
		bd->hNext = 0;

		/* all but the last packet will be maxpacket size */
		if (maxpacket < length)
			buffSz = maxpacket;
		else
			buffSz = length;

		bd->buffPtr = addr;
		addr += buffSz;
		rx->currOffset += buffSz;
		length -= buffSz;

		bd->bOffBLen = (0 /*offset*/ << 16) + buffSz;
		bd->enqBuffLen = buffSz;

		bd->hOptions = CPPI_OWN_SET | (i == 0 ? length : 0);
	}

	/* we always expect at least one reusable BD! */
	if (!tail) {
		WARN("rx dma%d -- no BDs? need %d\n", rx->chNo, n_bds);
		return;
	} else if (i < n_bds)
		WARN("rx dma%d -- only %d of %d BDs\n", rx->chNo, i, n_bds);

	tail->next = NULL;
	tail->hNext = 0;

	bd = rx->activeQueueHead;
	rx->activeQueueTail = tail;

	/* short reads and other faults should terminate this entire
	 * dma segment.  we want one "dma packet" per dma segment, not
	 * one per USB packet, terminating the whole queue at once...
	 * NOTE that current hardware seems to ignore SOP and EOP.
	 */
	bd->hOptions |= CPPI_SOP_SET;
	tail->hOptions |= CPPI_EOP_SET;

	if (_dbg_level(5)) {
		struct cppi_descriptor	*d;

		for (d = rx->activeQueueHead; d; d = d->next)
			cppi_dump_rxbd("S", d);
	}

	/* in case the preceding transfer left some state... */
	tail = rx->lastHwBDProcessed;
	if (tail) {
		tail->next = bd;
		tail->hNext = bd->dma;
	}

	core_rxirq_enable(tibase, rx->chNo + 1);

	/* BDs live in DMA-coherent memory, but writes might be pending */
	cpu_drain_writebuffer();

	/* REVISIT specs say to write this AFTER the BUFCNT register
	 * below ... but that loses badly.
	 */
	musb_writel(rx->stateRam, 4, bd->dma);

	/* bufferCount must be at least 3, and zeroes on completion
	 * unless it underflows below zero, or stops at two, or keeps
	 * growing ... grr.
	 */
	i = musb_readl(tibase,
			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->chNo * 4))
			& 0xffff;

	if (!i)
		musb_writel(tibase,
			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->chNo * 4),
			n_bds + 2);
	else if (n_bds > (i - 3))
		musb_writel(tibase,
			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->chNo * 4),
			n_bds - (i - 3));

	i = musb_readl(tibase,
			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->chNo * 4))
			& 0xffff;
	if (i < (2 + n_bds)) {
		DBG(2, "bufcnt%d underrun - %d (for %d)\n",
				rx->chNo, i, n_bds);
		musb_writel(tibase,
			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->chNo * 4),
			n_bds + 2);
	}

	cppi_dump_rx(4, rx, "/S");
}
/**
 * cppi_channel_program - program channel for data transfer
 * @pChannel: the channel
 * @wPacketSz: max packet size
 * @mode: For RX, 1 unless the usb protocol driver promised to treat
 *	all short reads as errors and kick in high level fault recovery.
 *	For TX, ignored because of RNDIS mode races/glitches.
 * @dma_addr: dma address of buffer
 * @len: length of buffer
 * Context: controller irqlocked
 */
static int cppi_channel_program(struct dma_channel *pChannel,
		u16 wPacketSz, u8 mode,
		dma_addr_t dma_addr, u32 len)
{
	struct cppi_channel	*otgChannel = pChannel->private_data;
	struct cppi		*controller = otgChannel->controller;
	struct musb		*musb = controller->musb;

	switch (pChannel->status) {
	case MUSB_DMA_STATUS_BUS_ABORT:
	case MUSB_DMA_STATUS_CORE_ABORT:
		/* fault irq handler should have handled cleanup */
		WARN("%cX DMA%d not cleaned up after abort!\n",
				otgChannel->transmit ? 'T' : 'R',
				otgChannel->chNo);
		break;
	case MUSB_DMA_STATUS_BUSY:
		WARN("program active channel? %cX DMA%d\n",
				otgChannel->transmit ? 'T' : 'R',
				otgChannel->chNo);
		break;
	case MUSB_DMA_STATUS_UNKNOWN:
		DBG(1, "%cX DMA%d not allocated!\n",
				otgChannel->transmit ? 'T' : 'R',
				otgChannel->chNo);
		/* FALLTHROUGH */
	case MUSB_DMA_STATUS_FREE:
		break;
	}

	pChannel->status = MUSB_DMA_STATUS_BUSY;

	/* set transfer parameters, then queue up its first segment */
	otgChannel->startAddr = dma_addr;
	otgChannel->currOffset = 0;
	otgChannel->pktSize = wPacketSz;
	otgChannel->actualLen = 0;
	otgChannel->transferSize = len;

	/* TX channel? or RX? */
	if (otgChannel->transmit)
		cppi_next_tx_segment(musb, otgChannel);
	else
		cppi_next_rx_segment(musb, otgChannel, mode);

	return true;
}
static int cppi_rx_scan(struct cppi *cppi, unsigned ch)
{
	struct cppi_channel		*rx = &cppi->rxCppi[ch];
	struct cppi_rx_stateram		*state = rx->stateRam;
	struct cppi_descriptor		*bd;
	struct cppi_descriptor		*last = rx->lastHwBDProcessed;
	int				completed = 0, acked = 0;
	int				i;
	dma_addr_t			safe2ack;
	void __iomem			*regs = rx->hw_ep->regs;

	cppi_dump_rx(6, rx, "/K");

	bd = last ? last->next : rx->activeQueueHead;
	if (!bd)
		return 0;

	/* run through all completed BDs */
	for (i = 0, safe2ack = musb_readl(CAST &state->completionPtr, 0);
			(safe2ack || completed) && bd && i < NUM_RXCHAN_BD;
			i++, bd = bd->next) {
		u16	len;

		if (!completed && (bd->hOptions & CPPI_OWN_SET))
			break;

		DBG(5, "C/RXBD %08x: nxt %08x buf %08x "
			"off.len %08x opt.len %08x (%d)\n",
			bd->dma, bd->hNext, bd->buffPtr,
			bd->bOffBLen, bd->hOptions,
			rx->actualLen);

		/* actual packet received length */
		if ((bd->hOptions & CPPI_SOP_SET) && !completed)
			len = bd->bOffBLen & CPPI_RECV_PKTLEN_MASK;
		else
			len = 0;

		if (bd->hOptions & CPPI_EOQ_MASK)
			completed = 1;

		if (!completed && len < bd->enqBuffLen) {
			/* NOTE: when we get a short packet, RXCSR_H_REQPKT
			 * must have been cleared, and no more DMA packets
			 * may be active in the queue... TI docs didn't say,
			 * but CPPI ignores those BDs even though OWN is
			 * still set.
			 */
			completed = 1;
			DBG(3, "rx short %d/%d (%d)\n",
					len, bd->enqBuffLen, rx->actualLen);
		}

		/* If we got here, we expect to ack at least one BD; meanwhile
		 * CPPI may be completing other BDs while we scan this list...
		 *
		 * RACE: we can notice OWN cleared before CPPI raises the
		 * matching irq by writing that BD as the completion pointer.
		 * In such cases, stop scanning and wait for the irq, avoiding
		 * lost acks and states where BD ownership is unclear.
		 */
		if (bd->dma == safe2ack) {
			musb_writel(CAST &state->completionPtr, 0, safe2ack);
			safe2ack = musb_readl(CAST &state->completionPtr, 0);
			acked = 1;
			if (bd->dma == safe2ack)
				safe2ack = 0;
		}

		rx->actualLen += len;

		cppi_bd_free(rx, last);
		last = bd;

		/* stop scanning on end-of-segment */
		if (bd->hNext == 0)
			completed = 1;
	}
	rx->lastHwBDProcessed = last;

	/* dma abort, lost ack, or ... */
	if (!acked && last) {
		int	csr;

		if (safe2ack == 0 || safe2ack == rx->lastHwBDProcessed->dma)
			musb_writel(CAST &state->completionPtr, 0, safe2ack);
		if (safe2ack == 0) {
			cppi_bd_free(rx, last);
			rx->lastHwBDProcessed = NULL;

			/* if we land here on the host side, H_REQPKT will
			 * be clear and we need to restart the queue...
			 */
			WARN_ON(rx->activeQueueHead);
		}
		musb_ep_select(cppi->pCoreBase, rx->chNo + 1);
		csr = musb_readw(regs, MUSB_RXCSR);
		if (csr & MUSB_RXCSR_DMAENAB) {
			DBG(4, "list%d %p/%p, last %08x%s, csr %04x\n",
				rx->chNo,
				rx->activeQueueHead, rx->activeQueueTail,
				rx->lastHwBDProcessed
					? rx->lastHwBDProcessed->dma
					: 0,
				completed ? ", completed" : "",
				csr);
			cppi_dump_rxq(4, "/what?", rx);
		}
	}
	if (!completed) {
		int	csr;

		rx->activeQueueHead = bd;

		/* REVISIT seems like "autoreq all but EOP" doesn't...
		 * setting it here "should" be racy, but seems to work
		 */
		csr = musb_readw(rx->hw_ep->regs, MUSB_RXCSR);
		if (is_host_active(cppi->musb)
				&& bd
				&& !(csr & MUSB_RXCSR_H_REQPKT)) {
			csr |= MUSB_RXCSR_H_REQPKT;
			musb_writew(regs, MUSB_RXCSR,
					MUSB_RXCSR_H_WZC_BITS | csr);
			csr = musb_readw(rx->hw_ep->regs, MUSB_RXCSR);
		}
	} else {
		rx->activeQueueHead = NULL;
		rx->activeQueueTail = NULL;
	}

	cppi_dump_rx(6, rx, completed ? "/completed" : "/cleaned");
	return completed;
}
void cppi_completion(struct musb *musb, u32 rx, u32 tx)
{
	void __iomem		*regBase;
	int			i, chanNum, numCompleted;
	struct cppi		*cppi;
	struct cppi_descriptor	*bdPtr;
	struct musb_hw_ep	*hw_ep = NULL;

	cppi = container_of(musb->dma_controller, struct cppi, Controller);

	regBase = musb->ctrl_base;

	/* process TX channels */
	for (chanNum = 0; tx; tx = tx >> 1, chanNum++) {
		if (tx & 1) {
			struct cppi_channel	*txChannel;
			struct cppi_tx_stateram	*txState;

			txChannel = cppi->txCppi + chanNum;
			txState = txChannel->stateRam;

			/* FIXME need a cppi_tx_scan() routine, which
			 * can also be called from abort code
			 */

			cppi_dump_tx(5, txChannel, "/E");

			bdPtr = txChannel->activeQueueHead;
			if (NULL == bdPtr) {
				DBG(1, "null BD\n");
				continue;
			}

			/* run through all completed BDs */
			for (i = 0, numCompleted = 0;
					bdPtr
					&& i < NUM_TXCHAN_BD;
					i++, bdPtr = bdPtr->next) {
				u16	len;

				if (bdPtr->hOptions & CPPI_OWN_SET)
					break;

				DBG(5, "C/TXBD %p n %x b %x off %x opt %x\n",
						bdPtr, bdPtr->hNext,
						bdPtr->buffPtr,
						bdPtr->bOffBLen,
						bdPtr->hOptions);

				len = bdPtr->bOffBLen & CPPI_BUFFER_LEN_MASK;
				txChannel->actualLen += len;

				numCompleted++;
				txChannel->lastHwBDProcessed = bdPtr;

				/* write completion register to acknowledge
				 * processing of completed BDs, and possibly
				 * release the IRQ; EOQ might not be set ...
				 *
				 * REVISIT use the same ack strategy as rx
				 *
				 * REVISIT have observed bit 18 set; huh??
				 */
				/* if ((bdPtr->hOptions & CPPI_EOQ_MASK)) */
				txState->completionPtr = bdPtr->dma;

				/* stop scanning on end-of-segment */
				if (bdPtr->hNext == 0)
					break;
			}

			/* on end of segment, maybe go to next one */
			if (bdPtr == txChannel->lastHwBDProcessed) {
				/* cppi_dump_tx(4, txChannel, "/complete"); */

				/* transfer more, or report completion */
				if (txChannel->currOffset
						>= txChannel->transferSize) {
					txChannel->activeQueueHead = NULL;
					txChannel->activeQueueTail = NULL;
					txChannel->Channel.status =
							MUSB_DMA_STATUS_FREE;

					hw_ep = txChannel->hw_ep;

					txChannel->Channel.actual_len =
						txChannel->actualLen;

					/* Peripheral role never repurposes the
					 * endpoint, so immediate completion is
					 * safe.  Host role waits for the fifo
					 * to empty (TXPKTRDY irq) before going
					 * to the next queued bulk transfer.
					 */
					if (is_host_active(cppi->musb)) {
						int	csr;

						/* WORKAROUND because we may
						 * not always get TXPKTRDY ...
						 */
						csr = musb_readw(hw_ep->regs,
								MUSB_TXCSR);
						if (csr & MUSB_TXCSR_TXPKTRDY)
							/* fifo isn't empty;
							 * irq finishes this
							 */
							continue;
					}

					musb_dma_completion(
							musb, chanNum + 1, 1);
				} else {
					/* Bigger transfer than we could fit in
					 * that first batch of descriptors...
					 */
					cppi_next_tx_segment(musb, txChannel);
				}
			} else
				txChannel->activeQueueHead = bdPtr;
		}
	}

	/* Start processing the RX block */
	for (chanNum = 0; rx; rx = rx >> 1, chanNum++) {
		if (rx & 1) {
			struct cppi_channel	*rxChannel;
			int			bReqComplete;

			rxChannel = cppi->rxCppi + chanNum;
			bReqComplete = cppi_rx_scan(cppi, chanNum);

			/* let incomplete dma segments finish */
			if (!bReqComplete)
				continue;

			/* start another dma segment if needed */
			if (rxChannel->actualLen != rxChannel->transferSize
					&& rxChannel->actualLen
						== rxChannel->currOffset) {
				cppi_next_rx_segment(musb, rxChannel, 1);
				continue;
			}

			/* all segments completed! */
			rxChannel->Channel.status = MUSB_DMA_STATUS_FREE;

			hw_ep = rxChannel->hw_ep;

			rxChannel->Channel.actual_len =
					rxChannel->actualLen;
			core_rxirq_disable(regBase, chanNum + 1);
			musb_dma_completion(musb, chanNum + 1, 0);
		}
	}

	/* write to CPPI EOI register to re-enable interrupts */
	musb_writel(regBase, DAVINCI_CPPI_EOI_REG, 0);
}
/* Instantiate a software object representing a DMA controller. */
struct dma_controller *__init
dma_controller_create(struct musb *musb, void __iomem *pCoreBase)
{
	struct cppi	*controller;

	controller = kzalloc(sizeof *controller, GFP_KERNEL);
	if (!controller)
		return NULL;

	/* initialize the CPPI DmaController structure */
	controller->pCoreBase = pCoreBase;
	controller->musb = musb;
	controller->Controller.private_data = controller;
	controller->Controller.start = cppi_controller_start;
	controller->Controller.stop = cppi_controller_stop;
	controller->Controller.channel_alloc = cppi_channel_allocate;
	controller->Controller.channel_release = cppi_channel_release;
	controller->Controller.channel_program = cppi_channel_program;
	controller->Controller.channel_abort = cppi_channel_abort;

	/* NOTE: allocating from on-chip SRAM would give the least
	 * contention for memory access, if that ever matters here.
	 */

	/* setup BufferPool */
	controller->pool = dma_pool_create("cppi",
			controller->musb->controller,
			sizeof(struct cppi_descriptor),
			CPPI_DESCRIPTOR_ALIGN, 0);
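	/* Sizing note: cppi_pool_init() above carves NUM_TXCHAN_BD + 1
	 * descriptors out of this pool for every channel (TX and RX
	 * alike), so the pool's high-water mark is roughly 65 BDs per
	 * channel.
	 */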
	if (!controller->pool) {
		kfree(controller);
		return NULL;
	}

	return &controller->Controller;
}
/*
 * Destroy a previously-instantiated DMA controller.
 */
void dma_controller_destroy(struct dma_controller *c)
{
	struct cppi	*cppi;

	cppi = container_of(c, struct cppi, Controller);

	/* assert: caller stopped the controller first */
	dma_pool_destroy(cppi->pool);

	kfree(cppi);
}
/*
 * Context: controller irqlocked, endpoint selected
 */
static int cppi_channel_abort(struct dma_channel *channel)
{
	struct cppi_channel	*otgCh;
	struct cppi		*controller;
	int			chNum;
	void __iomem		*mbase;
	void __iomem		*regBase;
	void __iomem		*regs = NULL;
	u32			regVal;
	u16			csr;
	struct cppi_descriptor	*queue;

	otgCh = container_of(channel, struct cppi_channel, Channel);

	controller = otgCh->controller;
	chNum = otgCh->chNo;

	switch (channel->status) {
	case MUSB_DMA_STATUS_BUS_ABORT:
	case MUSB_DMA_STATUS_CORE_ABORT:
		/* from RX or TX fault irq handler */
	case MUSB_DMA_STATUS_BUSY:
		/* the hardware needs shutting down */
		regs = otgCh->hw_ep->regs;
		break;
	case MUSB_DMA_STATUS_UNKNOWN:
	case MUSB_DMA_STATUS_FREE:
		return 0;
	default:
		return -EINVAL;
	}

	if (!otgCh->transmit && otgCh->activeQueueHead)
		cppi_dump_rxq(3, "/abort", otgCh);

	mbase = controller->pCoreBase;
	regBase = mbase - DAVINCI_BASE_OFFSET;

	queue = otgCh->activeQueueHead;
	otgCh->activeQueueHead = NULL;
	otgCh->activeQueueTail = NULL;

	/* REVISIT should rely on caller having done this,
	 * and caller should rely on us not changing it.
	 * peripheral code is safe ... check host too.
	 */
	musb_ep_select(mbase, chNum + 1);

	if (otgCh->transmit) {
		struct cppi_tx_stateram __iomem *txState;
		int	enabled;

		/* mask interrupts raised to signal teardown complete. */
		enabled = musb_readl(regBase, DAVINCI_TXCPPI_INTENAB_REG)
				& (1 << otgCh->chNo);
		musb_writel(regBase, DAVINCI_TXCPPI_INTCLR_REG,
				(1 << otgCh->chNo));

		/* REVISIT put timeouts on these controller handshakes */

		cppi_dump_tx(6, otgCh, " (teardown)");

		/* teardown DMA engine then usb core */
		do {
			regVal = musb_readl(regBase, DAVINCI_TXCPPI_TEAR_REG);
		} while (!(regVal & CPPI_TEAR_READY));
		musb_writel(regBase, DAVINCI_TXCPPI_TEAR_REG, chNum);

		txState = otgCh->stateRam;
		do {
			regVal = txState->completionPtr;
		} while (0xFFFFFFFC != regVal);
		txState->completionPtr = 0xFFFFFFFC;

		/* FIXME clean up the transfer state ... here?
		 * the completion routine should get called with
		 * an appropriate status code.
		 */

		regVal = musb_readw(regs, MUSB_TXCSR);
		regVal &= ~MUSB_TXCSR_DMAENAB;
		regVal |= MUSB_TXCSR_FLUSHFIFO;
		musb_writew(regs, MUSB_TXCSR, regVal);
		musb_writew(regs, MUSB_TXCSR, regVal);

		/* re-enable interrupt */
		if (enabled)
			musb_writel(regBase, DAVINCI_TXCPPI_INTENAB_REG,
					(1 << otgCh->chNo));

		txState->headPtr = 0;
		txState->sopDescPtr = 0;
		txState->currBuffPtr = 0;
		txState->currDescPtr = 0;
		txState->flags = 0;
		txState->remLength = 0;

		/* Ensure that we clean up any interrupt asserted:
		 * 1. Write to completion Ptr value 0x1 (bit 0 set);
		 * 2. Write to completion Ptr value 0x0 (bit 0 cleared).
		 * The value written is compared (for bits 31:2), and when
		 * equal the interrupt is deasserted.
		 */

		/* write back mode, bit 0 set, hence completion Ptr
		 * must be updated
		 */
		txState->completionPtr = 0x1;
		/* compare mode, write back zero now */
		txState->completionPtr = 0;

		cppi_dump_tx(5, otgCh, " (done teardown)");

		/* REVISIT tx side _should_ clean up the same way
		 * as the RX side ... this does no cleanup at all!
		 */

	} else /* RX */ {
		/* NOTE: docs don't guarantee any of this works ... we
		 * expect that if the usb core stops telling the cppi core
		 * to pull more data from it, then it'll be safe to flush
		 * current RX DMA state iff any pending fifo transfer is done.
		 */

		core_rxirq_disable(regBase, otgCh->chNo + 1);

		/* for host, ensure ReqPkt is never set again */
		if (is_host_active(otgCh->controller->musb)) {
			regVal = musb_readl(regBase, DAVINCI_AUTOREQ_REG);
			regVal &= ~((0x3) << (otgCh->chNo * 2));
			musb_writel(regBase, DAVINCI_AUTOREQ_REG, regVal);
		}

		csr = musb_readw(regs, MUSB_RXCSR);

		/* for host, clear (just) ReqPkt at end of current packet(s) */
		if (is_host_active(otgCh->controller->musb)) {
			csr |= MUSB_RXCSR_H_WZC_BITS;
			csr &= ~MUSB_RXCSR_H_REQPKT;
		} else
			csr |= MUSB_RXCSR_P_WZC_BITS;

		/* clear dma enable */
		csr &= ~(MUSB_RXCSR_DMAENAB);
		musb_writew(regs, MUSB_RXCSR, csr);
		csr = musb_readw(regs, MUSB_RXCSR);

		/* quiesce: wait for current dma to finish (if not cleanup).
		 * we can't use bit zero of stateram->sopDescPtr since that
		 * refers to an entire "DMA packet" not just emptying the
		 * current fifo; most segments need multiple usb packets.
		 */
		if (channel->status == MUSB_DMA_STATUS_BUSY)
			udelay(50);

		/* scan the current list, reporting any data that was
		 * transferred and acking any IRQ
		 */
		cppi_rx_scan(controller, chNum);

		/* clobber the existing state once it's idle
		 *
		 * NOTE: arguably, we should also wait for all the other
		 * RX channels to quiesce (how??) and then temporarily
		 * disable RXCPPI_CTRL_REG ... but it seems that we can
		 * rely on the controller restarting from state ram, with
		 * only RXCPPI_BUFCNT state being bogus.  BUFCNT will
		 * correct itself after the next DMA transfer though.
		 *
		 * REVISIT does using rndis mode change that?
		 */
		cppi_reset_rx(otgCh->stateRam);

		/* next DMA request _should_ load cppi head ptr */

		/* ... we don't "free" that list, only mutate it in place. */
		cppi_dump_rx(5, otgCh, " (done abort)");

		/* clean up previously pending bds */
		cppi_bd_free(otgCh, otgCh->lastHwBDProcessed);
		otgCh->lastHwBDProcessed = NULL;
	}

	while (queue) {
		struct cppi_descriptor	*tmp = queue->next;

		cppi_bd_free(otgCh, queue);
		queue = tmp;
	}

	channel->status = MUSB_DMA_STATUS_FREE;
	otgCh->startAddr = 0;
	otgCh->currOffset = 0;
	otgCh->transferSize = 0;
	otgCh->pktSize = 0;
	return 0;
}
/*
 * Power Management ... probably turn off cppi during suspend, restart;
 * check state ram?  Clocking is presumably shared with usb core.
 */