2 * Copyright (c) 2006 Bernd Walter. All rights reserved.
3 * Copyright (c) 2006 M. Warner Losh. All rights reserved.
4 * Copyright (c) 2010 Greg Ansley. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include <sys/param.h>
32 #include <sys/systm.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
43 #include <sys/queue.h>
44 #include <sys/resource.h>
46 #include <sys/sysctl.h>
48 #include <sys/timetc.h>
49 #include <sys/watchdog.h>
51 #include <machine/bus.h>
52 #include <machine/cpu.h>
53 #include <machine/cpufunc.h>
54 #include <machine/resource.h>
55 #include <machine/frame.h>
56 #include <machine/intr.h>
58 #include <arm/at91/at91var.h>
59 #include <arm/at91/at91_mcireg.h>
60 #include <arm/at91/at91_pdcreg.h>
62 #include <dev/mmc/bridge.h>
63 #include <dev/mmc/mmcreg.h>
64 #include <dev/mmc/mmcbrvar.h>
71 * About running the MCI bus above 25MHz
73 * Historically, the MCI bus has been run at 30MHz on systems with a 60MHz
74 * master clock, in part due to a bug in dev/mmc.c making it always request
75 * 30MHz, and in part over clocking the bus because 15MHz was too slow.
76 * Fixing that bug causes the mmc driver to request a 25MHz clock (as it
77 * should) and the logic in at91_mci_update_ios() picks the highest speed that
78 * doesn't exceed that limit. With a 60MHz MCK that would be 15MHz, and
79 * that's a real performance buzzkill when you've been getting away with 30MHz
82 * By defining AT91_MCI_ALLOW_OVERCLOCK (or setting the allow_overclock=1
83 * device hint or sysctl) you can enable logic in at91_mci_update_ios() to
84 * overclock the SD bus a little by running it at MCK / 2 when the requested
85 * speed is 25MHz and the next highest speed is 15MHz or less. This appears
86 * to work on virtually all SD cards, since it is what this driver has been
87 * doing prior to the introduction of this option, where the overclocking vs
88 * underclocking decision was automatically "overclock". Modern SD cards can
89 * run at 45mhz/1-bit in standard mode (high speed mode enable commands not
90 * sent) without problems.
92 * Speaking of high-speed mode, the rm9200 manual says the MCI device supports
93 * the SD v1.0 specification and can run up to 50MHz. This is interesting in
94 * that the SD v1.0 spec caps the speed at 25MHz; high speed mode was added in
95 * the v1.10 spec. Furthermore, high speed mode doesn't just crank up the
96 * clock, it alters the signal timing. The rm9200 MCI device doesn't support
97 * these altered timings. So while speeds over 25MHz may work, they only work
98 * in what the SD spec calls "default" speed mode, and it amounts to violating
99 * the spec by overclocking the bus.
101 * If you also enable 4-wire mode it's possible transfers faster than 25MHz
102 * will fail. On the AT91RM9200, due to bugs in the bus contention logic, if
103 * you have the USB host device and OHCI driver enabled, transfers will fail.
104 * Even when underclocking to 15MHz, intermittent overrun and underrun errors occur.
105 * Note that you don't even need to have usb devices attached to the system,
106 * the errors begin to occur as soon as the OHCI driver sets the register bit
107 * to enable periodic transfers. It appears (based on brief investigation)
108 * that the usb host controller uses so much ASB bandwidth that sometimes the
109 * DMA for MCI transfers doesn't get a bus grant in time and data gets
110 * dropped. Adding even a modicum of network activity changes the symptom
111 * from intermittent to very frequent. Members of the AT91SAM9 family have
112 * corrected this problem, or are at least better about their use of the bus.
/*
 * Build-time default for the overclock policy: 1 unless the build already
 * defined AT91_MCI_ALLOW_OVERCLOCK. at91_mci_attach copies this value into
 * sc->allow_overclock before consulting the device hint.
 */
114 #ifndef AT91_MCI_ALLOW_OVERCLOCK
115 #define AT91_MCI_ALLOW_OVERCLOCK 1
119 * Allocate 2 bounce buffers we'll use to endian-swap the data due to the rm9200
120 * erratum. We use a pair of buffers because when reading that lets us begin
121 * endian-swapping the data in the first buffer while the DMA is reading into
122 * the second buffer. (We can't use the same trick for writing because we might
123 * not get all the data in the 2nd buffer swapped before the hardware needs it;
124 * dealing with that would add complexity to the driver.)
126 * The buffers are sized at 16K each due to the way the busdma cache sync
127 * operations work on arm. A dcache_inv_range() operation on a range larger
128 * than 16K gets turned into a dcache_wbinv_all(). That needlessly flushes the
129 * entire data cache, impacting overall system performance.
/* Size in bytes of a single bounce buffer (16 KiB). */
132 #define BBSIZE (16*1024)
/* Number of 512-byte blocks that fit in all BBCOUNT bounce buffers together. */
133 #define MAX_BLOCKS ((BBSIZE*BBCOUNT)/512)
/*
 * Debug switch. NOTE(review): its readers are not visible in this excerpt;
 * presumably it gates the diagnostic printf calls -- confirm.
 */
135 static int mci_debug;
/*
 * Per-device driver state, obtained with device_get_softc().
 * NOTE(review): other parts of the file use members that are not visible in
 * this excerpt of the declaration (sc_mtx, dev, sc_cap, flags, has_4wire,
 * allow_overclock).
 */
137 struct at91_mci_softc {
138 void *intrhand; /* Interrupt handle */
/* Capability bits stored in sc_cap. */
141 #define CAP_HAS_4WIRE 1 /* Has 4 wire bus */
142 #define CAP_NEEDS_BYTESWAP 2 /* broken hardware needing bounce */
143 #define CAP_MCI1_REV2XX 4 /* MCI 1 rev 2.x */
/* Request-progress bits stored in flags. */
145 #define PENDING_CMD 0x01
146 #define PENDING_STOP 0x02
147 #define CMD_MULTIREAD 0x10
148 #define CMD_MULTIWRITE 0x20
151 struct resource *irq_res; /* IRQ resource */
152 struct resource *mem_res; /* Memory resource */
154 bus_dma_tag_t dmatag; /* DMA tag the bounce buffers are allocated from */
155 struct mmc_host host; /* Host parameters: f_min, f_max, host_ocr, caps, ios */
157 struct mmc_request *req; /* Tested against NULL by at91_mci_request */
158 struct mmc_command *curcmd; /* Command the completion handlers operate on */
159 bus_dmamap_t bbuf_map[BBCOUNT]; /* One DMA map per bounce buffer */
160 char * bbuf_vaddr[BBCOUNT]; /* bounce bufs in KVA space */
161 uint32_t bbuf_len[BBCOUNT]; /* len currently queued for bounce buf */
162 uint32_t bbuf_curidx; /* which bbuf is the active DMA buffer */
163 uint32_t xfer_offset; /* offset so far into caller's buf */
/*
 * Forward declarations for functions that are referenced before their
 * definitions appear later in this file.
 */
166 /* bus entry points */
167 static int at91_mci_probe(device_t dev);
168 static int at91_mci_attach(device_t dev);
169 static int at91_mci_detach(device_t dev);
170 static void at91_mci_intr(void *); /* installed by bus_setup_intr in attach */
172 /* helper routines */
173 static int at91_mci_activate(device_t dev);
174 static void at91_mci_deactivate(device_t dev);
175 static int at91_mci_is_mci1rev2xx(void); /* nonzero makes attach set CAP_MCI1_REV2XX */
/*
 * Convenience wrappers around the softc mutex sc_mtx: lock, unlock,
 * initialise (named after the device via device_get_nameunit), destroy, and
 * ownership assertions.
 * NOTE(review): LOCK_DESTROY and the two ASSERT macros carry a trailing
 * semicolon inside the expansion, and every macro except LOCK/UNLOCK uses
 * _sc without parentheses.
 * NOTE(review): the final continuation line of AT91_MCI_LOCK_INIT is not
 * visible in this excerpt.
 */
177 #define AT91_MCI_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx)
178 #define AT91_MCI_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx)
179 #define AT91_MCI_LOCK_INIT(_sc) \
180 mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \
182 #define AT91_MCI_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx);
183 #define AT91_MCI_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED);
184 #define AT91_MCI_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED);
/*
 * Read a 32-bit controller register.
 *
 * sc:  driver state; the access goes through sc->mem_res.
 * off: register offset passed to bus_read_4.
 * Returns the value read.
 */
186 static inline uint32_t
187 RD4(struct at91_mci_softc *sc, bus_size_t off)
189 return (bus_read_4(sc->mem_res, off));
/*
 * Write a 32-bit controller register.
 *
 * sc:  driver state; the access goes through sc->mem_res.
 * off: register offset passed to bus_write_4.
 * val: value to store.
 */
193 WR4(struct at91_mci_softc *sc, bus_size_t off, uint32_t val)
195 bus_write_4(sc->mem_res, off, val);
199 at91_bswap_buf(struct at91_mci_softc *sc, void * dptr, void * sptr, uint32_t memsize)
201 uint32_t * dst = (uint32_t *)dptr;
202 uint32_t * src = (uint32_t *)sptr;
206 * If the hardware doesn't need byte-swapping, let bcopy() do the
207 * work. Use bounce buffer even if we don't need byteswap, since
208 * buffer may straddle a page boundry, and we don't handle
209 * multi-segment transfers in hardware. Seen from 'bsdlabel -w' which
210 * uses raw geom access to the volume. Greg Ansley (gja (at)
213 if (!(sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
214 memcpy(dptr, sptr, memsize);
219 * Nice performance boost for slightly unrolling this loop.
220 * (But very little extra boost for further unrolling it.)
222 for (i = 0; i < memsize; i += 16) {
223 *dst++ = bswap32(*src++);
224 *dst++ = bswap32(*src++);
225 *dst++ = bswap32(*src++);
226 *dst++ = bswap32(*src++);
229 /* Mop up the last 1-3 words, if any. */
230 for (i = 0; i < (memsize & 0x0F); i += 4) {
231 *dst++ = bswap32(*src++);
/*
 * Callback handed to bus_dmamap_load by at91_mci_start_cmd.
 *
 * arg:   points at a bus_addr_t that receives the result.
 * segs:  segment list; only segs[0].ds_addr is used.
 * nsegs: segment count (not referenced by the visible line).
 * error: load status. NOTE(review): any check of this argument is not
 *        visible in this excerpt.
 */
236 at91_mci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
240 *(bus_addr_t *)arg = segs[0].ds_addr;
/*
 * Stop the PDC in both directions (TXTDIS | RXTDIS) and write zero to the
 * receive and transmit next-buffer registers (RNPR/RNCR and TNPR/TNCR).
 * NOTE(review): the gaps in the embedded line numbers suggest further
 * register writes that are not visible in this excerpt.
 */
244 at91_mci_pdc_disable(struct at91_mci_softc *sc)
246 WR4(sc, PDC_PTCR, PDC_PTCR_TXTDIS | PDC_PTCR_RXTDIS);
249 WR4(sc, PDC_RNPR, 0);
250 WR4(sc, PDC_RNCR, 0);
253 WR4(sc, PDC_TNPR, 0);
254 WR4(sc, PDC_TNCR, 0);
258 * Reset the controller, then restore most of the current state.
260 * This is called after detecting an error. It's also called after stopping a
261 * multi-block write, to un-wedge the device so that it will handle the NOTBUSY
262 * signal correctly. See comments in at91_mci_stop_done() for more details.
/*
 * Visible sequence: disable the PDC; save the interrupt mask (IMR), the low
 * 15 bits of the mode register (MR & 0x7fff), SDCR and DTOR; mask every
 * interrupt; disable the controller with software reset asserted; re-enable
 * it with power-save; write back SDCR and DTOR; re-enable the saved
 * interrupt mask through IER.
 * NOTE(review): the write that restores the saved MR value, and the status
 * read mentioned by the trailing comment, are not visible in this excerpt.
 */
264 static void at91_mci_reset(struct at91_mci_softc *sc)
271 at91_mci_pdc_disable(sc);
273 /* save current state */
275 imr = RD4(sc, MCI_IMR);
276 mr = RD4(sc, MCI_MR) & 0x7fff;
277 sdcr = RD4(sc, MCI_SDCR);
278 dtor = RD4(sc, MCI_DTOR);
280 /* reset the controller */
282 WR4(sc, MCI_IDR, 0xffffffff);
283 WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST);
287 WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
289 WR4(sc, MCI_SDCR, sdcr);
290 WR4(sc, MCI_DTOR, dtor);
291 WR4(sc, MCI_IER, imr);
294 * Make sure sdio interrupts will fire. Not sure why reading
295 * SR ensures that, but this is in the linux driver.
/*
 * Bring the controller to its initial state: assert software reset, mask all
 * interrupts, program the data timeout (DTOR) and the mode register (PDC
 * mode plus the 0x34a divider value), select the slot with a 1-bit bus
 * (slot A unless AT91_MCI_SLOT_B is defined), then enable the controller
 * with power-save.
 * The RDPROOF/WRPROOF setting for rev 2xx parts is present only as
 * commented-out code.
 */
302 at91_mci_init(device_t dev)
304 struct at91_mci_softc *sc = device_get_softc(dev);
307 WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */
308 WR4(sc, MCI_IDR, 0xffffffff); /* Turn off interrupts */
309 WR4(sc, MCI_DTOR, MCI_DTOR_DTOMUL_1M | 1);
310 val = MCI_MR_PDCMODE;
311 val |= 0x34a; /* PWSDIV = 3; CLKDIV = 74 */
312 // if (sc->sc_cap & CAP_MCI1_REV2XX)
313 // val |= MCI_MR_RDPROOF | MCI_MR_WRPROOF;
314 WR4(sc, MCI_MR, val);
315 #ifndef AT91_MCI_SLOT_B
316 WR4(sc, MCI_SDCR, 0); /* SLOT A, 1 bit bus */
319 * XXX Really should add second "unit" but nobody using using
320 * a two slot card that we know of. XXX
322 WR4(sc, MCI_SDCR, 1); /* SLOT B, 1 bit bus */
325 * Enable controller, including power-save. The slower clock
326 * of the power-save mode is only in effect when there is no
327 * transfer in progress, so it can be left in this mode all
330 WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
/*
 * Quiesce the controller: mask every interrupt, disable the PDC, then
 * disable the device with software reset asserted.
 */
334 at91_mci_fini(device_t dev)
336 struct at91_mci_softc *sc = device_get_softc(dev);
338 WR4(sc, MCI_IDR, 0xffffffff); /* Turn off interrupts */
339 at91_mci_pdc_disable(sc);
340 WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */
/*
 * Probe entry point: sets the device description string.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
344 at91_mci_probe(device_t dev)
347 device_set_desc(dev, "MCI mmc/sd host bridge");
/*
 * Attach entry point. Steps visible in this excerpt, in order:
 *  - record capability bits in sc->sc_cap (CAP_NEEDS_BYTESWAP, and
 *    CAP_MCI1_REV2XX when at91_mci_is_mci1rev2xx() is nonzero);
 *  - allocate bus resources through at91_mci_activate and initialise the
 *    softc mutex;
 *  - create a DMA tag (4096-byte alignment, a single segment of at most
 *    BBSIZE bytes) and allocate BBCOUNT bounce buffers from it;
 *  - install at91_mci_intr as the interrupt handler;
 *  - take the 4wire and allow_overclock settings from device hints and
 *    publish both as read/write sysctls;
 *  - fill in sc->host: f_min 375000, f_max master clock / 2 capped at
 *    25000000, host_ocr, and MMC_CAP_4_BIT_DATA when CAP_HAS_4WIRE is set;
 *  - add the mmc child device and run bus_generic_attach.
 * NOTE(review): the conditions guarding some of these statements and the
 * error-handling branches between them are not visible in this excerpt.
 */
352 at91_mci_attach(device_t dev)
354 struct at91_mci_softc *sc = device_get_softc(dev);
355 struct sysctl_ctx_list *sctx;
356 struct sysctl_oid *soid;
360 sctx = device_get_sysctl_ctx(dev);
361 soid = device_get_sysctl_tree(dev);
366 sc->sc_cap |= CAP_NEEDS_BYTESWAP;
368 * MCI1 Rev 2 controllers need some workarounds, flag if so.
370 if (at91_mci_is_mci1rev2xx())
371 sc->sc_cap |= CAP_MCI1_REV2XX;
373 err = at91_mci_activate(dev);
377 AT91_MCI_LOCK_INIT(sc);
383 * Allocate DMA tags and maps and bounce buffers.
385 * The parms in the tag_create call cause the dmamem_alloc call to
386 * create each bounce buffer as a single contiguous buffer of BBSIZE
387 * bytes aligned to a 4096 byte boundary.
389 * Do not use DMA_COHERENT for these buffers because that maps the
390 * memory as non-cachable, which prevents cache line burst fills/writes,
391 * which is something we need since we're trying to overlap the
392 * byte-swapping with the DMA operations.
394 err = bus_dma_tag_create(bus_get_dma_tag(dev), 4096, 0,
395 BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
396 BBSIZE, 1, BBSIZE, 0, NULL, NULL, &sc->dmatag);
400 for (i = 0; i < BBCOUNT; ++i) {
401 err = bus_dmamem_alloc(sc->dmatag, (void **)&sc->bbuf_vaddr[i],
402 BUS_DMA_NOWAIT, &sc->bbuf_map[i]);
408 * Activate the interrupt
410 err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE,
411 NULL, at91_mci_intr, sc, &sc->intrhand);
413 AT91_MCI_LOCK_DESTROY(sc);
418 * Allow 4-wire to be initially set via #define.
419 * Allow a device hint to override that.
420 * Allow a sysctl to override that.
422 #if defined(AT91_MCI_HAS_4WIRE) && AT91_MCI_HAS_4WIRE != 0
425 resource_int_value(device_get_name(dev), device_get_unit(dev),
426 "4wire", &sc->has_4wire);
427 SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "4wire",
428 CTLFLAG_RW, &sc->has_4wire, 0, "has 4 wire SD Card bus");
430 sc->sc_cap |= CAP_HAS_4WIRE;
432 sc->allow_overclock = AT91_MCI_ALLOW_OVERCLOCK;
433 resource_int_value(device_get_name(dev), device_get_unit(dev),
434 "allow_overclock", &sc->allow_overclock);
435 SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "allow_overclock",
436 CTLFLAG_RW, &sc->allow_overclock, 0,
437 "Allow up to 30MHz clock for 25MHz request when next highest speed 15MHz or less.");
440 * Our real min freq is master_clock/512, but upper driver layers are
441 * going to set the min speed during card discovery, and the right speed
442 * for that is 400kHz, so advertise a safe value just under that.
444 * For max speed, while the rm9200 manual says the max is 50mhz, it also
445 * says it supports only the SD v1.0 spec, which means the real limit is
446 * 25mhz. On the other hand, historical use has been to slightly violate
447 * the standard by running the bus at 30MHz. For more information on
448 * that, see the comments at the top of this file.
450 sc->host.f_min = 375000;
451 sc->host.f_max = at91_master_clock / 2;
452 if (sc->host.f_max > 25000000)
453 sc->host.f_max = 25000000;
454 sc->host.host_ocr = MMC_OCR_320_330 | MMC_OCR_330_340;
456 if (sc->sc_cap & CAP_HAS_4WIRE)
457 sc->host.caps |= MMC_CAP_4_BIT_DATA;
459 child = device_add_child(dev, "mmc", 0);
460 device_set_ivars(dev, &sc->host);
461 err = bus_generic_attach(dev);
464 at91_mci_deactivate(dev);
/*
 * Detach entry point: releases bus resources through at91_mci_deactivate,
 * frees bounce buffers 0 and 1 (indices are hard-coded rather than looping
 * to BBCOUNT) and destroys the DMA tag.
 * NOTE(review): the visible return value is EBUSY, marked XXX by the
 * original author; whether another return path exists cannot be told from
 * this excerpt.
 */
469 at91_mci_detach(device_t dev)
471 struct at91_mci_softc *sc = device_get_softc(dev);
474 at91_mci_deactivate(dev);
476 bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[0], sc->bbuf_map[0]);
477 bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[1], sc->bbuf_map[1]);
478 bus_dma_tag_destroy(sc->dmatag);
480 return (EBUSY); /* XXX */
/*
 * Allocate the memory and IRQ resources for dev into sc->mem_res and
 * sc->irq_res; each result is compared against NULL.
 * NOTE(review): at91_mci_deactivate is called from here, presumably on the
 * failure path; the branch structure and return values are not visible in
 * this excerpt.
 */
484 at91_mci_activate(device_t dev)
486 struct at91_mci_softc *sc;
489 sc = device_get_softc(dev);
491 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
493 if (sc->mem_res == NULL)
497 sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
499 if (sc->irq_res == NULL)
504 at91_mci_deactivate(dev);
/*
 * Undo at91_mci_activate and the interrupt setup: tear down the interrupt
 * handler, detach child devices, and release the memory and IRQ resources
 * using the rid recorded in each resource.
 * NOTE(review): any guards around these calls are not visible in this
 * excerpt.
 */
509 at91_mci_deactivate(device_t dev)
511 struct at91_mci_softc *sc;
513 sc = device_get_softc(dev);
515 bus_teardown_intr(dev, sc->irq_res, sc->intrhand);
517 bus_generic_detach(sc->dev);
519 bus_release_resource(dev, SYS_RES_MEMORY,
520 rman_get_rid(sc->mem_res), sc->mem_res);
523 bus_release_resource(dev, SYS_RES_IRQ,
524 rman_get_rid(sc->irq_res), sc->irq_res);
/*
 * Decide, by switching on soc_info.type, whether this SoC carries an MCI1
 * rev 2.x controller; attach sets CAP_MCI1_REV2XX on a nonzero result.
 * NOTE(review): the case labels and return values are not visible in this
 * excerpt.
 */
530 at91_mci_is_mci1rev2xx(void)
533 switch (soc_info.type) {
/*
 * Apply the requested bus settings (ios) to the controller.
 *
 * Visible behavior:
 *  - ios->clock == 0 disables the controller (MCI_CR_MCIDIS);
 *  - otherwise the controller is enabled with power-save and a divider is
 *    chosen: (master / clock) / 2, minus 1 when master is an exact multiple
 *    of 2 * clock; the resulting frequency is master / ((clkdiv + 1) * 2);
 *  - for a 25000000 request with clkdiv 1 and a result of 15000000 or less,
 *    the frequency is recomputed when allow_overclock is set or the result
 *    is 12500000 or less;
 *  - the SDCBUS bit of MCI_SDCR follows ios->bus_width == bus_width_4;
 *  - the CLKDIV field of MCI_MR is replaced with clkdiv.
 * NOTE(review): the statement that changes clkdiv inside the overclock
 * branch, the store back into ios->clock, and the return are not visible in
 * this excerpt.
 */
547 at91_mci_update_ios(device_t brdev, device_t reqdev)
549 struct at91_mci_softc *sc;
554 sc = device_get_softc(brdev);
558 * Calculate our closest available clock speed that doesn't exceed the
561 * When overclocking is allowed, the requested clock is 25MHz, the
562 * computed frequency is 15MHz or smaller and clockdiv is 1, use
563 * clockdiv of 0 to double that. If less than 12.5MHz, double
564 * regardless of the overclocking setting.
566 * Whatever we come up with, store it back into ios->clock so that the
567 * upper layer drivers can report the actual speed of the bus.
569 if (ios->clock == 0) {
570 WR4(sc, MCI_CR, MCI_CR_MCIDIS);
573 WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
574 if ((at91_master_clock % (ios->clock * 2)) == 0)
575 clkdiv = ((at91_master_clock / ios->clock) / 2) - 1;
577 clkdiv = (at91_master_clock / ios->clock) / 2;
578 freq = at91_master_clock / ((clkdiv+1) * 2);
579 if (clkdiv == 1 && ios->clock == 25000000 && freq <= 15000000) {
580 if (sc->allow_overclock || freq <= 12500000) {
582 freq = at91_master_clock / ((clkdiv+1) * 2);
587 if (ios->bus_width == bus_width_4)
588 WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) | MCI_SDCR_SDCBUS);
590 WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) & ~MCI_SDCR_SDCBUS);
591 WR4(sc, MCI_MR, (RD4(sc, MCI_MR) & ~MCI_MR_CLKDIV) | clkdiv);
592 /* Do we need a settle time here? */
593 /* XXX We need to turn the device on/off here with a GPIO pin */
/*
 * Program the controller to issue cmd.
 *
 * Visible flow:
 *  1. Build the command register value: open-drain flag when the bus mode is
 *     opendrain, then the response type (none, 136-bit or 48-bit) with
 *     MCI_CMDR_MAXLAT for commands that expect a response.
 *  2. Command without data: disable the PDC, mark STOP_TRANSMISSION as a
 *     stop-transfer command, drop the response-CRC interrupt for the
 *     SEND_OP_COND opcodes, write ARGR and CMDR, mask everything and enable
 *     CMDRDY plus the response-error interrupts.
 *  3. Command with data: set direction, start and transfer type bits (multi
 *     block transfers also record CMD_MULTIREAD or CMD_MULTIWRITE in
 *     sc->flags), turn the PDC off, program the block length (at most 512)
 *     and PDC mode in MCI_MR, then stage the transfer in the bounce buffers:
 *       - read: load buffer 0 into RPR/RCR and buffer 1 into RNPR/RNCR
 *         (counts are in 32-bit words, len / 4) and enable the receiver;
 *       - write: copy the caller data through at91_bswap_buf into buffer 0
 *         (and buffer 1 from offset BBSIZE), load TPR/TCR and TNPR/TNCR, and
 *         leave the transmitter disabled until CMDRDY.
 *     Finally write ARGR and CMDR and enable the error and CMDRDY
 *     interrupts.
 * NOTE(review): a number of statements (local declarations, the len and
 * remaining bookkeeping, several branch headers) are not visible in this
 * excerpt, so the exact split between the two bounce buffers cannot be
 * confirmed from here.
 * NOTE(review): the size check that panics with the text about a read size
 * sits ahead of the read/write split, so it covers writes as well.
 */
598 at91_mci_start_cmd(struct at91_mci_softc *sc, struct mmc_command *cmd)
601 struct mmc_data *data;
606 /* XXX Upper layers don't always set this */
609 /* Begin setting up command register. */
613 if (sc->host.ios.bus_mode == opendrain)
614 cmdr |= MCI_CMDR_OPDCMD;
616 /* Set up response handling. Allow max timeout for responses. */
618 if (MMC_RSP(cmd->flags) == MMC_RSP_NONE)
619 cmdr |= MCI_CMDR_RSPTYP_NO;
621 cmdr |= MCI_CMDR_MAXLAT;
622 if (cmd->flags & MMC_RSP_136)
623 cmdr |= MCI_CMDR_RSPTYP_136;
625 cmdr |= MCI_CMDR_RSPTYP_48;
629 * If there is no data transfer, just set up the right interrupt mask
630 * and start the command.
632 * The interrupt mask needs to be CMDRDY plus all non-data-transfer
633 * errors. It's important to leave the transfer-related errors out, to
634 * avoid spurious timeout or crc errors on a STOP command following a
635 * multiblock read. When a multiblock read is in progress, sending a
636 * STOP in the middle of a block occasionally triggers such errors, but
637 * we're totally disinterested in them because we've already gotten all
638 * the data we wanted without error before sending the STOP command.
642 uint32_t ier = MCI_SR_CMDRDY |
643 MCI_SR_RTOE | MCI_SR_RENDE |
644 MCI_SR_RCRCE | MCI_SR_RDIRE | MCI_SR_RINDE;
646 at91_mci_pdc_disable(sc);
648 if (cmd->opcode == MMC_STOP_TRANSMISSION)
649 cmdr |= MCI_CMDR_TRCMD_STOP;
651 /* Ignore response CRC on CMD2 and ACMD41, per standard. */
653 if (cmd->opcode == MMC_SEND_OP_COND ||
654 cmd->opcode == ACMD_SD_SEND_OP_COND)
655 ier &= ~MCI_SR_RCRCE;
658 printf("CMDR %x (opcode %d) ARGR %x no data\n",
659 cmdr, cmd->opcode, cmd->arg);
661 WR4(sc, MCI_ARGR, cmd->arg);
662 WR4(sc, MCI_CMDR, cmdr);
663 WR4(sc, MCI_IDR, 0xffffffff);
664 WR4(sc, MCI_IER, ier);
668 /* There is data, set up the transfer-related parts of the command. */
670 if (data->flags & MMC_DATA_READ)
671 cmdr |= MCI_CMDR_TRDIR;
673 if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE))
674 cmdr |= MCI_CMDR_TRCMD_START;
676 if (data->flags & MMC_DATA_STREAM)
677 cmdr |= MCI_CMDR_TRTYP_STREAM;
678 else if (data->flags & MMC_DATA_MULTI) {
679 cmdr |= MCI_CMDR_TRTYP_MULTIPLE;
680 sc->flags |= (data->flags & MMC_DATA_READ) ?
681 CMD_MULTIREAD : CMD_MULTIWRITE;
685 * Disable PDC until we're ready.
687 * Set block size and turn on PDC mode for dma xfer.
688 * Note that the block size is the smaller of the amount of data to be
689 * transferred, or 512 bytes. The 512 size is fixed by the standard;
690 * smaller blocks are possible, but never larger.
693 WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);
695 mr = RD4(sc,MCI_MR) & ~MCI_MR_BLKLEN;
696 mr |= min(data->len, 512) << 16;
697 WR4(sc, MCI_MR, mr | MCI_MR_PDCMODE|MCI_MR_PDCPADV);
702 * Use bounce buffers even if we don't need to byteswap, because doing
703 * multi-block IO with large DMA buffers is way fast (compared to
704 * single-block IO), even after incurring the overhead of also copying
705 * from/to the caller's buffers (which may be in non-contiguous physical
708 * In an ideal non-byteswap world we could create a dma tag that allows
709 * for discontiguous segments and do the IO directly from/to the
710 * caller's buffer(s), using ENDRX/ENDTX interrupts to chain the
711 * discontiguous buffers through the PDC. Someday.
713 * If a read is bigger than 2k, split it in half so that we can start
714 * byte-swapping the first half while the second half is on the wire.
715 * It would be best if we could split it into 8k chunks, but we can't
716 * always keep up with the byte-swapping due to other system activity,
717 * and if an RXBUFF interrupt happens while we're still handling the
718 * byte-swap from the prior buffer (IE, we haven't returned from
719 * handling the prior interrupt yet), then data will get dropped on the
720 * floor and we can't easily recover from that. The right fix for that
721 * would be to have the interrupt handling only keep the DMA flowing and
722 * enqueue filled buffers to be byte-swapped in a non-interrupt context.
723 * Even that won't work on the write side of things though; in that
724 * context we have to have all the data ready to go before starting the
727 * XXX what about stream transfers?
732 if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE)) {
734 uint32_t remaining = data->len;
738 if (remaining > (BBCOUNT*BBSIZE))
739 panic("IO read size exceeds MAXDATA\n");
741 if (data->flags & MMC_DATA_READ) {
742 if (remaining > 2048) // XXX
746 err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0],
747 sc->bbuf_vaddr[0], len, at91_mci_getaddr,
748 &paddr, BUS_DMA_NOWAIT);
750 panic("IO read dmamap_load failed\n");
751 bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0],
752 BUS_DMASYNC_PREREAD);
753 WR4(sc, PDC_RPR, paddr);
754 WR4(sc, PDC_RCR, len / 4);
755 sc->bbuf_len[0] = len;
757 if (remaining == 0) {
761 err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1],
762 sc->bbuf_vaddr[1], len, at91_mci_getaddr,
763 &paddr, BUS_DMA_NOWAIT);
765 panic("IO read dmamap_load failed\n");
766 bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1],
767 BUS_DMASYNC_PREREAD);
768 WR4(sc, PDC_RNPR, paddr);
769 WR4(sc, PDC_RNCR, len / 4);
770 sc->bbuf_len[1] = len;
773 WR4(sc, PDC_PTCR, PDC_PTCR_RXTEN);
775 len = min(BBSIZE, remaining);
777 * If this is MCI1 revision 2xx controller, apply
778 * a work-around for the "Data Write Operation and
779 * number of bytes" erratum.
781 if ((sc->sc_cap & CAP_MCI1_REV2XX) && len < 12) {
783 memset(sc->bbuf_vaddr[0], 0, 12);
785 at91_bswap_buf(sc, sc->bbuf_vaddr[0], data->data, len);
786 err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0],
787 sc->bbuf_vaddr[0], len, at91_mci_getaddr,
788 &paddr, BUS_DMA_NOWAIT);
790 panic("IO write dmamap_load failed\n");
791 bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0],
792 BUS_DMASYNC_PREWRITE);
793 WR4(sc, PDC_TPR,paddr);
794 WR4(sc, PDC_TCR, len / 4);
795 sc->bbuf_len[0] = len;
797 if (remaining == 0) {
801 at91_bswap_buf(sc, sc->bbuf_vaddr[1],
802 ((char *)data->data)+BBSIZE, len);
803 err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1],
804 sc->bbuf_vaddr[1], len, at91_mci_getaddr,
805 &paddr, BUS_DMA_NOWAIT);
807 panic("IO write dmamap_load failed\n");
808 bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1],
809 BUS_DMASYNC_PREWRITE);
810 WR4(sc, PDC_TNPR, paddr);
811 WR4(sc, PDC_TNCR, len / 4);
812 sc->bbuf_len[1] = len;
815 /* do not enable PDC xfer until CMDRDY asserted */
817 data->xfer_len = 0; /* XXX what's this? appears to be unused. */
821 printf("CMDR %x (opcode %d) ARGR %x with data len %d\n",
822 cmdr, cmd->opcode, cmd->arg, cmd->data->len);
824 WR4(sc, MCI_ARGR, cmd->arg);
825 WR4(sc, MCI_CMDR, cmdr);
826 WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_CMDRDY);
/*
 * Advance the current request by one step. PENDING_CMD is consumed first
 * and starts req->cmd; otherwise PENDING_STOP is consumed and starts
 * req->stop. A write that masks every interrupt (MCI_IDR = 0xffffffff) is
 * also visible after those branches.
 * NOTE(review): how req is obtained, and the request-completion code that
 * follows the interrupt mask write, are not visible in this excerpt.
 */
830 at91_mci_next_operation(struct at91_mci_softc *sc)
832 struct mmc_request *req;
838 if (sc->flags & PENDING_CMD) {
839 sc->flags &= ~PENDING_CMD;
840 at91_mci_start_cmd(sc, req->cmd);
842 } else if (sc->flags & PENDING_STOP) {
843 sc->flags &= ~PENDING_STOP;
844 at91_mci_start_cmd(sc, req->stop);
848 WR4(sc, MCI_IDR, 0xffffffff);
851 //printf("req done\n");
/*
 * Bridge method that accepts a new request. It tests whether a request is
 * already in progress (sc->req != NULL), sets sc->flags to PENDING_CMD, may
 * add PENDING_STOP, and starts processing with at91_mci_next_operation.
 * NOTE(review): the locking, the action taken when a request is already
 * pending, the condition for PENDING_STOP and the return value are not
 * visible in this excerpt.
 */
856 at91_mci_request(device_t brdev, device_t reqdev, struct mmc_request *req)
858 struct at91_mci_softc *sc = device_get_softc(brdev);
861 if (sc->req != NULL) {
865 //printf("new req\n");
867 sc->flags = PENDING_CMD;
869 sc->flags |= PENDING_STOP;
870 at91_mci_next_operation(sc);
/*
 * Bridge method; presumably reports the read-only (write-protect) state of
 * the card, judging by the name only.
 * NOTE(review): the body is not visible in this excerpt -- confirm.
 */
876 at91_mci_get_ro(device_t brdev, device_t reqdev)
/*
 * Bridge method for taking ownership of the host. The visible line sleeps
 * on the softc with sc_mtx as the interlock, wait message mciah and a
 * timeout of hz / 5 ticks.
 * NOTE(review): the loop condition, the locking around the sleep and the
 * return value are not visible in this excerpt.
 */
882 at91_mci_acquire_host(device_t brdev, device_t reqdev)
884 struct at91_mci_softc *sc = device_get_softc(brdev);
889 msleep(sc, &sc->sc_mtx, PZERO, "mciah", hz / 5);
/*
 * Bridge method, presumably the counterpart of at91_mci_acquire_host.
 * NOTE(review): only the softc lookup is visible in this excerpt.
 */
896 at91_mci_release_host(device_t brdev, device_t reqdev)
898 struct at91_mci_softc *sc = device_get_softc(brdev);
/*
 * Handle completion of one read bounce buffer.
 *
 * sc: driver state; sc->curcmd is the command being served.
 * sr: status register value, used here only in the diagnostic printf.
 *
 * The active bounce buffer is synced (POSTREAD) and unloaded, its contents
 * are copied (byte-swapped when required) to the caller buffer at
 * xfer_offset, xfer_offset advances by the buffer length and the active
 * index flips to the other buffer. When xfer_offset equals the total data
 * length, the PDC is disabled, the command is marked MMC_ERR_NONE and the
 * next operation starts; the visible alternative writes zero to RNCR and
 * re-enables the error and ENDRX interrupts.
 */
908 at91_mci_read_done(struct at91_mci_softc *sc, uint32_t sr)
910 struct mmc_command *cmd = sc->curcmd;
911 char * dataptr = (char *)cmd->data->data;
912 uint32_t curidx = sc->bbuf_curidx;
913 uint32_t len = sc->bbuf_len[curidx];
916 * We arrive here when a DMA transfer for a read is done, whether it's
917 * a single or multi-block read.
919 * We byte-swap the buffer that just completed, and if that is the
920 * last buffer that's part of this read then we move on to the next
921 * operation, otherwise we wait for another ENDRX for the next bufer.
924 bus_dmamap_sync(sc->dmatag, sc->bbuf_map[curidx], BUS_DMASYNC_POSTREAD);
925 bus_dmamap_unload(sc->dmatag, sc->bbuf_map[curidx]);
927 at91_bswap_buf(sc, dataptr + sc->xfer_offset, sc->bbuf_vaddr[curidx], len);
930 printf("read done sr %x curidx %d len %d xfer_offset %d\n",
931 sr, curidx, len, sc->xfer_offset);
934 sc->xfer_offset += len;
935 sc->bbuf_curidx = !curidx; /* swap buffers */
938 * If we've transferred all the data, move on to the next operation.
940 * If we're still transferring the last buffer, RNCR is already zero but
941 * we have to write a zero anyway to clear the ENDRX status so we don't
942 * re-interrupt until the last buffer is done.
944 if (sc->xfer_offset == cmd->data->len) {
945 WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);
946 cmd->error = MMC_ERR_NONE;
947 at91_mci_next_operation(sc);
949 WR4(sc, PDC_RNCR, 0);
950 WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_ENDRX);
/*
 * Handle completion of the DMA for a write.
 *
 * sc: driver state; sc->curcmd is the command being served.
 * sr: status register value; its NOTBUSY bit is examined.
 *
 * The PDC is disabled, and the bounce buffer selected by bbuf_curidx is
 * synced (POSTWRITE) and unloaded. For a multi-block transfer, or when
 * NOTBUSY is already set in sr, the command is marked MMC_ERR_NONE and the
 * next operation starts; the visible alternative enables the error and
 * NOTBUSY interrupts.
 */
955 at91_mci_write_done(struct at91_mci_softc *sc, uint32_t sr)
957 struct mmc_command *cmd = sc->curcmd;
960 * We arrive here when the entire DMA transfer for a write is done,
961 * whether it's a single or multi-block write. If it's multi-block we
962 * have to immediately move on to the next operation which is to send
963 * the stop command. If it's a single-block transfer we need to wait
964 * for NOTBUSY, but if that's already asserted we can avoid another
965 * interrupt and just move on to completing the request right away.
968 WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);
970 bus_dmamap_sync(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx],
971 BUS_DMASYNC_POSTWRITE);
972 bus_dmamap_unload(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx]);
974 if ((cmd->data->flags & MMC_DATA_MULTI) || (sr & MCI_SR_NOTBUSY)) {
975 cmd->error = MMC_ERR_NONE;
976 at91_mci_next_operation(sc);
978 WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
/*
 * Completion step for the current command: marks it MMC_ERR_NONE and moves
 * on with at91_mci_next_operation.
 */
983 at91_mci_notbusy(struct at91_mci_softc *sc)
985 struct mmc_command *cmd = sc->curcmd;
988 * We arrive here by either completion of a single-block write, or
989 * completion of the stop command that ended a multi-block write (and,
990 * I suppose, after a card-select or erase, but I haven't tested
991 * those). Anyway, we're done and it's time to move on to the next
995 cmd->error = MMC_ERR_NONE;
996 at91_mci_next_operation(sc);
/*
 * Post-processing after a STOP_TRANSMISSION command; called from
 * at91_mci_cmdrdy.
 *
 * sc: driver state; sc->flags and sc->sc_cap select the workaround.
 * sr: status register value; it is re-read from MCI_SR inside the drain
 *     loop.
 *
 * Both workarounds apply only when CAP_NEEDS_BYTESWAP is set:
 *  - after a multi-block write (CMD_MULTIWRITE) the error and NOTBUSY
 *    interrupts are enabled;
 *  - after a multi-block read (CMD_MULTIREAD) a loop polls MCI_SR for as
 *    long as RXRDY is set.
 * Otherwise the command is marked MMC_ERR_NONE and the next operation
 * starts.
 * NOTE(review): the reset call described by the embedded commentary, the
 * statement executed inside the RXRDY test and the early returns are not
 * visible in this excerpt.
 */
1000 at91_mci_stop_done(struct at91_mci_softc *sc, uint32_t sr)
1002 struct mmc_command *cmd = sc->curcmd;
1005 * We arrive here after receiving CMDRDY for a MMC_STOP_TRANSMISSION
1006 * command. Depending on the operation being stopped, we may have to
1007 * do some unusual things to work around hardware bugs.
1011 * This is known to be true of at91rm9200 hardware; it may or may not
1012 * apply to more recent chips:
1014 * After stopping a multi-block write, the NOTBUSY bit in MCI_SR does
1015 * not properly reflect the actual busy state of the card as signaled
1016 * on the DAT0 line; it always claims the card is not-busy. If we
1017 * believe that and let operations continue, following commands will
1018 * fail with response timeouts (except of course MMC_SEND_STATUS -- it
1019 * indicates the card is busy in the PRG state, which was the smoking
1020 * gun that showed MCI_SR NOTBUSY was not tracking DAT0 correctly).
1022 * The atmel docs are emphatic: "This flag [NOTBUSY] must be used only
1023 * for Write Operations." I guess technically since we sent a stop
1024 * it's not a write operation anymore. But then just what did they
1025 * think it meant for the stop command to have "...an optional busy
1026 * signal transmitted on the data line" according to the SD spec?
1028 * I tried a variety of things to un-wedge the MCI and get the status
1029 * register to reflect NOTBUSY correctly again, but the only thing
1030 * that worked was a full device reset. It feels like an awfully big
1031 * hammer, but doing a full reset after every multiblock write is
1032 * still faster than doing single-block IO (by almost two orders of
1033 * magnitude: 20KB/sec improves to about 1.8MB/sec best case).
1035 * After doing the reset, wait for a NOTBUSY interrupt before
1036 * continuing with the next operation.
1038 * This workaround breaks multiwrite on the rev2xx parts, but some other
1039 * workaround is needed.
1041 if ((sc->flags & CMD_MULTIWRITE) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
1043 WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
1048 * This is known to be true of at91rm9200 hardware; it may or may not
1049 * apply to more recent chips:
1051 * After stopping a multi-block read, loop to read and discard any
1052 * data that coasts in after we sent the stop command. The docs don't
1053 * say anything about it, but empirical testing shows that 1-3
1054 * additional words of data get buffered up in some unmentioned
1055 * internal fifo and if we don't read and discard them here they end
1056 * up on the front of the next read DMA transfer we do.
1058 * This appears to be unnecessary for rev2xx parts.
1060 if ((sc->flags & CMD_MULTIREAD) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
1065 sr = RD4(sc, MCI_SR);
1066 if (sr & MCI_SR_RXRDY) {
1070 } while (sr & MCI_SR_RXRDY);
1074 cmd->error = MMC_ERR_NONE;
1075 at91_mci_next_operation(sc);
/*
 * Handle the CMDRDY condition for the current command.
 *
 * sc: driver state; sc->curcmd is the command being served.
 * sr: status register value, passed on to at91_mci_stop_done and printed in
 *     the diagnostic output.
 *
 * Visible flow:
 *  1. When a response is expected, copy it from the RSPR registers into
 *     cmd->resp (4 words for a 136-bit response, otherwise 1).
 *  2. A STOP_TRANSMISSION command is handed to at91_mci_stop_done.
 *  3. A command flagged MMC_RSP_BUSY enables the error and NOTBUSY
 *     interrupts.
 *  4. A command with data enables the error interrupts plus a
 *     direction-specific one; for a write that is TXBUFE, and the PDC
 *     transmitter is enabled at this point.
 *  5. Otherwise the command is marked MMC_ERR_NONE and the next operation
 *     starts.
 * NOTE(review): the early returns between these steps, the test for the
 * presence of data and the interrupt chosen for reads are not visible in
 * this excerpt.
 */
1080 at91_mci_cmdrdy(struct at91_mci_softc *sc, uint32_t sr)
1082 struct mmc_command *cmd = sc->curcmd;
1089 * We get here at the end of EVERY command. We retrieve the command
1090 * response (if any) then decide what to do next based on the command.
1093 if (cmd->flags & MMC_RSP_PRESENT) {
1094 for (i = 0; i < ((cmd->flags & MMC_RSP_136) ? 4 : 1); i++) {
1095 cmd->resp[i] = RD4(sc, MCI_RSPR + i * 4);
1097 printf("RSPR[%d] = %x sr=%x\n", i, cmd->resp[i], sr);
1102 * If this was a stop command, go handle the various special
1103 * conditions (read: bugs) that have to be dealt with following a stop.
1105 if (cmd->opcode == MMC_STOP_TRANSMISSION) {
1106 at91_mci_stop_done(sc, sr);
1111 * If this command can continue to assert BUSY beyond the response then
1112 * we need to wait for NOTBUSY before the command is really done.
1114 * Note that this may not work properly on the at91rm9200. It certainly
1115 * doesn't work for the STOP command that follows a multi-block write,
1116 * so post-stop CMDRDY is handled separately; see the special handling
1117 * in at91_mci_stop_done().
1119 * Beside STOP, there are other R1B-type commands that use the busy
1120 * signal after CMDRDY: CMD7 (card select), CMD28-29 (write protect),
1121 * CMD38 (erase). I haven't tested any of them, but I rather expect
1122 * them all to have the same sort of problem with MCI_SR not actually
1123 * reflecting the state of the DAT0-line busy indicator. So this code
1124 * may need to grow some sort of special handling for them too. (This
1125 * just in: CMD7 isn't a problem right now because dev/mmc.c incorrectly
1126 * sets the response flags to R1 rather than R1B.) XXX
1128 if ((cmd->flags & MMC_RSP_BUSY)) {
1129 WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
1134 * If there is a data transfer with this command, then...
1135 * - If it's a read, we need to wait for ENDRX.
1136 * - If it's a write, now is the time to enable the PDC, and we need
1137 * to wait for a BLKE that follows a TXBUFE, because if we're doing
1138 * a split transfer we get a BLKE after the first half (when TPR/TCR
1139 * get loaded from TNPR/TNCR). So first we wait for the TXBUFE, and
1140 * the handling for that interrupt will then invoke the wait for the
1141 * subsequent BLKE which indicates actual completion.
1145 if (cmd->data->flags & MMC_DATA_READ) {
1148 ier = MCI_SR_TXBUFE;
1149 WR4(sc, PDC_PTCR, PDC_PTCR_TXTEN);
1151 WR4(sc, MCI_IER, MCI_SR_ERROR | ier);
1156 * If we made it to here, we don't need to wait for anything more for
1157 * the current command, move on to the next command (will complete the
1158 * request if there is no next command).
1160 cmd->error = MMC_ERR_NONE;
1161 at91_mci_next_operation(sc);
1165 at91_mci_intr(void *arg)
1167 struct at91_mci_softc *sc = (struct at91_mci_softc*)arg;
1168 struct mmc_command *cmd = sc->curcmd;
1173 sr = RD4(sc, MCI_SR);
1174 isr = sr & RD4(sc, MCI_IMR);
1177 printf("i 0x%x sr 0x%x\n", isr, sr);
1180 * All interrupts are one-shot; disable it now.
1181 * The next operation will re-enable whatever interrupts it wants.
1183 WR4(sc, MCI_IDR, isr);
1184 if (isr & MCI_SR_ERROR) {
1185 if (isr & (MCI_SR_RTOE | MCI_SR_DTOE))
1186 cmd->error = MMC_ERR_TIMEOUT;
1187 else if (isr & (MCI_SR_RCRCE | MCI_SR_DCRCE))
1188 cmd->error = MMC_ERR_BADCRC;
1189 else if (isr & (MCI_SR_OVRE | MCI_SR_UNRE))
1190 cmd->error = MMC_ERR_FIFO;
1192 cmd->error = MMC_ERR_FAILED;
1194 * CMD8 is used to probe for SDHC cards, a standard SD card
1195 * will get a response timeout; don't report it because it's a
1196 * normal and expected condition. One might argue that all
1197 * error reporting should be left to higher levels, but when
1198 * they report at all it's always EIO, which isn't very
1199 * helpful. XXX bootverbose?
1201 if (cmd->opcode != 8) {
1202 device_printf(sc->dev,
1203 "IO error; status MCI_SR = 0x%x cmd opcode = %d%s\n",
1205 (cmd->opcode != 12) ? "" :
1206 (sc->flags & CMD_MULTIREAD) ? " after read" : " after write");
1209 at91_mci_next_operation(sc);
1211 if (isr & MCI_SR_TXBUFE) {
1212 // printf("TXBUFE\n");
1214 * We need to wait for a BLKE that follows TXBUFE
1215 * (intermediate BLKEs might happen after ENDTXes if
1216 * we're chaining multiple buffers). If BLKE is also
1217 * asserted at the time we get TXBUFE, we can avoid
1218 * another interrupt and process it right away, below.
1220 if (sr & MCI_SR_BLKE)
1223 WR4(sc, MCI_IER, MCI_SR_BLKE);
1225 if (isr & MCI_SR_RXBUFF) {
1226 // printf("RXBUFF\n");
1228 if (isr & MCI_SR_ENDTX) {
1229 // printf("ENDTX\n");
1231 if (isr & MCI_SR_ENDRX) {
1232 // printf("ENDRX\n");
1233 at91_mci_read_done(sc, sr);
1235 if (isr & MCI_SR_NOTBUSY) {
1236 // printf("NOTBUSY\n");
1237 at91_mci_notbusy(sc);
1239 if (isr & MCI_SR_DTIP) {
1240 // printf("Data transfer in progress\n");
1242 if (isr & MCI_SR_BLKE) {
1243 // printf("Block transfer end\n");
1244 at91_mci_write_done(sc, sr);
1246 if (isr & MCI_SR_TXRDY) {
1247 // printf("Ready to transmit\n");
1249 if (isr & MCI_SR_RXRDY) {
1250 // printf("Ready to receive\n");
1252 if (isr & MCI_SR_CMDRDY) {
1253 // printf("Command ready\n");
1254 at91_mci_cmdrdy(sc, sr);
1257 AT91_MCI_UNLOCK(sc);
1261 at91_mci_read_ivar(device_t bus, device_t child, int which, uintptr_t *result)
1263 struct at91_mci_softc *sc = device_get_softc(bus);
1268 case MMCBR_IVAR_BUS_MODE:
1269 *(int *)result = sc->host.ios.bus_mode;
1271 case MMCBR_IVAR_BUS_WIDTH:
1272 *(int *)result = sc->host.ios.bus_width;
1274 case MMCBR_IVAR_CHIP_SELECT:
1275 *(int *)result = sc->host.ios.chip_select;
1277 case MMCBR_IVAR_CLOCK:
1278 *(int *)result = sc->host.ios.clock;
1280 case MMCBR_IVAR_F_MIN:
1281 *(int *)result = sc->host.f_min;
1283 case MMCBR_IVAR_F_MAX:
1284 *(int *)result = sc->host.f_max;
1286 case MMCBR_IVAR_HOST_OCR:
1287 *(int *)result = sc->host.host_ocr;
1289 case MMCBR_IVAR_MODE:
1290 *(int *)result = sc->host.mode;
1292 case MMCBR_IVAR_OCR:
1293 *(int *)result = sc->host.ocr;
1295 case MMCBR_IVAR_POWER_MODE:
1296 *(int *)result = sc->host.ios.power_mode;
1298 case MMCBR_IVAR_VDD:
1299 *(int *)result = sc->host.ios.vdd;
1301 case MMCBR_IVAR_CAPS:
1302 if (sc->has_4wire) {
1303 sc->sc_cap |= CAP_HAS_4WIRE;
1304 sc->host.caps |= MMC_CAP_4_BIT_DATA;
1306 sc->sc_cap &= ~CAP_HAS_4WIRE;
1307 sc->host.caps &= ~MMC_CAP_4_BIT_DATA;
1309 *(int *)result = sc->host.caps;
1311 case MMCBR_IVAR_MAX_DATA:
1313 * Something is wrong with the 2x parts and multiblock, so
1314 * just do 1 block at a time for now, which really kills
1317 if (sc->sc_cap & CAP_MCI1_REV2XX)
1320 *(int *)result = MAX_BLOCKS;
1327 at91_mci_write_ivar(device_t bus, device_t child, int which, uintptr_t value)
1329 struct at91_mci_softc *sc = device_get_softc(bus);
1334 case MMCBR_IVAR_BUS_MODE:
1335 sc->host.ios.bus_mode = value;
1337 case MMCBR_IVAR_BUS_WIDTH:
1338 sc->host.ios.bus_width = value;
1340 case MMCBR_IVAR_CHIP_SELECT:
1341 sc->host.ios.chip_select = value;
1343 case MMCBR_IVAR_CLOCK:
1344 sc->host.ios.clock = value;
1346 case MMCBR_IVAR_MODE:
1347 sc->host.mode = value;
1349 case MMCBR_IVAR_OCR:
1350 sc->host.ocr = value;
1352 case MMCBR_IVAR_POWER_MODE:
1353 sc->host.ios.power_mode = value;
1355 case MMCBR_IVAR_VDD:
1356 sc->host.ios.vdd = value;
1358 /* These are read-only */
1359 case MMCBR_IVAR_CAPS:
1360 case MMCBR_IVAR_HOST_OCR:
1361 case MMCBR_IVAR_F_MIN:
1362 case MMCBR_IVAR_F_MAX:
1363 case MMCBR_IVAR_MAX_DATA:
1369 static device_method_t at91_mci_methods[] = {
1371 DEVMETHOD(device_probe, at91_mci_probe),
1372 DEVMETHOD(device_attach, at91_mci_attach),
1373 DEVMETHOD(device_detach, at91_mci_detach),
1376 DEVMETHOD(bus_read_ivar, at91_mci_read_ivar),
1377 DEVMETHOD(bus_write_ivar, at91_mci_write_ivar),
1380 DEVMETHOD(mmcbr_update_ios, at91_mci_update_ios),
1381 DEVMETHOD(mmcbr_request, at91_mci_request),
1382 DEVMETHOD(mmcbr_get_ro, at91_mci_get_ro),
1383 DEVMETHOD(mmcbr_acquire_host, at91_mci_acquire_host),
1384 DEVMETHOD(mmcbr_release_host, at91_mci_release_host),
1389 static driver_t at91_mci_driver = {
1392 sizeof(struct at91_mci_softc),
1395 static devclass_t at91_mci_devclass;
1397 DRIVER_MODULE(at91_mci, atmelarm, at91_mci_driver, at91_mci_devclass, NULL,