From 1bdeba6cdb3b7999e7c71418d775a1b4c9269234 Mon Sep 17 00:00:00 2001
From: ian
Date: Sat, 27 Dec 2014 01:28:52 +0000
Subject: [PATCH] MFC r274538, r274545, r274596, r274602, r274603, r274604,
 r274605, r274839:

When doing busdma sync ops for BUS_DMA_COHERENT memory, there is no need
for cache maintenance operations, but ensure that all prior writes have
reached memory when doing a PREWRITE sync.

Do not do a cache invalidate on a PREREAD sync that is also a PREWRITE
sync.

Do the cache invalidate sequence from outermost to innermost, as required
for correct operation.

Correct the sequence of busdma sync ops involved with PRE/POSTREAD syncs.

When doing a PREREAD sync of an mbuf-type dma buffer, do a writeback of
the first cacheline if the buffer start address is not on a cacheline
boundary.

git-svn-id: svn://svn.freebsd.org/base/stable/10@276274 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f
---
 sys/arm/arm/busdma_machdep-v6.c | 134 ++++++++++++++++++++++++--------
 1 file changed, 100 insertions(+), 34 deletions(-)

diff --git a/sys/arm/arm/busdma_machdep-v6.c b/sys/arm/arm/busdma_machdep-v6.c
index 6dd08806e..9a2a74238 100644
--- a/sys/arm/arm/busdma_machdep-v6.c
+++ b/sys/arm/arm/busdma_machdep-v6.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2012 Ian Lepore
+ * Copyright (c) 2012-2014 Ian Lepore
  * Copyright (c) 2010 Mark Tinguely
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 2002 Peter Grehan
@@ -322,6 +322,7 @@ static __inline int
 might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr,
     bus_size_t size)
 {
+
 	return ((dmat->flags & BUS_DMA_EXCL_BOUNCE) ||
 	    alignment_bounce(dmat, addr) ||
 	    cacheline_bounce(map, addr, size));
@@ -420,6 +421,7 @@ busdma_lock_mutex(void *arg, bus_dma_lock_op_t op)
 
 static void
 dflt_lock(void *arg, bus_dma_lock_op_t op)
 {
+
 	panic("driver error: busdma dflt_lock called");
 }
@@ -600,7 +602,7 @@ bus_dma_tag_destroy(bus_dma_tag_t dmat)
 
 static int
 allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp)
 {
-	struct bounce_zone *bz;
+	struct bounce_zone *bz;
 	int maxpages;
 	int error;
@@ -1227,13 +1229,13 @@ _bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 }
 
 #ifdef notyetbounceuser
-	/* If busdma uses user pages, then the interrupt handler could
-	 * be use the kernel vm mapping. Both bounce pages and sync list
-	 * do not cross page boundaries.
-	 * Below is a rough sequence that a person would do to fix the
-	 * user page reference in the kernel vmspace. This would be
-	 * done in the dma post routine.
-	 */
+/* If busdma uses user pages, then the interrupt handler could
+ * be using the kernel vm mapping.  Neither bounce pages nor sync
+ * list entries cross page boundaries.
+ * Below is a rough sequence that a person would do to fix the
+ * user page reference in the kernel vmspace.  This would be
+ * done in the dma post routine.
+ */
 void
 _bus_dmamap_fix_user(vm_offset_t buf, bus_size_t len,
     pmap_t pmap, int op)
@@ -1242,10 +1244,10 @@ _bus_dmamap_fix_user(vm_offset_t buf, bus_size_t len,
 	bus_addr_t curaddr;
 	vm_offset_t va;
 
-	/* each synclist entry is contained within a single page.
-	 *
-	 * this would be needed if BUS_DMASYNC_POSTxxxx was implemented
-	 */
+	/*
+	 * Each synclist entry is contained within a single page.
+	 * This would be needed if BUS_DMASYNC_POSTxxxx were implemented.
+	 */
 	curaddr = pmap_extract(pmap, buf);
 	va = pmap_dma_map(curaddr);
 	switch (op) {
@@ -1287,17 +1289,20 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 	/*
 	 * If the buffer was from user space, it is possible that this is not
 	 * the same vm map, especially on a POST operation.  It's not clear that
-	 * dma on userland buffers can work at all right now, certainly not if a
-	 * partial cacheline flush has to be handled.  To be safe, until we're
-	 * able to test direct userland dma, panic on a map mismatch.
+	 * dma on userland buffers can work at all right now.  To be safe, until
+	 * we're able to test direct userland dma, panic on a map mismatch.
 	 */
 	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		if (!pmap_dmap_iscurrent(map->pmap))
 			panic("_bus_dmamap_sync: wrong user map for bounce sync.");
-		/* Handle data bouncing. */
+
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 		    "performing bounce", __func__, dmat, dmat->flags, op);
 
+		/*
+		 * For PREWRITE do a writeback.  Clean the caches from the
+		 * innermost to the outermost levels.
+		 */
 		if (op & BUS_DMASYNC_PREWRITE) {
 			while (bpage != NULL) {
 				if (bpage->datavaddr != 0)
@@ -1309,7 +1314,7 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 					(void *)bpage->vaddr,
 					bpage->datacount);
 				cpu_dcache_wb_range((vm_offset_t)bpage->vaddr,
-					bpage->datacount);
+				    bpage->datacount);
 				l2cache_wb_range((vm_offset_t)bpage->vaddr,
 				    (vm_offset_t)bpage->busaddr,
 				    bpage->datacount);
@@ -1318,7 +1323,18 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 			dmat->bounce_zone->total_bounced++;
 		}
 
-		if (op & BUS_DMASYNC_PREREAD) {
+		/*
+		 * Do an invalidate for PREREAD unless a writeback was already
+		 * done above due to PREWRITE also being set.  The reason for a
+		 * PREREAD invalidate is to prevent dirty lines currently in the
+		 * cache from being evicted during the DMA.  If a writeback was
+		 * done due to PREWRITE also being set there will be no dirty
+		 * lines and the POSTREAD invalidate handles the rest.  The
+		 * invalidate is done from the innermost to outermost level.  If
+		 * L2 were done first, a dirty cacheline could be automatically
+		 * evicted from L1 before we invalidated it, re-dirtying the L2.
+		 */
+		if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) {
 			bpage = STAILQ_FIRST(&map->bpages);
 			while (bpage != NULL) {
 				cpu_dcache_inv_range((vm_offset_t)bpage->vaddr,
@@ -1329,6 +1345,16 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 		}
+
+		/*
+		 * Re-invalidate the caches on a POSTREAD, even though they were
+		 * already invalidated at PREREAD time.  Aggressive prefetching
+		 * due to accesses to other data near the dma buffer could have
+		 * brought buffer data into the caches which is now stale.  The
+		 * caches are invalidated from the outermost to innermost; the
+		 * prefetches could be happening right now, and if L1 were
+		 * invalidated first, stale L2 data could be prefetched into L1.
+		 */
 		if (op & BUS_DMASYNC_POSTREAD) {
 			while (bpage != NULL) {
 				vm_offset_t startv;
@@ -1345,8 +1371,8 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 					len = (len - (len & arm_dcache_align_mask)) +
 					    arm_dcache_align;
-				cpu_dcache_inv_range(startv, len);
 				l2cache_inv_range(startv, startp, len);
+				cpu_dcache_inv_range(startv, len);
 				if (bpage->datavaddr != 0)
 					bcopy((void *)bpage->vaddr,
 					    (void *)bpage->datavaddr,
 					    bpage->datacount);
@@ -1360,13 +1386,33 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 			dmat->bounce_zone->total_bounced++;
 		}
 	}
-	if (map->flags & DMAMAP_COHERENT)
+
+	/*
+	 * For COHERENT memory no cache maintenance is necessary, but ensure
+	 * all writes have reached memory for the PREWRITE case.  No action is
+	 * needed for a PREREAD without PREWRITE also set, because that would
+	 * imply that the cpu had written to the COHERENT buffer and expected
+	 * the dma device to see that change, and by definition a PREWRITE sync
+	 * is required to make that happen.
+	 */
+	if (map->flags & DMAMAP_COHERENT) {
+		if (op & BUS_DMASYNC_PREWRITE) {
+			dsb();
+			cpu_l2cache_drain_writebuf();
+		}
 		return;
+	}
 
+	/*
+	 * Cache maintenance for normal (non-COHERENT non-bounce) buffers.  All
+	 * the comments about the sequences for flushing cache levels in the
+	 * bounce buffer code above apply here as well.  In particular, the fact
+	 * that the sequence is inner-to-outer for PREREAD invalidation and
+	 * outer-to-inner for POSTREAD invalidation is not a mistake.
+	 */
 	if (map->sync_count != 0) {
 		if (!pmap_dmap_iscurrent(map->pmap))
 			panic("_bus_dmamap_sync: wrong user map for sync.");
-		/* ARM caches are not self-snooping for dma */
 
 		sl = &map->slist[0];
 		end = &map->slist[map->sync_count];
@@ -1375,16 +1421,34 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 
 		switch (op) {
 		case BUS_DMASYNC_PREWRITE:
+		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
 			while (sl != end) {
-				cpu_dcache_wb_range(sl->vaddr, sl->datacount);
-				l2cache_wb_range(sl->vaddr, sl->busaddr,
-				    sl->datacount);
-				sl++;
+				cpu_dcache_wb_range(sl->vaddr, sl->datacount);
+				l2cache_wb_range(sl->vaddr, sl->busaddr,
+				    sl->datacount);
+				sl++;
 			}
 			break;
 
 		case BUS_DMASYNC_PREREAD:
+			/*
+			 * An mbuf may start in the middle of a cacheline.  There
+			 * will be no cpu writes to the beginning of that line
+			 * (which contains the mbuf header) while dma is in
+			 * progress.  Handle that case by doing a writeback of
+			 * just the first cacheline before invalidating the
+			 * overall buffer.  Any mbuf in a chain may have this
+			 * misalignment.  Buffers which are not mbufs bounce if
+			 * they are not aligned to a cacheline.
+			 */
 			while (sl != end) {
+				if (sl->vaddr & arm_dcache_align_mask) {
+					KASSERT(map->flags & DMAMAP_MBUF,
+					    ("unaligned buffer is not an mbuf"));
+					cpu_dcache_wb_range(sl->vaddr, 1);
+					l2cache_wb_range(sl->vaddr,
+					    sl->busaddr, 1);
+				}
 				cpu_dcache_inv_range(sl->vaddr, sl->datacount);
 				l2cache_inv_range(sl->vaddr, sl->busaddr,
 				    sl->datacount);
@@ -1392,19 +1456,19 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 			}
 			break;
 
-		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
-			while (sl != end) {
-				cpu_dcache_wbinv_range(sl->vaddr, sl->datacount);
-				l2cache_wbinv_range(sl->vaddr,
-				    sl->busaddr, sl->datacount);
-				sl++;
-			}
+		case BUS_DMASYNC_POSTWRITE:
 			break;
 
 		case BUS_DMASYNC_POSTREAD:
-		case BUS_DMASYNC_POSTWRITE:
 		case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
+			while (sl != end) {
+				l2cache_inv_range(sl->vaddr, sl->busaddr,
+				    sl->datacount);
+				cpu_dcache_inv_range(sl->vaddr, sl->datacount);
+				sl++;
+			}
 			break;
+
 		default:
 			panic("unsupported combination of sync operations: 0x%08x\n", op);
 			break;
@@ -1427,12 +1491,14 @@ SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
+
 	return (&bz->sysctl_tree);
 }
 
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
+
 	return (bz->sysctl_tree_top);
 }
-- 
2.45.0
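
A note on the ordering rules this patch establishes: as a stand-alone
illustration, here is a minimal user-space C sketch of the PREREAD /
PREWRITE / POSTREAD sequencing described in the comments above.  The
l1_*/l2_* routines are hypothetical printf stubs standing in for the
kernel's per-level primitives (cpu_dcache_*_range() and
l2cache_*_range()); only the ordering logic mirrors the patch.

#include <stdint.h>
#include <stdio.h>

#define PREREAD		0x1
#define PREWRITE	0x2
#define POSTREAD	0x4

/* Hypothetical stand-ins for the per-level cache maintenance primitives. */
static void l1_wb(uintptr_t va, size_t len)  { printf("L1 writeback  %#lx+%zu\n", (unsigned long)va, len); }
static void l1_inv(uintptr_t va, size_t len) { printf("L1 invalidate %#lx+%zu\n", (unsigned long)va, len); }
static void l2_wb(uintptr_t va, size_t len)  { printf("L2 writeback  %#lx+%zu\n", (unsigned long)va, len); }
static void l2_inv(uintptr_t va, size_t len) { printf("L2 invalidate %#lx+%zu\n", (unsigned long)va, len); }

static void
sync_buffer(int op, uintptr_t va, size_t len)
{
	if (op & PREWRITE) {
		/* Push dirty data toward memory: inner to outer. */
		l1_wb(va, len);
		l2_wb(va, len);
	} else if (op & PREREAD) {
		/*
		 * Discard lines that could otherwise be evicted over the
		 * DMA data: inner to outer, so a dirty L1 line cannot be
		 * evicted into an already-invalidated L2.  Skipped when
		 * PREWRITE is also set, because the writeback leaves no
		 * dirty lines and the POSTREAD invalidate does the rest.
		 */
		l1_inv(va, len);
		l2_inv(va, len);
	}
	if (op & POSTREAD) {
		/*
		 * Discard anything prefetched during the DMA: outer to
		 * inner, so stale L2 data cannot be prefetched into an
		 * already-invalidated L1.
		 */
		l2_inv(va, len);
		l1_inv(va, len);
	}
}

int
main(void)
{
	sync_buffer(PREREAD | PREWRITE, 0x1000, 64);	/* writeback only */
	sync_buffer(POSTREAD, 0x1000, 64);		/* outer to inner */
	return (0);
}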
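
In the same spirit, a sketch of the mbuf special case in the PREREAD
path.  The dcache_wb()/dcache_inv() stubs are again hypothetical, and
the 32-byte line size is an assumption for illustration, not a value
taken from the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CACHE_LINE_SIZE	32u	/* assumed line size, for illustration */
#define CACHE_LINE_MASK	(CACHE_LINE_SIZE - 1)

/* Hypothetical stand-ins for the real cache maintenance primitives. */
static void
dcache_wb(uintptr_t va, size_t len)
{
	printf("writeback  %#lx len %zu (cleans the whole line)\n",
	    (unsigned long)va, len);
}

static void
dcache_inv(uintptr_t va, size_t len)
{
	printf("invalidate %#lx len %zu\n", (unsigned long)va, len);
}

/*
 * PREREAD sync for one buffer segment.  Only an mbuf may legitimately
 * start mid-line: its header shares the first cacheline with the data,
 * and the CPU will not write that line while DMA is in progress, so
 * writing back just the first line preserves the header across the
 * invalidate that follows.
 */
static void
preread_sync(uintptr_t va, size_t len, bool is_mbuf)
{
	if ((va & CACHE_LINE_MASK) != 0 && is_mbuf)
		dcache_wb(va, 1);	/* a 1-byte range still cleans its line */
	dcache_inv(va, len);
}

int
main(void)
{
	/* An mbuf whose data area starts 8 bytes into a cacheline. */
	preread_sync(0x2008, 256, true);
	return (0);
}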