1 /* $NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $ */
4 * Copyright 2001 Wasabi Systems, Inc.
7 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
41 #include <sys/types.h>
42 #include <sys/systm.h>
43 #include <sys/param.h>
45 #include <machine/cache.h>
46 #include <machine/cache_r4k.h>
47 #include <machine/cpuinfo.h>
/*
 * Round an address up (round_lineNN) or truncate it down (trunc_lineNN)
 * to a 16- or 32-byte cache-line boundary.  Used below to widen a
 * [va, va + size) request so it covers whole cache lines before the
 * per-line CACHE ops are issued.
 */
49 #define round_line16(x) (((x) + 15) & ~15)
50 #define trunc_line16(x) ((x) & ~15)
52 #define round_line32(x) (((x) + 31) & ~31)
53 #define trunc_line32(x) ((x) & ~31)
/*
 * SYNC: ordering barrier issued after cache operations.  Pass-1
 * SB-1250 silicon needs a doubled "sync"; Netlogic XLP (CPU_NLM) uses
 * its own xlp_sync() routine; everything else gets a single "sync".
 * NOTE(review): the #else/#endif lines of these conditionals are not
 * visible in this listing.
 */
73 #if defined(SB1250_PASS1)
74 #define SYNC __asm volatile("sync; sync")
75 #elif defined(CPU_NLM)
76 #define SYNC xlp_sync()
78 #define SYNC __asm volatile("sync")
/*
 * SYNCI: barrier used after I-cache synchronization.  Cavium Octeon
 * (CPU_CNMIPS) goes through mips_sync_icache(); XLP reuses xlp_sync().
 */
81 #if defined(CPU_CNMIPS)
82 #define SYNCI mips_sync_icache();
83 #elif defined(CPU_NLM)
84 #define SYNCI xlp_sync()
90 * Exported variables for consumers like bus_dma code
/* L1 I/D line sizes, exported for consumers such as the bus_dma code. */
92 int mips_picache_linesize;
93 int mips_pdcache_linesize;
/*
 * Parameters driving the index-type loops below, computed once in
 * mipsNN_cache_init():
 *   *_size      - total cache size in bytes (bounds the all-cache walks);
 *   *_stride    - distance between successive index flushes of one chunk;
 *   *_loopcount - number of stride-sized chunks needed to hit every way;
 *   *_way_mask  - mask applied to a VA to form a cache-index address.
 */
95 static int picache_size;
96 static int picache_stride;
97 static int picache_loopcount;
98 static int picache_way_mask;
99 static int pdcache_size;
100 static int pdcache_stride;
101 static int pdcache_loopcount;
102 static int pdcache_way_mask;
/*
 * mipsNN_cache_init --
 *	Derive, from the probed L1 geometry in *cpuinfo, the stride,
 *	loop count, size and way mask used by the cache routines below,
 *	and export the I/D line sizes for bus_dma and other consumers.
 */
105 mipsNN_cache_init(struct mips_cpuinfo * cpuinfo)
107 	int flush_multiple_lines_per_way;
/*
 * Nonzero when index ops must touch more than one page-sized chunk per
 * way.
 * NOTE(review): ic_linesize appears twice in this product, while the
 * picache_loopcount computation below uses ic_nsets * ic_linesize
 * alone; the extra factor looks unintended -- confirm against the
 * upstream sources.
 */
109 flush_multiple_lines_per_way = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize * cpuinfo->l1.ic_linesize > PAGE_SIZE;
110 if (cpuinfo->icache_virtual) {
112 * With a virtual Icache we don't need to flush
113 * multiples of the page size with index ops; we just
114 * need to flush one pages' worth.
116 flush_multiple_lines_per_way = 0;
/* One way spans several pages: step by page, loop over pages * ways. */
119 if (flush_multiple_lines_per_way) {
120 picache_stride = PAGE_SIZE;
121 picache_loopcount = (cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize / PAGE_SIZE) *
122 cpuinfo->l1.ic_nways;
/* One way fits in a page: step by way size, loop once per way. */
124 picache_stride = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize;
125 picache_loopcount = cpuinfo->l1.ic_nways;
/* Same two cases for the D-cache. */
128 if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
129 pdcache_stride = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize;
130 pdcache_loopcount = cpuinfo->l1.dc_nways;
132 pdcache_stride = PAGE_SIZE;
133 pdcache_loopcount = (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize / PAGE_SIZE) *
134 cpuinfo->l1.dc_nways;
137 mips_picache_linesize = cpuinfo->l1.ic_linesize;
138 mips_pdcache_linesize = cpuinfo->l1.dc_linesize;
140 picache_size = cpuinfo->l1.ic_size;
/*
 * NOTE(review): the "way mask" is set to (nways - 1) here, yet the
 * *_range_index_* routines below AND it with a virtual address to
 * extract cache-index bits; a mask of (way size - 1) would seem to be
 * intended.  Verify against upstream before relying on index ops.
 */
141 picache_way_mask = cpuinfo->l1.ic_nways - 1;
142 pdcache_size = cpuinfo->l1.dc_size;
143 pdcache_way_mask = cpuinfo->l1.dc_nways - 1;
/* Console dump of the computed parameters (debug guard not visible here). */
147 printf("Cache info:\n");
148 if (cpuinfo->icache_virtual)
149 printf(" icache is virtual\n");
150 printf(" picache_stride = %d\n", picache_stride);
151 printf(" picache_loopcount = %d\n", picache_loopcount);
152 printf(" pdcache_stride = %d\n", pdcache_stride);
153 printf(" pdcache_loopcount = %d\n", pdcache_loopcount);
/*
 * mipsNN_icache_sync_all_16 --
 *	Synchronize the entire 16-byte-line L1 I-cache: write back and
 *	invalidate the whole D-cache, then index-invalidate every I-cache
 *	line by walking picache_size bytes of KSEG0.
 */
158 mipsNN_icache_sync_all_16(void)
162 va = MIPS_PHYS_TO_KSEG0(0);
163 eva = va + picache_size;
166 * Since we're hitting the whole thing, we don't have to
167 * worry about the N different "ways".
/* Push dirty D-cache data to memory so instruction fetches see it. */
170 mips_intern_dcache_wbinv_all();
/* Index-invalidate 32 lines (512 bytes) per call. */
173 cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
/*
 * mipsNN_icache_sync_all_32 --
 *	As mipsNN_icache_sync_all_16(), but for a 32-byte I-cache line:
 *	write back/invalidate the whole D-cache, then index-invalidate
 *	the full I-cache over KSEG0.
 */
181 mipsNN_icache_sync_all_32(void)
185 va = MIPS_PHYS_TO_KSEG0(0);
186 eva = va + picache_size;
189 * Since we're hitting the whole thing, we don't have to
190 * worry about the N different "ways".
/* Push dirty D-cache data to memory so instruction fetches see it. */
193 mips_intern_dcache_wbinv_all();
/* Index-invalidate 32 lines (1 KB) per call. */
196 cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
/*
 * mipsNN_icache_sync_range_16 --
 *	Synchronize [va, va + size) in a 16-byte-line I-cache: widen the
 *	range to line boundaries, write the D-cache range back, then
 *	hit-invalidate the I-cache lines -- 32 lines per iteration in
 *	the bulk loop, one line at a time for the tail.
 */
204 mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
208 eva = round_line16(va + size);
209 va = trunc_line16(va);
211 mips_intern_dcache_wb_range(va, (eva - va));
213 while ((eva - va) >= (32 * 16)) {
214 cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
/* Per-line tail for the remainder under 512 bytes. */
219 cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
/*
 * mipsNN_icache_sync_range_32 --
 *	As mipsNN_icache_sync_range_16(), but for 32-byte lines: write
 *	the D-cache range back, then hit-invalidate the covering I-cache
 *	lines, 32 lines (1 KB) per bulk iteration plus a per-line tail.
 */
227 mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
231 eva = round_line32(va + size);
232 va = trunc_line32(va);
234 mips_intern_dcache_wb_range(va, (eva - va));
236 while ((eva - va) >= (32 * 32)) {
237 cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
/* Per-line tail for the remainder under 1 KB. */
242 cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
/*
 * mipsNN_icache_sync_range_index_16 --
 *	Index-op variant of the 16-byte-line I-cache sync, for when va
 *	may not be mapped: the VA is reduced to its cache-index bits and
 *	rehomed in KSEG0, then every way is invalidated by stepping
 *	tmpva through (stride, loopcount) chunks -- 8 lines per step in
 *	the bulk loop, one line per step in the tail.
 *	NOTE(review): the tmpva = va initialization and loop-advance
 *	lines are not visible in this listing.
 */
250 mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
252 vm_offset_t eva, tmpva;
253 int i, stride, loopcount;
256 * Since we're doing Index ops, we expect to not be able
257 * to access the address we've been given. So, get the
258 * bits that determine the cache index, and make a KSEG0
259 * address out of them.
261 va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);
263 eva = round_line16(va + size);
264 va = trunc_line16(va);
267 * GCC generates better code in the loops if we reference local
268 * copies of these global variables.
270 stride = picache_stride;
271 loopcount = picache_loopcount;
/* Evict the matching D-cache indices first. */
273 mips_intern_dcache_wbinv_range_index(va, (eva - va));
275 while ((eva - va) >= (8 * 16)) {
277 for (i = 0; i < loopcount; i++, tmpva += stride)
278 cache_r4k_op_8lines_16(tmpva,
279 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
/* Per-line tail, still hitting every way. */
285 for (i = 0; i < loopcount; i++, tmpva += stride)
286 cache_op_r4k_line(tmpva,
287 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
/*
 * mipsNN_icache_sync_range_index_32 --
 *	As mipsNN_icache_sync_range_index_16(), but for 32-byte lines:
 *	rebase the VA's index bits into KSEG0, wbinv the matching
 *	D-cache indices, then index-invalidate the I-cache across every
 *	way via (stride, loopcount) steps -- 8 lines per bulk step.
 *	NOTE(review): the tmpva = va initialization and loop-advance
 *	lines are not visible in this listing.
 */
293 mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
295 vm_offset_t eva, tmpva;
296 int i, stride, loopcount;
299 * Since we're doing Index ops, we expect to not be able
300 * to access the address we've been given. So, get the
301 * bits that determine the cache index, and make a KSEG0
302 * address out of them.
304 va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);
306 eva = round_line32(va + size);
307 va = trunc_line32(va);
310 * GCC generates better code in the loops if we reference local
311 * copies of these global variables.
313 stride = picache_stride;
314 loopcount = picache_loopcount;
/* Evict the matching D-cache indices first. */
316 mips_intern_dcache_wbinv_range_index(va, (eva - va));
318 while ((eva - va) >= (8 * 32)) {
320 for (i = 0; i < loopcount; i++, tmpva += stride)
321 cache_r4k_op_8lines_32(tmpva,
322 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
/* Per-line tail, still hitting every way. */
328 for (i = 0; i < loopcount; i++, tmpva += stride)
329 cache_op_r4k_line(tmpva,
330 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
/*
 * mipsNN_pdcache_wbinv_all_16 --
 *	Write back and invalidate the entire 16-byte-line L1 D-cache by
 *	index ops over a pdcache_size-byte walk of KSEG0.
 */
336 mipsNN_pdcache_wbinv_all_16(void)
340 va = MIPS_PHYS_TO_KSEG0(0);
341 eva = va + pdcache_size;
344 * Since we're hitting the whole thing, we don't have to
345 * worry about the N different "ways".
/* Index writeback-invalidate, 32 lines (512 bytes) per call. */
349 cache_r4k_op_32lines_16(va,
350 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
/*
 * mipsNN_pdcache_wbinv_all_32 --
 *	Write back and invalidate the entire 32-byte-line L1 D-cache by
 *	index ops over a pdcache_size-byte walk of KSEG0.
 */
358 mipsNN_pdcache_wbinv_all_32(void)
362 va = MIPS_PHYS_TO_KSEG0(0);
363 eva = va + pdcache_size;
366 * Since we're hitting the whole thing, we don't have to
367 * worry about the N different "ways".
/* Index writeback-invalidate, 32 lines (1 KB) per call. */
371 cache_r4k_op_32lines_32(va,
372 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
/*
 * mipsNN_pdcache_wbinv_range_16 --
 *	Hit-writeback-invalidate the 16-byte D-cache lines covering
 *	[va, va + size): 32 lines per bulk iteration, then a per-line
 *	tail for the remainder.
 */
380 mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
384 eva = round_line16(va + size);
385 va = trunc_line16(va);
387 while ((eva - va) >= (32 * 16)) {
388 cache_r4k_op_32lines_16(va,
389 CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
/* Per-line tail for the remainder under 512 bytes. */
394 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
/*
 * mipsNN_pdcache_wbinv_range_32 --
 *	Hit-writeback-invalidate the 32-byte D-cache lines covering
 *	[va, va + size): 32 lines (1 KB) per bulk iteration, then a
 *	per-line tail for the remainder.
 */
402 mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
406 eva = round_line32(va + size);
407 va = trunc_line32(va);
409 while ((eva - va) >= (32 * 32)) {
410 cache_r4k_op_32lines_32(va,
411 CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
/* Per-line tail for the remainder under 1 KB. */
416 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
/*
 * mipsNN_pdcache_wbinv_range_index_16 --
 *	Index-op variant of the 16-byte-line D-cache wbinv, for when va
 *	may not be mapped: the VA's index bits are rehomed in KSEG0 and
 *	every way is written back and invalidated via (stride, loopcount)
 *	steps -- 8 lines per bulk step, one line per tail step.
 *	NOTE(review): the tmpva = va initialization and loop-advance
 *	lines are not visible in this listing.
 */
424 mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
426 vm_offset_t eva, tmpva;
427 int i, stride, loopcount;
430 * Since we're doing Index ops, we expect to not be able
431 * to access the address we've been given. So, get the
432 * bits that determine the cache index, and make a KSEG0
433 * address out of them.
435 va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);
437 eva = round_line16(va + size);
438 va = trunc_line16(va);
441 * GCC generates better code in the loops if we reference local
442 * copies of these global variables.
444 stride = pdcache_stride;
445 loopcount = pdcache_loopcount;
447 while ((eva - va) >= (8 * 16)) {
449 for (i = 0; i < loopcount; i++, tmpva += stride)
450 cache_r4k_op_8lines_16(tmpva,
451 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
/* Per-line tail, still hitting every way. */
457 for (i = 0; i < loopcount; i++, tmpva += stride)
458 cache_op_r4k_line(tmpva,
459 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
/*
 * mipsNN_pdcache_wbinv_range_index_32 --
 *	As mipsNN_pdcache_wbinv_range_index_16(), but for 32-byte lines:
 *	rebase the VA's index bits into KSEG0 and writeback-invalidate
 *	every way via (stride, loopcount) steps, 8 lines per bulk step.
 *	NOTE(review): the tmpva = va initialization and loop-advance
 *	lines are not visible in this listing.
 */
465 mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
467 vm_offset_t eva, tmpva;
468 int i, stride, loopcount;
471 * Since we're doing Index ops, we expect to not be able
472 * to access the address we've been given. So, get the
473 * bits that determine the cache index, and make a KSEG0
474 * address out of them.
476 va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);
478 eva = round_line32(va + size);
479 va = trunc_line32(va);
482 * GCC generates better code in the loops if we reference local
483 * copies of these global variables.
485 stride = pdcache_stride;
486 loopcount = pdcache_loopcount;
488 while ((eva - va) >= (8 * 32)) {
490 for (i = 0; i < loopcount; i++, tmpva += stride)
491 cache_r4k_op_8lines_32(tmpva,
492 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
/* Per-line tail, still hitting every way. */
498 for (i = 0; i < loopcount; i++, tmpva += stride)
499 cache_op_r4k_line(tmpva,
500 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
/*
 * mipsNN_pdcache_inv_range_16 --
 *	Hit-invalidate (no writeback) the 16-byte D-cache lines covering
 *	[va, va + size).  Because the range is widened to line
 *	boundaries, lines that only partially overlap it are discarded
 *	whole -- any dirty data sharing those lines is lost.
 */
506 mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
510 eva = round_line16(va + size);
511 va = trunc_line16(va);
513 while ((eva - va) >= (32 * 16)) {
514 cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
/* Per-line tail for the remainder under 512 bytes. */
519 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
/*
 * mipsNN_pdcache_inv_range_32 --
 *	Hit-invalidate (no writeback) the 32-byte D-cache lines covering
 *	[va, va + size).  As with the 16-byte variant, partially
 *	overlapped boundary lines are discarded whole.
 */
527 mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
531 eva = round_line32(va + size);
532 va = trunc_line32(va);
534 while ((eva - va) >= (32 * 32)) {
535 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
/* Per-line tail for the remainder under 1 KB. */
540 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
/*
 * mipsNN_pdcache_wb_range_16 --
 *	Hit-writeback (without invalidating) the 16-byte D-cache lines
 *	covering [va, va + size): dirty data reaches memory, the lines
 *	stay resident.
 */
548 mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
552 eva = round_line16(va + size);
553 va = trunc_line16(va);
555 while ((eva - va) >= (32 * 16)) {
556 cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
/* Per-line tail for the remainder under 512 bytes. */
561 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
/*
 * mipsNN_pdcache_wb_range_32 --
 *	Hit-writeback (without invalidating) the 32-byte D-cache lines
 *	covering [va, va + size).
 */
569 mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
573 eva = round_line32(va + size);
574 va = trunc_line32(va);
576 while ((eva - va) >= (32 * 32)) {
577 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
/* Per-line tail for the remainder under 1 KB. */
582 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
/*
 * 128-byte-line variants of the primitives above.  Only the function
 * name lines are visible in this listing; their bodies and the
 * preprocessor context that selects them are not shown here, so no
 * behavior is documented for them.
 */
593 mipsNN_icache_sync_all_128(void)
599 mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
605 mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
611 mipsNN_pdcache_wbinv_all_128(void)
617 mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
623 mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
628 mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
633 mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)