/*	$NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $	*/
/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_cputype.h"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>
#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)

#define	round_line32(x)		(((x) + 31) & ~31)
#define	trunc_line32(x)		((x) & ~31)
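
/*
 * Worked example of the alignment macros (hypothetical addresses): for
 * va = 0x80000109 and size = 0x30, trunc_line16(va) yields 0x80000100 and
 * round_line16(va + size) yields 0x80000140, so a 16-byte-line range loop
 * covers the four whole cache lines that overlap the buffer.
 */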
#if defined(SB1250_PASS1)
#define	SYNC	__asm volatile("sync; sync")
#else
#define	SYNC	__asm volatile("sync")
#endif

#ifdef CPU_CNMIPS
#define	SYNCI	mips_sync_icache();
#endif
/*
 * Exported variables for consumers like bus_dma code
 */
int mips_picache_linesize;
int mips_pdcache_linesize;
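
/*
 * A consumer would typically round a buffer out to whole dcache lines
 * before a writeback/invalidate.  Hypothetical sketch, not code from this
 * file (roundup2() is from <sys/param.h>, mips_dcache_wbinv_range() is the
 * <machine/cache.h> wrapper around the routines below):
 *
 *	len = roundup2(len, mips_pdcache_linesize);
 *	mips_dcache_wbinv_range((vm_offset_t)buf, len);
 */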
static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;
void
mipsNN_cache_init(struct mips_cpuinfo * cpuinfo)
{
	int flush_multiple_lines_per_way;

	/*
	 * A way holds ic_nsets * ic_linesize bytes; when that exceeds a
	 * page, index ops have to flush several page-sized slices per way.
	 */
	flush_multiple_lines_per_way =
	    cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize > PAGE_SIZE;
	if (cpuinfo->icache_virtual) {
		/*
		 * With a virtual Icache we don't need to flush
		 * multiples of the page size with index ops; we just
		 * need to flush one page's worth.
		 */
		flush_multiple_lines_per_way = 0;
	}
	if (flush_multiple_lines_per_way) {
		picache_stride = PAGE_SIZE;
		picache_loopcount = (cpuinfo->l1.ic_nsets *
		    cpuinfo->l1.ic_linesize / PAGE_SIZE) *
		    cpuinfo->l1.ic_nways;
	} else {
		picache_stride = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize;
		picache_loopcount = cpuinfo->l1.ic_nways;
	}
	if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
		pdcache_stride = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize;
		pdcache_loopcount = cpuinfo->l1.dc_nways;
	} else {
		pdcache_stride = PAGE_SIZE;
		pdcache_loopcount = (cpuinfo->l1.dc_nsets *
		    cpuinfo->l1.dc_linesize / PAGE_SIZE) *
		    cpuinfo->l1.dc_nways;
	}
	mips_picache_linesize = cpuinfo->l1.ic_linesize;
	mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

	picache_size = cpuinfo->l1.ic_size;
	pdcache_size = cpuinfo->l1.dc_size;

	/*
	 * The way masks select the index bits within a single way, so they
	 * must be (way size - 1), not (number of ways - 1); they are ANDed
	 * with virtual addresses in the index routines below.
	 */
	picache_way_mask = (cpuinfo->l1.ic_size / cpuinfo->l1.ic_nways) - 1;
	pdcache_way_mask = (cpuinfo->l1.dc_size / cpuinfo->l1.dc_nways) - 1;
#ifdef CACHE_DEBUG
	printf("Cache info:\n");
	if (cpuinfo->icache_virtual)
		printf("  icache is virtual\n");
	printf("  picache_stride    = %d\n", picache_stride);
	printf("  picache_loopcount = %d\n", picache_loopcount);
	printf("  pdcache_stride    = %d\n", pdcache_stride);
	printf("  pdcache_loopcount = %d\n", pdcache_loopcount);
#endif
}
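
/*
 * Worked example of the stride/loopcount computation above (hypothetical
 * geometry): a 4-way, 32 KB icache with 32-byte lines and PAGE_SIZE = 4096
 * has a way size of 256 sets * 32 bytes = 8192 bytes.  Since 8192 >
 * PAGE_SIZE, picache_stride = 4096 and picache_loopcount =
 * (8192 / 4096) * 4 = 8: eight page-sized index passes cover every
 * <way, index> pair exactly once.
 */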
void
mipsNN_icache_sync_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 16);
	}

	SYNC;
}
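
/*
 * Index ops select a line directly by the address' index (and way) bits
 * rather than by tag match, so sweeping va from 0 to picache_size above
 * touches every line in every way.  With the hypothetical 32 KB cache and
 * 16-byte lines, that is 2048 index-invalidates issued 32 at a time.
 */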
void
mipsNN_icache_sync_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}

	SYNC;
}
void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}
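
/*
 * Unlike the Index variants further below, Hit ops behave like a normal
 * lookup: they act only on a line whose tag matches va, so a mapped range
 * can be operated on directly and no per-way looping is needed.
 */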
void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}
void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 16;
	}
}
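
/*
 * Sketch of the index walk above (hypothetical values): with
 * picache_stride = 4096 and picache_loopcount = 8, each block of lines is
 * invalidated at tmpva, tmpva + 4096, ..., tmpva + 7 * 4096, hitting the
 * same cache index in every way (and in each page-sized slice of an
 * 8 KB way).
 */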
void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
	}
}
void
mipsNN_pdcache_wbinv_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}

	SYNC;
}
void
mipsNN_pdcache_wbinv_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	SYNC;
}
void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}

	SYNC;
}
void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	SYNC;
}
void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
	}
}
void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}
void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}
void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}
void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}

	SYNC;
}
void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	SYNC;
}
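
/*
 * Summary of the three dcache range primitives above: "wb" (HIT_WB) writes
 * dirty lines back to memory and leaves them valid, "inv" (HIT_INV)
 * discards lines without writing them back (e.g. for a buffer a device is
 * about to DMA into), and "wbinv" (HIT_WB_INV) does both.
 */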
#ifdef CPU_CNMIPS

/*
 * cnMIPS (Octeon) cores keep their L1 dcache coherent (write-through), so
 * the 128-byte-line variants below reduce to ordering barriers (SYNC) and
 * icache synchronization (SYNCI), or to nothing at all.
 */

void
mipsNN_icache_sync_all_128(void)
{
	SYNCI
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
	SYNCI
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
	SYNC;
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

#endif