/*	$NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)

#define	round_line32(x)		(((x) + 31) & ~31)
#define	trunc_line32(x)		((x) & ~31)

#define	round_line128(x)	(((x) + 127) & ~127)
#define	trunc_line128(x)	((x) & ~127)

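/*
 * Worked example of the helpers above (addresses are illustrative
 * only): with 16-byte lines, trunc_line16(0x1234) == 0x1230 and
 * round_line16(0x1235) == 0x1240, so the half-open interval
 * [trunc_line16(va), round_line16(va + size)) covers every cache
 * line touched by [va, va + size).
 */
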
#if defined(SB1250_PASS1)
#define	SYNC	__asm volatile("sync; sync")
#elif defined(CPU_NLM)
#define	SYNC	xlp_sync()
#else
#define	SYNC	__asm volatile("sync")
#endif

#if defined(CPU_CNMIPS)
#define	SYNCI	mips_sync_icache();
#elif defined(CPU_NLM)
#define	SYNCI	xlp_sync()
#else
#define	SYNCI
#endif

/*
 * Exported variables for consumers like bus_dma code.
 */
int mips_picache_linesize;
int mips_pdcache_linesize;

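/*
 * Hypothetical consumer sketch (illustrative, not part of this file):
 * a driver invalidating a DMA buffer before a device-to-memory
 * transfer might first round the length to the D-cache line size:
 *
 *	size = roundup2(size, mips_pdcache_linesize);
 *	mips_dcache_inv_range((vm_offset_t)buf, size);
 *
 * In the names below, "pi" is the primary (L1) instruction cache,
 * "pd" the primary data cache, and "sd" the secondary (L2) data cache.
 */
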
static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;
static int sdcache_size;
static int sdcache_stride;
static int sdcache_loopcount;
static int sdcache_way_mask;

void
mipsNN_cache_init(struct mips_cpuinfo *cpuinfo)
{
	int flush_multiple_lines_per_way;

	/* Flush a way in page-sized chunks when it is larger than a page. */
	flush_multiple_lines_per_way =
	    cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize > PAGE_SIZE;
	if (cpuinfo->icache_virtual) {
		/*
		 * With a virtual Icache we don't need to flush
		 * multiples of the page size with index ops; we just
		 * need to flush one page's worth.
		 */
		flush_multiple_lines_per_way = 0;
	}

	if (flush_multiple_lines_per_way) {
		picache_stride = PAGE_SIZE;
		picache_loopcount = (cpuinfo->l1.ic_nsets *
		    cpuinfo->l1.ic_linesize / PAGE_SIZE) *
		    cpuinfo->l1.ic_nways;
	} else {
		picache_stride = cpuinfo->l1.ic_nsets *
		    cpuinfo->l1.ic_linesize;
		picache_loopcount = cpuinfo->l1.ic_nways;
	}

	if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
		pdcache_stride = cpuinfo->l1.dc_nsets *
		    cpuinfo->l1.dc_linesize;
		pdcache_loopcount = cpuinfo->l1.dc_nways;
	} else {
		pdcache_stride = PAGE_SIZE;
		pdcache_loopcount = (cpuinfo->l1.dc_nsets *
		    cpuinfo->l1.dc_linesize / PAGE_SIZE) *
		    cpuinfo->l1.dc_nways;
	}

	mips_picache_linesize = cpuinfo->l1.ic_linesize;
	mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

	/* The way masks cover the index bits of a single way. */
	picache_size = cpuinfo->l1.ic_size;
	picache_way_mask = (picache_size / cpuinfo->l1.ic_nways) - 1;
	pdcache_size = cpuinfo->l1.dc_size;
	pdcache_way_mask = (pdcache_size / cpuinfo->l1.dc_nways) - 1;

	sdcache_stride = cpuinfo->l2.dc_nsets * cpuinfo->l2.dc_linesize;
	sdcache_loopcount = cpuinfo->l2.dc_nways;
	sdcache_size = cpuinfo->l2.dc_size;
	sdcache_way_mask = cpuinfo->l2.dc_nways - 1;

#ifdef CACHE_DEBUG
	printf("Cache info:\n");
	if (cpuinfo->icache_virtual)
		printf("  icache is virtual\n");
	printf("  picache_stride    = %d\n", picache_stride);
	printf("  picache_loopcount = %d\n", picache_loopcount);
	printf("  pdcache_stride    = %d\n", pdcache_stride);
	printf("  pdcache_loopcount = %d\n", pdcache_loopcount);
#endif
}

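/*
 * Worked example (hypothetical configuration): a 4-way, 32 KB L1
 * I-cache with 32-byte lines has 256 sets, so one way spans
 * 256 * 32 = 8 KB.  With 4 KB pages, each way is then flushed in
 * page-sized chunks: picache_stride = 4096 and
 * picache_loopcount = (8192 / 4096) * 4 = 8, i.e. eight page-sized
 * passes cover every index in every way.
 */
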
void
mipsNN_icache_sync_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_icache_sync_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

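/*
 * All of the ranged operations in this file share the two-loop shape
 * above: the first loop retires 32 lines (here 32 * 16 = 512 bytes)
 * per iteration via the unrolled cache_r4k_op_32lines_* helpers, and
 * the second loop finishes the remaining (at most 31) lines one at a
 * time.
 */
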
void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect not to be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 16;
	}
}

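/*
 * Worked example of the index walk above (hypothetical values): with
 * picache_stride = 4096 and picache_loopcount = 8, each group of
 * lines is invalidated at tmpva, tmpva + 4096, ..., tmpva + 7 * 4096,
 * hitting the same index in every way (and in every page-sized chunk
 * of a way larger than a page).
 */
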
void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect not to be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
	}
}

void
mipsNN_pdcache_wbinv_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect not to be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
	}
}

void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect not to be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	SYNC;
}

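/*
 * The 128-byte-line variants below are for cnMIPS (Cavium Octeon)
 * parts, whose L1 data cache is write-through and kept coherent by
 * hardware; most of these routines therefore reduce to a sync, an
 * icache sync, or nothing at all.
 */
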
#ifdef CPU_CNMIPS

void
mipsNN_icache_sync_all_128(void)
{
	SYNCI
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	SYNCI
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
	SYNCI
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
	SYNC;
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
}

#endif

void
mipsNN_sdcache_wbinv_all_32(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect not to be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

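/*
 * Note that unlike the primary-cache index ops earlier in this file,
 * the L2 index routines mask the address with (sdcache_size - 1),
 * i.e. with the index bits of the entire cache rather than of a
 * single way, and so do not use the stride/loopcount walk.
 */
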
void
mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_all_128(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}
}

void
mipsNN_sdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect not to be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 128;
	}
}