2 * Copyright (c) 2001 Jake Burkholder.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <machine/asm.h>
28 __FBSDID("$FreeBSD$");
30 #include "opt_kstack_pages.h"
32 #include <sys/errno.h>
34 #include <machine/asi.h>
35 #include <machine/asmacros.h>
36 #include <machine/fsr.h>
37 #include <machine/intr_machdep.h>
38 #include <machine/pcb.h>
39 #include <machine/pstate.h>
40 #include <machine/wstate.h>
/*
 * SPARC V9 ELF ABI: declare that the application-reserved global
 * registers %g2/%g3/%g6 are used here as scratch, silencing the
 * assembler's reserved-register diagnostics.
 */
44 .register %g2, #ignore
45 .register %g3, #ignore
46 .register %g6, #ignore
49 * Common code for copy routines.
51 * We use large macros to generate functions for each of the copy routines.
52 * This allows the load and store instructions to be generated for the right
53 * operation, asi or not. It is possible to write an asi independent function
54 * but this would require 2 expensive wrs in the main loop to switch %asi.
55 * It would also screw up profiling (if we ever get it), but may save some I$.
56 * We assume that either one of dasi and sasi is empty, or that they are both
57 * the same (empty or non-empty). It is up to the caller to set %asi.
61 * ASI independent implementation of copystr(9).
62 * Used to implement copyinstr() and copystr().
64 * Return value is in %g1.
/*
 * _COPYSTR(src, dst, len, done, sa, sasi, da, dasi)
 *
 * Copy a NUL-terminated string, one byte at a time, using the
 * load/store forms selected by sa/sasi (source) and da/dasi
 * (destination) — either plain or ASI-qualified, per the "common
 * code for copy routines" scheme above.  Label 2: sets the
 * ENAMETOOLONG error in %g1 when len is exhausted before the NUL;
 * label 3: computes a byte count into %g2 from the advanced src
 * pointer.  Return value is in %g1 (0 on success).
 *
 * NOTE(review): this is a non-contiguous excerpt (original lines
 * 66-79 with gaps) — the loop branch, NUL test, and the store of
 * the copied length through `done` are not visible here; confirm
 * against the full file.
 */
66 #define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
72 LD(ub, sa) [src] sasi, %g1 ; \
73 ST(b, da) %g1, [dst] dasi ; \
78 2: mov ENAMETOOLONG, %g1 ; \
79 3: sub src, %g2, %g2 ; \
85 * ASI independent implementation of memset(3).
86 * Used to implement bzero(), memset() and aszero().
88 * If the pattern is non-zero, duplicate it to fill 64 bits.
89 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
90 * It has yet to be determined how much unrolling is beneficial.
91 * Could also read and compare before writing to minimize snoop traffic.
93 * XXX bzero() should be implemented as
94 * #define bzero(dst, len) (void)memset((dst), 0, (len))
/*
 * _MEMSET(dst, pat, len, da, dasi)
 *
 * ASI-independent memset core.  The pattern byte is masked to 8 bits
 * and (per the visible sllx 16/32 steps) replicated to fill 64 bits.
 * Byte stores (ST(b)) are used until dst is 8-byte aligned; the main
 * loop then stores 4 x 8 bytes per iteration (dst, +8, +16, +24),
 * followed by a single 8-byte store stage and a byte-store tail.
 *
 * NOTE(review): non-contiguous excerpt (original lines 97-137 with
 * gaps) — the alignment tests, loop branches and length bookkeeping
 * are not visible here.
 */
97 #define _MEMSET(dst, pat, len, da, dasi) \
99 and pat, 0xff, pat ; \
103 sllx pat, 16, %g1 ; \
105 sllx pat, 32, %g1 ; \
113 ST(b, da) pat, [dst] dasi ; \
120 ST(x, da) pat, [dst] dasi ; \
121 ST(x, da) pat, [dst + 8] dasi ; \
122 ST(x, da) pat, [dst + 16] dasi ; \
123 ST(x, da) pat, [dst + 24] dasi ; \
130 ST(x, da) pat, [dst] dasi ; \
137 ST(b, da) pat, [dst] dasi ; \
143 * ASI independent implementation of memcpy(3).
144 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
145 * ascopyfrom() and ascopyto().
147 * Transfer bytes until dst is 8-byte aligned. If src is then also 8 byte
148 * aligned, transfer 8 bytes, otherwise finish with bytes. The unaligned
149 * case could be optimized, but it is expected that this is the uncommon
150 * case and of questionable value. The code to do so is also rather large
151 * and ugly. It has yet to be determined how much unrolling is beneficial.
153 * XXX bcopy() must also check for overlap. This is stupid.
154 * XXX bcopy() should be implemented as
155 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
/*
 * _MEMCPY(dst, src, len, da, dasi, sa, sasi)
 *
 * ASI-independent memcpy core.  Byte copies (LD(ub)/ST(b)) run until
 * dst is 8-byte aligned; if src is then also 8-byte aligned, the main
 * loop moves 4 x 8 bytes per iteration through %g1-%g4 (offsets 0, 8,
 * 16, 24), followed by a single-8-byte stage and a byte-copy tail for
 * the remainder (and for the src-unaligned case, per the comment
 * block above).
 *
 * NOTE(review): non-contiguous excerpt (original lines 158-203 with
 * gaps) — alignment checks and loop control are not visible here.
 */
158 #define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \
164 LD(ub, sa) [src] sasi, %g1 ; \
165 ST(b, da) %g1, [dst] dasi ; \
178 LD(x, sa) [src] sasi, %g1 ; \
179 LD(x, sa) [src + 8] sasi, %g2 ; \
180 LD(x, sa) [src + 16] sasi, %g3 ; \
181 LD(x, sa) [src + 24] sasi, %g4 ; \
182 ST(x, da) %g1, [dst] dasi ; \
183 ST(x, da) %g2, [dst + 8] dasi ; \
184 ST(x, da) %g3, [dst + 16] dasi ; \
185 ST(x, da) %g4, [dst + 24] dasi ; \
193 LD(x, sa) [src] sasi, %g1 ; \
194 ST(x, da) %g1, [dst] dasi ; \
202 LD(ub, sa) [src] sasi, %g1 ; \
203 ST(b, da) %g1, [dst] dasi ; \
/*
 * ASI-based copy/zero entry points.  Per the macro header comment,
 * the caller is responsible for loading %asi before these run
 * (presumably from the u_long asi argument — the wr %asi setup
 * instructions are not visible in this excerpt; confirm against the
 * full file).
 */
! ascopy: both source and destination accessed through %asi.
210 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
214 _MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
! ascopyfrom: only the source (%o1) goes through %asi; dst is a
! plain kernel pointer.
220 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
224 _MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
! ascopyto: only the destination (%o2) goes through %asi; src is a
! plain kernel pointer (note src is %o0 here since asi is arg 1).
230 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
234 _MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
! aszero: zero-fill (%g0 pattern) through %asi.
240 * void aszero(u_long asi, vm_offset_t pa, size_t len)
244 _MEMSET(%o1, %g0, %o2, a, %asi)
/*
 * bcmp(): byte-at-a-time compare of b1 (%o0) and b2 (%o1) indexed by
 * %o3, loading each pair into %o4/%o5.  The loop bounds, the compare,
 * and the return-value computation are not visible in this excerpt.
 */
250 * int bcmp(const void *b1, const void *b2, size_t len)
255 1: ldub [%o0 + %o3], %o4
256 ldub [%o1 + %o3], %o5
/*
 * Plain-address (no ASI) entry points built on the macros above.
 * memmove/bcopy share code: argument order is swapped on entry and an
 * overlap check selects a backwards copy (per the comments retained
 * below); the non-overlapping path falls into _MEMCPY at label 3:.
 * NOTE(review): ENTRY/END markers and the overlap/backward-copy code
 * are not visible in this excerpt.
 */
268 * void *memmove(void *dst, const void *src, size_t len)
269 * void bcopy(const void *src, void *dst, size_t len)
273 * Swap src/dst for memmove/bcopy differences
280 * Check for overlap, and copy backwards if so.
304 * Do the fast version.
306 3: _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
! bzero: zero-fill with %g0 pattern, plain addressing.
312 * void bzero(void *b, size_t len)
315 _MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
! copystr(9): kernel-to-kernel string copy; result in %g1 per the
! _COPYSTR contract.
321 * int copystr(const void *src, void *dst, size_t len, size_t *done)
324 _COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
! memcpy: dst appears in %o3 here — presumably %o0 was saved earlier
! as the return value (not visible in this excerpt; confirm).
330 * void *memcpy(void *dst, const void *src, size_t len)
334 _MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
! memset: same %o3-as-dst pattern as memcpy; pattern byte in %o1.
340 * void *memset(void *b, int c, size_t len)
344 _MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
/*
 * User/kernel copy routines.  The copy_nofault_begin/_end labels
 * bracket the region so the trap handlers can recognize faults taken
 * here and recover instead of panicking (standard onfault scheme;
 * the fault-recovery code itself is not visible in this excerpt).
 * Each routine loads ASI_AIUP ("as if user, primary") into %asi so
 * the user-side access of the copy goes to the user address space.
 */
349 .globl copy_nofault_begin
! copyin: user source via %asi, kernel destination plain.
354 * int copyin(const void *uaddr, void *kaddr, size_t len)
357 wr %g0, ASI_AIUP, %asi
358 _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
! copyinstr: string variant of copyin; result in %g1 per _COPYSTR.
364 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
367 wr %g0, ASI_AIUP, %asi
368 _COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
! copyout: kernel source plain, user destination via %asi.
374 * int copyout(const void *kaddr, void *uaddr, size_t len)
377 wr %g0, ASI_AIUP, %asi
378 _MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
383 .globl copy_nofault_end
/*
 * Single-word user-space fetch/store/compare-and-swap primitives.
 * fs_nofault_begin/_end bracket the faultable region for the trap
 * handler.  All accesses use ASI_AIUP to hit the user address space.
 * Return-value setup and delay slots are not visible in this excerpt.
 */
392 .globl fs_nofault_begin
! Historical aliases: fusword/fuword etc. resolve onto the sized
! implementations below via .set.
397 * Chatty aliases for fetch, store functions.
399 .globl fubyte, fusword, fuword, subyte, susword, suword
401 .set fusword, fuword16
402 .set fuword, fuword64
404 .set susword, suword16
405 .set suword, suword64
407 .globl casuword32, casuword, fuptr, suptr
408 .set casuword, casuword64
! casuword32/64: atomic compare-and-swap on a user word — casa/casxa
! compare [%o0] with %o1 and swap in %o2 on match.
413 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
416 casa [%o0] ASI_AIUP, %o1, %o2
422 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
425 casxa [%o0] ASI_AIUP, %o1, %o2
! fuword8/16/32/64: zero-extending user loads of the named width,
! result replacing %o0.
431 * int fuword8(const void *base)
435 lduba [%o0] ASI_AIUP, %o0
439 * int fuword16(const void *base)
443 lduha [%o0] ASI_AIUP, %o0
447 * int32_t fuword32(const void *base)
451 lduwa [%o0] ASI_AIUP, %o0
455 * int64_t fuword64(const void *base)
459 ldxa [%o0] ASI_AIUP, %o0
! suword8/16/32/64: user stores of %o1 at the named width.
463 * int suword8(const void *base, int word)
466 stba %o1, [%o0] ASI_AIUP
472 * int suword16(const void *base, int word)
475 stha %o1, [%o0] ASI_AIUP
481 * int suword32(const void *base, int32_t word)
484 stwa %o1, [%o0] ASI_AIUP
490 * int suword64(const void *base, int64_t word)
493 stxa %o1, [%o0] ASI_AIUP
498 .globl fs_nofault_end
/*
 * fasword8/16/32: fetch a word of the named width through a
 * caller-supplied ASI (arg 1) from addr (%o1) into %o3, storing the
 * result through the val pointer (%o2) — the %asi setup, the store
 * through val, and the return-value code are not visible in this
 * excerpt.  fas_nofault_begin/_end bracket the faultable region.
 */
507 .globl fas_nofault_begin
511 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
516 lduba [%o1] %asi, %o3
524 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
529 lduha [%o1] %asi, %o3
537 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
542 lduwa [%o1] %asi, %o3
549 .globl fas_nofault_end
/*
 * spitfire_block_copy(src=%o0, dst=%o1, len=%o2): VIS block copy for
 * UltraSPARC-I/II class CPUs.  fpu_fault_begin marks the start of the
 * region in which FPU faults are handled specially.
 *
 * Setup: interrupts masked via PSTATE_NORMAL (previous %pstate
 * presumably preserved in %o3 — the rdpr is not visible here, but
 * label 1: restores %pstate from %o3), %asi set to ASI_BLK_S for
 * 64-byte block loads/stores, and the FPU enabled with FPRS_FEF.
 * If the trapframe's saved FPRS (at PCB_REG - TF_SIZEOF + TF_FPRS)
 * had FPRS_FEF set, the user's %f0-%f62 are first saved into
 * PCB_UFP in four block stores, FPRS_FEF is cleared in the saved
 * TF_FPRS, and PCB_FLAGS is updated (the flag-bit OR is not visible).
 *
 * Copy loop: software-pipelined — blocks stream in via ldda into
 * %f16/%f32 register groups and out via stda, advancing src/dst by
 * VIS_BLOCKSIZE per block until %o2 is exhausted.  The membars,
 * branch instructions, and epilogue FPU-state restore are not
 * visible in this excerpt.
 */
559 .globl fpu_fault_begin
564 * void spitfire_block_copy(void *src, void *dst, size_t len)
566 ENTRY(spitfire_block_copy)
568 wrpr %g0, PSTATE_NORMAL, %pstate
570 wr %g0, ASI_BLK_S, %asi
571 wr %g0, FPRS_FEF, %fprs
573 sub PCB_REG, TF_SIZEOF, %o4
574 ldx [%o4 + TF_FPRS], %o5
575 andcc %o5, FPRS_FEF, %g0
578 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
579 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
580 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
581 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
584 andn %o5, FPRS_FEF, %o5
585 stx %o5, [%o4 + TF_FPRS]
586 ldx [PCB_REG + PCB_FLAGS], %o4
588 stx %o4, [PCB_REG + PCB_FLAGS]
590 1: wrpr %o3, 0, %pstate
593 add %o0, VIS_BLOCKSIZE, %o0
594 sub %o2, VIS_BLOCKSIZE, %o2
596 2: ldda [%o0] %asi, %f16
605 stda %f32, [%o1] %asi
606 add %o0, VIS_BLOCKSIZE, %o0
607 subcc %o2, VIS_BLOCKSIZE, %o2
609 add %o1, VIS_BLOCKSIZE, %o1
619 stda %f32, [%o1] %asi
620 add %o0, VIS_BLOCKSIZE, %o0
621 sub %o2, VIS_BLOCKSIZE, %o2
623 add %o1, VIS_BLOCKSIZE, %o1
627 stda %f16, [%o1] %asi
632 END(spitfire_block_copy)
635 * void zeus_block_copy(void *src, void *dst, size_t len)
637 ENTRY(zeus_block_copy)
638 prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0
641 wrpr %g0, PSTATE_NORMAL, %pstate
643 wr %g0, ASI_BLK_S, %asi
644 wr %g0, FPRS_FEF, %fprs
646 sub PCB_REG, TF_SIZEOF, %o4
647 ldx [%o4 + TF_FPRS], %o5
648 andcc %o5, FPRS_FEF, %g0
651 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
652 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
653 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
654 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
657 andn %o5, FPRS_FEF, %o5
658 stx %o5, [%o4 + TF_FPRS]
659 ldx [PCB_REG + PCB_FLAGS], %o4
661 stx %o4, [PCB_REG + PCB_FLAGS]
663 1: wrpr %o3, 0, %pstate
665 ldd [%o0 + (0 * 8)], %f0
666 prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
667 ldd [%o0 + (1 * 8)], %f2
668 prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
670 ldd [%o0 + (2 * 8)], %f4
671 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
673 ldd [%o0 + (3 * 8)], %f6
674 prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
676 ldd [%o0 + (4 * 8)], %f8
677 prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
679 ldd [%o0 + (5 * 8)], %f10
680 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
682 ldd [%o0 + (6 * 8)], %f12
683 prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
685 ldd [%o0 + (7 * 8)], %f14
686 ldd [%o0 + (8 * 8)], %f0
687 sub %o2, VIS_BLOCKSIZE, %o2
688 add %o0, VIS_BLOCKSIZE, %o0
689 prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
691 prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
694 2: ldd [%o0 + (1 * 8)], %f2
696 ldd [%o0 + (2 * 8)], %f4
698 stda %f32, [%o1] %asi
699 ldd [%o0 + (3 * 8)], %f6
701 ldd [%o0 + (4 * 8)], %f8
703 ldd [%o0 + (5 * 8)], %f10
705 ldd [%o0 + (6 * 8)], %f12
707 ldd [%o0 + (7 * 8)], %f14
709 ldd [%o0 + (8 * 8)], %f0
711 sub %o2, VIS_BLOCKSIZE, %o2
712 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
713 add %o1, VIS_BLOCKSIZE, %o1
714 prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
715 add %o0, VIS_BLOCKSIZE, %o0
716 cmp %o2, VIS_BLOCKSIZE + 8
718 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
719 ldd [%o0 + (1 * 8)], %f2
721 ldd [%o0 + (2 * 8)], %f4
723 stda %f32, [%o1] %asi
724 ldd [%o0 + (3 * 8)], %f6
726 ldd [%o0 + (4 * 8)], %f8
728 ldd [%o0 + (5 * 8)], %f10
730 ldd [%o0 + (6 * 8)], %f12
732 ldd [%o0 + (7 * 8)], %f14
734 add %o1, VIS_BLOCKSIZE, %o1
738 stda %f32, [%o1] %asi
/*
 * spitfire_block_zero / zeus_block_zero (dst=%o0, len=%o1): zero a
 * region using VIS block stores; both CPU variants share this code
 * (ALTENTRY aliases zeus_block_zero onto the same body).  The FPU
 * context save preamble matches the block-copy routines above.
 * After %pstate is restored from %o3 at 1:, the %f0 register group
 * (presumably zeroed between lines 774 and 785 — not visible in
 * this excerpt) is block-stored 4 x VIS_BLOCKSIZE per iteration.
 */
746 * void spitfire_block_zero(void *dst, size_t len)
747 * void zeus_block_zero(void *dst, size_t len)
749 ALTENTRY(zeus_block_zero)
750 ENTRY(spitfire_block_zero)
752 wrpr %g0, PSTATE_NORMAL, %pstate
754 wr %g0, ASI_BLK_S, %asi
755 wr %g0, FPRS_FEF, %fprs
757 sub PCB_REG, TF_SIZEOF, %o4
758 ldx [%o4 + TF_FPRS], %o5
759 andcc %o5, FPRS_FEF, %g0
762 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
763 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
764 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
765 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
768 andn %o5, FPRS_FEF, %o5
769 stx %o5, [%o4 + TF_FPRS]
770 ldx [PCB_REG + PCB_FLAGS], %o4
772 stx %o4, [PCB_REG + PCB_FLAGS]
774 1: wrpr %o3, 0, %pstate
! Main loop: four block stores of the zeroed %f0 group, then advance
! dst and decrement len by 4 blocks.
785 1: stda %f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
786 stda %f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
787 stda %f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
788 stda %f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
789 sub %o1, (4 * VIS_BLOCKSIZE), %o1
791 add %o0, (4 * VIS_BLOCKSIZE), %o0
796 END(spitfire_block_zero)
/*
 * Export the byte size of the fpu_fault region so the trap handler
 * can range-check fault PCs against [fpu_fault_begin, fpu_fault_end).
 */
802 .globl fpu_fault_size
803 .set fpu_fault_size, fpu_fault_end - fpu_fault_begin
/*
 * longjmp/setjmp fragments.  The longjmp side (presumably — ENTRY
 * markers are not visible in this excerpt) reloads the saved frame
 * pointer, stack pointer, and return address from the jmp_buf in %g1
 * and panics on a botched buffer; the setjmp side stores %sp, %o7
 * (return address), and %fp into the jmp_buf at %o0.
 */
809 ldx [%g1 + _JB_FP], %g2
814 ldx [%g1 + _JB_SP], %o2
818 ldx [%g1 + _JB_PC], %o7
! Consistency check failed: the saved context does not match a live
! frame.
821 2: PANIC("longjmp botch", %l1)
! setjmp: capture the caller's stack, return address and frame.
825 stx %sp, [%o0 + _JB_SP]
826 stx %o7, [%o0 + _JB_PC]
827 stx %fp, [%o0 + _JB_FP]
/*
 * ofw_entry(args=%i0 after save): call into the Open Firmware client
 * interface (ofw_vec) with the PROM-compatible register-window state.
 * A new register window is opened; PSTATE_AM|PSTATE_IE are cleared
 * (no address masking, interrupts off) for the PROM call; unless the
 * trap table has been taken over (tba_taken_over), %wstate is mixed
 * to WSTATE_PROM_KMIX so spill/fill traps behave as the PROM expects,
 * and restored afterwards.  The actual jmpl through %l6/ofw_vec and
 * the branch structure are not visible in this excerpt; the final
 * restore moves the PROM's return cell into the caller's %o0.
 */
833 * void ofw_entry(cell_t args[])
836 save %sp, -CCFSZ, %sp
837 SET(ofw_vec, %l7, %l6)
840 andn %l7, PSTATE_AM | PSTATE_IE, %l5
842 SET(tba_taken_over, %l5, %l4)
845 andn %l5, WSTATE_PROM_MASK, %l3
846 wrpr %l3, WSTATE_PROM_KMIX, %wstate
851 wrpr %g0, %l5, %wstate
852 1: wrpr %l7, 0, %pstate
854 restore %o0, %g0, %o0
/*
 * ofw_exit(args): hand control back to Open Firmware permanently
 * (e.g. for halt/reboot).  Interrupts and address masking are
 * disabled, %wstate is set to the PROM mix, the OFW trap table base
 * saved in ofw_tba is restored to %tba, a stack is set up in the
 * kernel's locked kstack0 page, the primary DMMU context is forced
 * to 0 and the trap level to 0 before entering the PROM through
 * ofw_vec (the final call is not visible in this excerpt).
 */
858 * void ofw_exit(cell_t args[])
861 save %sp, -CCFSZ, %sp
863 SET(ofw_tba, %l7, %l5)
866 andn %l7, PSTATE_AM | PSTATE_IE, %l7
869 andn %l7, WSTATE_PROM_MASK, %l7
870 wrpr %l7, WSTATE_PROM_KMIX, %wstate
871 wrpr %l5, 0, %tba ! restore the OFW trap table
872 SET(ofw_vec, %l7, %l6)
874 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
875 sub %l0, SPOFF, %fp ! setup a stack in a locked page
876 sub %l0, SPOFF + CCFSZ, %sp
877 mov AA_DMMU_PCXR, %l3 ! force primary DMMU context 0
878 sethi %hi(KERNBASE), %l5
879 stxa %g0, [%l3] ASI_DMMU
881 wrpr %g0, 0, %tl ! force trap level 0
/*
 * Profiling entry hook.  GMON_PROF_OFF/GMON_PROF_HIRES mirror the
 * sys/gmon.h values locally because gmon.h cannot be pulled into
 * genassym.c (see XXX below).  _mcount is aliased onto
 * __cyg_profile_func_enter, which checks _gmonparam's state and,
 * when profiling is not off, dispatches to mcount (the branch and
 * the jmpl through %o2 are not visible in this excerpt).
 */
905 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
909 #define GMON_PROF_OFF 3
910 #define GMON_PROF_HIRES 4
913 .set _mcount, __cyg_profile_func_enter
915 ENTRY(__cyg_profile_func_enter)
916 SET(_gmonparam, %o3, %o2)
917 lduw [%o2 + GM_STATE], %o3
918 cmp %o3, GMON_PROF_OFF
921 SET(mcount, %o3, %o2)
926 END(__cyg_profile_func_enter)
/*
 * Profiling exit hook: only when _gmonparam is in high-resolution
 * mode (GMON_PROF_HIRES) does it dispatch to mexitcount (the branch
 * and indirect jump are not visible in this excerpt).
 */
930 ENTRY(__cyg_profile_func_exit)
931 SET(_gmonparam, %o3, %o2)
932 lduw [%o2 + GM_STATE], %o3
933 cmp %o3, GMON_PROF_HIRES
936 SET(mexitcount, %o3, %o2)
941 END(__cyg_profile_func_exit)