2 * Copyright (c) 2001 Jake Burkholder.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <machine/asm.h>
28 __FBSDID("$FreeBSD$");
30 #include "opt_kstack_pages.h"
32 #include <sys/errno.h>
34 #include <machine/asi.h>
35 #include <machine/asmacros.h>
36 #include <machine/fsr.h>
37 #include <machine/intr_machdep.h>
38 #include <machine/pcb.h>
39 #include <machine/pstate.h>
40 #include <machine/wstate.h>
44 .register %g2, #ignore
45 .register %g3, #ignore
46 .register %g6, #ignore
49 * Common code for copy routines.
51 * We use large macros to generate functions for each of the copy routines.
52 * This allows the load and store instructions to be generated for the right
53 * operation, asi or not. It is possible to write an asi independent function
54 * but this would require 2 expensive wrs in the main loop to switch %asi.
55 * It would also screw up profiling (if we ever get it), but may save some I$.
56 * We assume that either one of dasi and sasi is empty, or that they are both
57 * the same (empty or non-empty). It is up to the caller to set %asi.
61 * ASI independent implementation of copystr(9).
62 * Used to implement copyinstr() and copystr().
64 * Return value is in %g1.
/*
 * _COPYSTR(src, dst, len, done, sa, sasi, da, dasi)
 * Bounded string copy: each byte is loaded through LD(ub, sa)/sasi and
 * stored through ST(b, da)/dasi.  Label 2: sets %g1 = ENAMETOOLONG for
 * the case where len is exhausted; label 3: computes src minus a value
 * previously saved in %g2 (presumably the original src, giving the byte
 * count for the `done' out-parameter -- the save and the store through
 * `done' are not visible in this listing).
 * NOTE(review): this listing is truncated -- the loop head, the NUL
 * test and several other macro lines are missing.  Do not modify this
 * macro without the complete source.
 */
66 #define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
72 LD(ub, sa) [src] sasi, %g1 ; \
73 ST(b, da) %g1, [dst] dasi ; \
78 2: mov ENAMETOOLONG, %g1 ; \
79 3: sub src, %g2, %g2 ; \
85 * ASI independent implementation of memset(3).
86 * Used to implement bzero(), memset() and aszero().
88 * If the pattern is non-zero, duplicate it to fill 64 bits.
89 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
90 * It has yet to be determined how much unrolling is beneficial.
91 * Could also read and compare before writing to minimize snoop traffic.
93 * XXX bzero() should be implemented as
94 * #define bzero(dst, len) (void)memset((dst), 0, (len))
/*
 * _MEMSET(dst, pat, len, da, dasi)
 * Visible structure: truncate pat to one byte (and pat, 0xff), widen it
 * with sllx-by-16 and sllx-by-32 steps (the merging `or' lines are not
 * visible), emit single-byte stores until dst is 8-byte aligned, run a
 * 32-byte unrolled loop of four ST(x) stores, then a single-stx loop,
 * then trailing byte stores for the remainder.
 * NOTE(review): truncated listing -- the loop-control branches and the
 * duplication `or' lines are missing; keep the macro byte-identical
 * unless working from the full source.
 */
97 #define _MEMSET(dst, pat, len, da, dasi) \
99 and pat, 0xff, pat ; \
103 sllx pat, 16, %g1 ; \
105 sllx pat, 32, %g1 ; \
113 ST(b, da) pat, [dst] dasi ; \
120 ST(x, da) pat, [dst] dasi ; \
121 ST(x, da) pat, [dst + 8] dasi ; \
122 ST(x, da) pat, [dst + 16] dasi ; \
123 ST(x, da) pat, [dst + 24] dasi ; \
130 ST(x, da) pat, [dst] dasi ; \
137 ST(b, da) pat, [dst] dasi ; \
143 * ASI independent implementation of memcpy(3).
144 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
145 * ascopyfrom() and ascopyto().
147 * Transfer bytes until dst is 8-byte aligned. If src is then also 8 byte
148 * aligned, transfer 8 bytes, otherwise finish with bytes. The unaligned
149 * case could be optimized, but it is expected that this is the uncommon
150 * case and of questionable value. The code to do so is also rather large
151 * and ugly. It has yet to be determined how much unrolling is beneficial.
153 * XXX bcopy() must also check for overlap. This is stupid.
154 * XXX bcopy() should be implemented as
155 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
/*
 * _MEMCPY(dst, src, len, da, dasi, sa, sasi)
 * Visible structure: byte copies until dst is aligned, a 32-byte
 * unrolled loop (four LD(x)/ST(x) pairs through %g1-%g4) for the
 * both-aligned case, a single 8-byte loop, and a trailing byte loop.
 * Clobbers %g1-%g4.  NOTE(review): truncated listing -- the alignment
 * tests and loop branches are missing; do not modify without the
 * complete macro body.
 */
158 #define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \
164 LD(ub, sa) [src] sasi, %g1 ; \
165 ST(b, da) %g1, [dst] dasi ; \
178 LD(x, sa) [src] sasi, %g1 ; \
179 LD(x, sa) [src + 8] sasi, %g2 ; \
180 LD(x, sa) [src + 16] sasi, %g3 ; \
181 LD(x, sa) [src + 24] sasi, %g4 ; \
182 ST(x, da) %g1, [dst] dasi ; \
183 ST(x, da) %g2, [dst + 8] dasi ; \
184 ST(x, da) %g3, [dst + 16] dasi ; \
185 ST(x, da) %g4, [dst + 24] dasi ; \
193 LD(x, sa) [src] sasi, %g1 ; \
194 ST(x, da) %g1, [dst] dasi ; \
202 LD(ub, sa) [src] sasi, %g1 ; \
203 ST(b, da) %g1, [dst] dasi ; \
210 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
! Copy within a single alternate address space: both source and
! destination accesses go through %asi (a/%asi on both sides).
! NOTE(review): the ENTRY() and the wr that loads %asi from %o0 are
! missing from this truncated listing.
214 _MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
220 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
! Source side is read through %asi; destination uses normal loads/stores
! (da/dasi are EMPTY).  NOTE(review): ENTRY()/%asi setup not visible here.
224 _MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
230 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
! Destination side is written through %asi; source uses normal loads.
! Note the argument shuffle: src is %o0 and dst is %o2 because the asi
! occupies %o1.  NOTE(review): ENTRY()/%asi setup not visible here.
234 _MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
240 * void aszero(u_long asi, vm_offset_t pa, size_t len)
! Zero len bytes at pa through the alternate space in %asi (pattern is
! %g0).  NOTE(review): ENTRY()/%asi setup not visible in this listing.
244 _MEMSET(%o1, %g0, %o2, a, %asi)
250 * int bcmp(const void *b1, const void *b2, size_t len)
! Byte-at-a-time compare, indexed by %o3 into both buffers.
! NOTE(review): the loop setup, the compare of %o4/%o5, the loop branch
! and the return-value computation are missing from this listing.
255 1: ldub [%o0 + %o3], %o4
256 ldub [%o1 + %o3], %o5
268 * void bcopy(const void *src, void *dst, size_t len)
272 * Check for overlap, and copy backwards if so.
296 * Do the fast version.
! Label 3: is the non-overlapping fast path -- a plain kernel-space
! memcpy with no alternate ASIs.  NOTE(review): the overlap test and
! the backward-copy loop are missing from this truncated listing.
298 3: _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
304 * void bzero(void *b, size_t len)
! Kernel-space zero: _MEMSET with a %g0 pattern and no alternate ASIs.
307 _MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
313 * int copystr(const void *src, void *dst, size_t len, size_t *done)
! Kernel-to-kernel bounded string copy; %o3 carries the `done' pointer
! into the macro.  Error code comes back in %g1 (see _COPYSTR).
316 _COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
322 * void *memcpy(void *dst, const void *src, size_t len)
! dst is apparently staged in %o3 so that %o0 can be returned to the
! caller unchanged -- the mov %o0 -> %o3 is not visible in this listing.
326 _MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
332 * void *memset(void *b, int c, size_t len)
! b is apparently staged in %o3 so %o0 survives as the return value
! (mov not visible); pattern byte is %o1, widened inside _MEMSET.
336 _MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
341 .globl copy_nofault_begin
346 * int copyin(const void *uaddr, void *kaddr, size_t len)
! User-to-kernel copy: source loads go through ASI_AIUP ("as if user,
! primary").  The copy_nofault_begin/_end markers presumably let the
! trap handler recognize faults in this region -- confirm against the
! sparc64 trap code.
349 wr %g0, ASI_AIUP, %asi
350 _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
356 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
! Bounded string copy from user space: source loads use ASI_AIUP,
! destination stores are normal kernel stores.
359 wr %g0, ASI_AIUP, %asi
360 _COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
366 * int copyout(const void *kaddr, void *uaddr, size_t len)
! Kernel-to-user copy: destination stores go through ASI_AIUP; the
! nofault region closes at copy_nofault_end below.
369 wr %g0, ASI_AIUP, %asi
370 _MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
375 .globl copy_nofault_end
384 .globl fs_nofault_begin
389 * Chatty aliases for fetch, store functions.
! Historic BSD names mapped onto the explicitly-sized variants; on this
! 64-bit platform fuword/suword/casuword are the 64-bit routines.
! NOTE(review): the .set lines for fubyte/subyte/fuptr/suptr are not
! visible in this truncated listing, though they are declared .globl.
391 .globl fubyte, fusword, fuword, subyte, susword, suword
393 .set fusword, fuword16
394 .set fuword, fuword64
396 .set susword, suword16
397 .set suword, suword64
399 .globl casuword32, casuword, fuptr, suptr
400 .set casuword, casuword64
405 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
! Compare-and-swap a 32-bit word in user space (ASI_AIUP): if *p == e,
! store s; the old value lands in %o2.  Return-value plumbing and
! ENTRY/END are not visible in this truncated listing.
408 casa [%o0] ASI_AIUP, %o1, %o2
414 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
! 64-bit variant of the above, via casxa.
417 casxa [%o0] ASI_AIUP, %o1, %o2
423 * int fuword8(const void *base)
! Fetch an unsigned byte from user space; result replaces %o0.
427 lduba [%o0] ASI_AIUP, %o0
431 * int fuword16(const void *base)
! Fetch an unsigned halfword from user space.
435 lduha [%o0] ASI_AIUP, %o0
439 * int32_t fuword32(const void *base)
! Fetch an unsigned 32-bit word from user space.
443 lduwa [%o0] ASI_AIUP, %o0
447 * int64_t fuword64(const void *base)
! Fetch a 64-bit word from user space.
451 ldxa [%o0] ASI_AIUP, %o0
455 * int suword8(const void *base, int word)
! Store a byte to user space; the int-return setup is not visible.
458 stba %o1, [%o0] ASI_AIUP
464 * int suword16(const void *base, int word)
! Store a halfword to user space.
467 stha %o1, [%o0] ASI_AIUP
473 * int suword32(const void *base, int32_t word)
! Store a 32-bit word to user space.
476 stwa %o1, [%o0] ASI_AIUP
482 * int suword64(const void *base, int64_t word)
! Store a 64-bit word to user space.
485 stxa %o1, [%o0] ASI_AIUP
490 .globl fs_nofault_intr_begin
! The fs_nofault_intr_begin/_end markers bracket the interrupt-context
! variants of the user halfword fetch/store; presumably the fault
! handler treats this sub-range specially -- confirm against the trap
! code.
491 fs_nofault_intr_begin:
495 * int fuswintr(const void *base)
! Fetch an unsigned halfword from user space (ASI_AIUP).
499 lduha [%o0] ASI_AIUP, %o0
503 * int suswintr(const void *base, int word)
! Store a halfword to user space (ASI_AIUP).
506 stha %o1, [%o0] ASI_AIUP
511 .globl fs_nofault_intr_end
515 .globl fs_nofault_end
524 .globl fas_nofault_begin
528 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
! Fetch through a caller-supplied alternate space: the wr that moves
! %o0 into %asi, the store of %o3 through the val pointer (%o2) and the
! return-value setup are all missing from this truncated listing.
533 lduba [%o1] %asi, %o3
541 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
! Halfword variant of the above.
546 lduha [%o1] %asi, %o3
554 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
! 32-bit word variant of the above.
559 lduwa [%o1] %asi, %o3
566 .globl fas_nofault_end
576 .globl fpu_fault_begin
581 * void spitfire_block_copy(void *src, void *dst, size_t len)
! Block copy using the FPU block-load/store ASI.  Visible structure:
! switch to PSTATE_NORMAL, select ASI_BLK_S and enable the FPU
! (FPRS_FEF); locate the trapframe at PCB_REG - TF_SIZEOF and test its
! saved %fprs -- if user FP was live, spill the FP registers in four
! VIS_BLOCKSIZE blocks into pcb_ufp, clear FEF in the saved %fprs and
! set a flag in pcb_flags (the `or' that sets the flag bit is not
! visible).  The copy loop then moves one VIS_BLOCKSIZE block per
! iteration through %f16/%f32 with ldda/stda.
! NOTE(review): many lines (branches, membars, the save of the original
! %pstate into %o3, fprs teardown) are missing from this truncated
! listing; treat this block as read-only documentation.
583 ENTRY(spitfire_block_copy)
585 wrpr %g0, PSTATE_NORMAL, %pstate
587 wr %g0, ASI_BLK_S, %asi
588 wr %g0, FPRS_FEF, %fprs
590 sub PCB_REG, TF_SIZEOF, %o4
591 ldx [%o4 + TF_FPRS], %o5
592 andcc %o5, FPRS_FEF, %g0
595 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
596 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
597 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
598 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
601 andn %o5, FPRS_FEF, %o5
602 stx %o5, [%o4 + TF_FPRS]
603 ldx [PCB_REG + PCB_FLAGS], %o4
605 stx %o4, [PCB_REG + PCB_FLAGS]
607 1: wrpr %o3, 0, %pstate
610 add %o0, VIS_BLOCKSIZE, %o0
611 sub %o2, VIS_BLOCKSIZE, %o2
613 2: ldda [%o0] %asi, %f16
622 stda %f32, [%o1] %asi
623 add %o0, VIS_BLOCKSIZE, %o0
624 subcc %o2, VIS_BLOCKSIZE, %o2
626 add %o1, VIS_BLOCKSIZE, %o1
636 stda %f32, [%o1] %asi
637 add %o0, VIS_BLOCKSIZE, %o0
638 sub %o2, VIS_BLOCKSIZE, %o2
640 add %o1, VIS_BLOCKSIZE, %o1
644 stda %f16, [%o1] %asi
649 END(spitfire_block_copy)
652 * void zeus_block_copy(void *src, void *dst, size_t len)
! Variant of the block copy tuned with aggressive software prefetching
! (prefetch hints 0 = read, 1 = write/streaming reach far ahead of the
! copy point).  The FP save preamble mirrors spitfire_block_copy; the
! main loop keeps eight doublewords in flight in %f0-%f14 while storing
! completed blocks from %f32 with stda.
! NOTE(review): truncated listing -- the ENTRY prologue pieces, the
! fmovd shuffles between %f0-%f14 and %f32+, all loop branches, the
! trailing-block drain and the END() are missing.  Do not restructure
! from this fragment.
654 ENTRY(zeus_block_copy)
655 prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0
658 wrpr %g0, PSTATE_NORMAL, %pstate
660 wr %g0, ASI_BLK_S, %asi
661 wr %g0, FPRS_FEF, %fprs
663 sub PCB_REG, TF_SIZEOF, %o4
664 ldx [%o4 + TF_FPRS], %o5
665 andcc %o5, FPRS_FEF, %g0
668 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
669 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
670 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
671 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
674 andn %o5, FPRS_FEF, %o5
675 stx %o5, [%o4 + TF_FPRS]
676 ldx [PCB_REG + PCB_FLAGS], %o4
678 stx %o4, [PCB_REG + PCB_FLAGS]
680 1: wrpr %o3, 0, %pstate
682 ldd [%o0 + (0 * 8)], %f0
683 prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
684 ldd [%o0 + (1 * 8)], %f2
685 prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
687 ldd [%o0 + (2 * 8)], %f4
688 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
690 ldd [%o0 + (3 * 8)], %f6
691 prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
693 ldd [%o0 + (4 * 8)], %f8
694 prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
696 ldd [%o0 + (5 * 8)], %f10
697 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
699 ldd [%o0 + (6 * 8)], %f12
700 prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
702 ldd [%o0 + (7 * 8)], %f14
703 ldd [%o0 + (8 * 8)], %f0
704 sub %o2, VIS_BLOCKSIZE, %o2
705 add %o0, VIS_BLOCKSIZE, %o0
706 prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
708 prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
711 2: ldd [%o0 + (1 * 8)], %f2
713 ldd [%o0 + (2 * 8)], %f4
715 stda %f32, [%o1] %asi
716 ldd [%o0 + (3 * 8)], %f6
718 ldd [%o0 + (4 * 8)], %f8
720 ldd [%o0 + (5 * 8)], %f10
722 ldd [%o0 + (6 * 8)], %f12
724 ldd [%o0 + (7 * 8)], %f14
726 ldd [%o0 + (8 * 8)], %f0
728 sub %o2, VIS_BLOCKSIZE, %o2
729 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
730 add %o1, VIS_BLOCKSIZE, %o1
731 prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
732 add %o0, VIS_BLOCKSIZE, %o0
733 cmp %o2, VIS_BLOCKSIZE + 8
735 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
736 ldd [%o0 + (1 * 8)], %f2
738 ldd [%o0 + (2 * 8)], %f4
740 stda %f32, [%o1] %asi
741 ldd [%o0 + (3 * 8)], %f6
743 ldd [%o0 + (4 * 8)], %f8
745 ldd [%o0 + (5 * 8)], %f10
747 ldd [%o0 + (6 * 8)], %f12
749 ldd [%o0 + (7 * 8)], %f14
751 add %o1, VIS_BLOCKSIZE, %o1
755 stda %f32, [%o1] %asi
763 * void spitfire_block_zero(void *dst, size_t len)
764 * void zeus_block_zero(void *dst, size_t len)
! Shared implementation: same FP save preamble as the block-copy
! routines, then a loop that block-stores %f0 (presumably zeroed by
! missing fzero/clear lines) four VIS_BLOCKSIZE blocks -- 4 *
! VIS_BLOCKSIZE bytes -- per iteration.
! NOTE(review): truncated listing -- the %f0 zeroing, branches and
! fprs/pstate teardown are missing.
766 ALTENTRY(zeus_block_zero)
767 ENTRY(spitfire_block_zero)
769 wrpr %g0, PSTATE_NORMAL, %pstate
771 wr %g0, ASI_BLK_S, %asi
772 wr %g0, FPRS_FEF, %fprs
774 sub PCB_REG, TF_SIZEOF, %o4
775 ldx [%o4 + TF_FPRS], %o5
776 andcc %o5, FPRS_FEF, %g0
779 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
780 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
781 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
782 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
785 andn %o5, FPRS_FEF, %o5
786 stx %o5, [%o4 + TF_FPRS]
787 ldx [PCB_REG + PCB_FLAGS], %o4
789 stx %o4, [PCB_REG + PCB_FLAGS]
791 1: wrpr %o3, 0, %pstate
802 1: stda %f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
803 stda %f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
804 stda %f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
805 stda %f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
806 sub %o1, (4 * VIS_BLOCKSIZE), %o1
808 add %o0, (4 * VIS_BLOCKSIZE), %o0
813 END(spitfire_block_zero)
819 .globl fpu_fault_size
! Assembler-computed size of the fpu_fault region, presumably used by
! the trap handler to range-check faulting PCs against the FP block
! routines above -- confirm against the sparc64 trap code.
820 .set fpu_fault_size, fpu_fault_end - fpu_fault_begin
! longjmp fragment: restore the saved frame pointer, stack pointer and
! return PC from the jmp_buf (pointed to by %g1 here); label 2: is the
! consistency-check failure path.  NOTE(review): the ENTRY, the jmp_buf
! validation and the return sequence are missing from this listing.
826 ldx [%g1 + _JB_FP], %g2
831 ldx [%g1 + _JB_SP], %o2
835 ldx [%g1 + _JB_PC], %o7
838 2: PANIC("longjmp botch", %l1)
! setjmp fragment: save the current %sp, the caller's return address
! (%o7) and %fp into the jmp_buf at %o0.  The zero-return and retl are
! not visible in this truncated listing.
842 stx %sp, [%o0 + _JB_SP]
843 stx %o7, [%o0 + _JB_PC]
844 stx %fp, [%o0 + _JB_FP]
850 * void ofw_entry(cell_t args[])
! Call into the Open Firmware client interface (ofw_vec) with the args
! array.  Visible structure: open a register window, mask PSTATE_AM and
! PSTATE_IE (no address masking, interrupts off) for the PROM call,
! switch %wstate to the PROM-kernel mix, then restore the previous
! %wstate and %pstate on return; the OFW return value is propagated via
! the restore of %o0.  NOTE(review): the jmpl into ofw_vec, the
! tba_taken_over test and several transition lines are missing from
! this truncated listing.
853 save %sp, -CCFSZ, %sp
854 SET(ofw_vec, %l7, %l6)
857 andn %l7, PSTATE_AM | PSTATE_IE, %l5
859 SET(tba_taken_over, %l5, %l4)
862 andn %l5, WSTATE_PROM_MASK, %l3
863 wrpr %l3, WSTATE_PROM_KMIX, %wstate
868 wrpr %g0, %l5, %wstate
869 1: wrpr %l7, 0, %pstate
871 restore %o0, %g0, %o0
875 * void ofw_exit(cell_t args[])
! Hand control back to Open Firmware (does not return).  Visible
! structure: disable interrupts/address masking, restore the PROM
! window state and the saved OFW trap table (%tba), set up a stack in
! the locked kstack0 page so the PROM call has a safe frame, force the
! primary DMMU context to 0 and drop to trap level 0 before entering
! ofw_vec.  NOTE(review): the membar/flush after the stxa, the final
! jmpl into ofw_vec and several other lines are missing from this
! truncated listing.
878 save %sp, -CCFSZ, %sp
880 SET(ofw_tba, %l7, %l5)
883 andn %l7, PSTATE_AM | PSTATE_IE, %l7
886 andn %l7, WSTATE_PROM_MASK, %l7
887 wrpr %l7, WSTATE_PROM_KMIX, %wstate
888 wrpr %l5, 0, %tba ! restore the OFW trap table
889 SET(ofw_vec, %l7, %l6)
891 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
892 sub %l0, SPOFF, %fp ! setup a stack in a locked page
893 sub %l0, SPOFF + CCFSZ, %sp
894 mov AA_DMMU_PCXR, %l3 ! force primary DMMU context 0
895 sethi %hi(KERNBASE), %l5
896 stxa %g0, [%l3] ASI_DMMU
898 wrpr %g0, 0, %tl ! force trap level 0
922 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
926 #define GMON_PROF_OFF 3
927 #define GMON_PROF_HIRES 4
/* These values duplicate sys/gmon.h (see XXX above) and must be kept
 * in sync with it by hand. */
! gcc -finstrument-functions entry hook, aliased to the classic
! _mcount profiling entry point.
930 .set _mcount, __cyg_profile_func_enter
932 ENTRY(__cyg_profile_func_enter)
! Load the profiler state from _gmonparam; when it is GMON_PROF_OFF the
! (missing) branch presumably returns immediately, otherwise control is
! transferred to mcount.  NOTE(review): the branch and jmp lines are
! missing from this truncated listing.
933 SET(_gmonparam, %o3, %o2)
934 lduw [%o2 + GM_STATE], %o3
935 cmp %o3, GMON_PROF_OFF
938 SET(mcount, %o3, %o2)
943 END(__cyg_profile_func_enter)
947 ENTRY(__cyg_profile_func_exit)
! gcc -finstrument-functions exit hook: only when the profiler state is
! GMON_PROF_HIRES does control (via missing branch/jmp lines) reach
! mexitcount; otherwise the routine presumably returns immediately.
948 SET(_gmonparam, %o3, %o2)
949 lduw [%o2 + GM_STATE], %o3
950 cmp %o3, GMON_PROF_HIRES
953 SET(mexitcount, %o3, %o2)
958 END(__cyg_profile_func_exit)