2 * Copyright (c) 2001 Jake Burkholder.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <machine/asm.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/errno.h>
32 #include <machine/asi.h>
33 #include <machine/asmacros.h>
34 #include <machine/fsr.h>
35 #include <machine/intr_machdep.h>
36 #include <machine/pcb.h>
37 #include <machine/pstate.h>
38 #include <machine/wstate.h>
! The SPARC V9 ABI reserves %g2/%g3 (application) and %g6/%g7 (system);
! these .register directives tell the assembler that this file's use of
! them is deliberate, so it will not reject instructions that write them.
42 .register %g2, #ignore
43 .register %g3, #ignore
44 .register %g6, #ignore
47 * Common code for copy routines.
49 * We use large macros to generate functions for each of the copy routines.
50 * This allows the load and store instructions to be generated for the right
51 * operation, asi or not. It is possible to write an asi independent function
52 * but this would require 2 expensive wrs in the main loop to switch %asi.
53 * It would also screw up profiling (if we ever get it), but may save some I$.
54 * We assume that either one of dasi and sasi is empty, or that they are both
55 * the same (empty or non-empty). It is up to the caller to set %asi.
59 * ASI independent implementation of copystr(9).
60 * Used to implement copyinstr() and copystr().
62 * Return value is in %g1.
! NOTE(review): fragmentary macro — several backslash-continuation lines
! are missing from this chunk; do not edit without the full source.
! Byte-by-byte string copy; LD/ST expand to plain or alternate-space
! (ASI-qualified) loads/stores depending on the sa/sasi and da/dasi
! arguments.  Label 2 loads ENAMETOOLONG into %g1 — presumably the
! "len exhausted before NUL" error path; label 3 appears to compute the
! byte count into %g2 (src minus saved start) for *done — TODO confirm
! against the full source.
64 #define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
70 LD(ub, sa) [src] sasi, %g1 ; \
71 ST(b, da) %g1, [dst] dasi ; \
76 2: mov ENAMETOOLONG, %g1 ; \
77 3: sub src, %g2, %g2 ; \
83 * ASI independent implementation of memset(3).
84 * Used to implement bzero(), memset() and aszero().
86 * If the pattern is non-zero, duplicate it to fill 64 bits.
87 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
88 * It has yet to be determined how much unrolling is beneficial.
89 * Could also read and compare before writing to minimize snoop traffic.
91 * XXX bzero() should be implemented as
92 * #define bzero(dst, len) (void)memset((dst), 0, (len))
! NOTE(review): fragmentary macro — interior continuation lines are
! missing from this chunk; do not edit without the full source.
! Per the header comment above: the 8-bit pattern is masked (and ... 0xff)
! and widened to 64 bits by shift/or steps (sllx by 16 then 32 visible
! here); byte stores run until dst is 8-byte aligned, then a 4-way
! unrolled loop of 8-byte stores (offsets 0/8/16/24), then a single
! 8-byte store loop, then trailing byte stores.
95 #define _MEMSET(dst, pat, len, da, dasi) \
97 and pat, 0xff, pat ; \
101 sllx pat, 16, %g1 ; \
103 sllx pat, 32, %g1 ; \
111 ST(b, da) pat, [dst] dasi ; \
118 ST(x, da) pat, [dst] dasi ; \
119 ST(x, da) pat, [dst + 8] dasi ; \
120 ST(x, da) pat, [dst + 16] dasi ; \
121 ST(x, da) pat, [dst + 24] dasi ; \
128 ST(x, da) pat, [dst] dasi ; \
135 ST(b, da) pat, [dst] dasi ; \
141 * ASI independent implementation of memcpy(3).
142 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
143 * ascopyfrom() and ascopyto().
145 * Transfer bytes until dst is 8-byte aligned. If src is then also 8 byte
146 * aligned, transfer 8 bytes, otherwise finish with bytes. The unaligned
147 * case could be optimized, but it is expected that this is the uncommon
148 * case and of questionable value. The code to do so is also rather large
149 * and ugly. It has yet to be determined how much unrolling is beneficial.
151 * XXX bcopy() must also check for overlap. This is stupid.
152 * XXX bcopy() should be implemented as
153 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
! NOTE(review): fragmentary macro — interior continuation lines are
! missing from this chunk; do not edit without the full source.
! Per the header comment above: byte copies until dst is 8-byte aligned;
! if src is then also 8-byte aligned, a 4-way unrolled loop of 8-byte
! transfers through %g1-%g4 (offsets 0/8/16/24), then single 8-byte
! transfers, otherwise it finishes with byte copies.  LD/ST expand to
! plain or alternate-space forms per the sa/sasi and da/dasi arguments.
156 #define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \
162 LD(ub, sa) [src] sasi, %g1 ; \
163 ST(b, da) %g1, [dst] dasi ; \
176 LD(x, sa) [src] sasi, %g1 ; \
177 LD(x, sa) [src + 8] sasi, %g2 ; \
178 LD(x, sa) [src + 16] sasi, %g3 ; \
179 LD(x, sa) [src + 24] sasi, %g4 ; \
180 ST(x, da) %g1, [dst] dasi ; \
181 ST(x, da) %g2, [dst + 8] dasi ; \
182 ST(x, da) %g3, [dst + 16] dasi ; \
183 ST(x, da) %g4, [dst + 24] dasi ; \
191 LD(x, sa) [src] sasi, %g1 ; \
192 ST(x, da) %g1, [dst] dasi ; \
200 LD(ub, sa) [src] sasi, %g1 ; \
201 ST(b, da) %g1, [dst] dasi ; \
208 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
! ascopy: both source and destination are accessed through %asi
! (caller's asi argument presumably written to %asi on a missing line).
212 _MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
218 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
! ascopyfrom: alternate-space source, normal destination.
222 _MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
228 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
! ascopyto: normal source, alternate-space destination.
232 _MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
238 * void aszero(u_long asi, vm_offset_t pa, size_t len)
! aszero: zero-fill (%g0 pattern) through the alternate space.
242 _MEMSET(%o1, %g0, %o2, a, %asi)
248 * int bcmp(const void *b1, const void *b2, size_t len)
! bcmp inner loop: compares one byte per iteration, indexed by %o3
! (loop control/branches are on lines missing from this chunk).
253 1: ldub [%o0 + %o3], %o4
254 ldub [%o1 + %o3], %o5
266 * void bcopy(const void *src, void *dst, size_t len)
270 * Check for overlap, and copy backwards if so.
294 * Do the fast version.
! bcopy non-overlapping path: note arg order swap — _MEMCPY takes
! (dst, src, ...) while bcopy(9) takes (src, dst, ...).
296 3: _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
302 * void bzero(void *b, size_t len)
305 _MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
311 * int copystr(const void *src, void *dst, size_t len, size_t *done)
314 _COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
320 * void *memcpy(void *dst, const void *src, size_t len)
! memcpy: works on a copy of dst in %o3 — presumably so %o0 survives
! as the return value; TODO confirm against the full source.
324 _MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
330 * void *memset(void *b, int c, size_t len)
334 _MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
! copy_nofault_begin/copy_nofault_end bracket the user-space copy
! routines — presumably so the trap handlers can recognize faults whose
! PC falls in this range and recover instead of panicking; confirm
! against the MMU trap code.
339 .globl copy_nofault_begin
344 * int copyin(const void *uaddr, void *kaddr, size_t len)
! ASI_AIUP: loads from the user (primary) address space while in
! kernel mode; the source side of _MEMCPY goes through %asi.
347 wr %g0, ASI_AIUP, %asi
348 _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
354 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
357 wr %g0, ASI_AIUP, %asi
358 _COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
364 * int copyout(const void *kaddr, void *uaddr, size_t len)
! copyout is the mirror image: the destination side goes through %asi.
367 wr %g0, ASI_AIUP, %asi
368 _MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
373 .globl copy_nofault_end
! fs_nofault_begin/fs_nofault_end bracket the single-access user
! fetch/store primitives for the fault-recovery range check; the inner
! fs_nofault_intr_begin/_end range additionally covers the two
! interrupt-safe variants (fuswintr/suswintr).
382 .globl fs_nofault_begin
387 * Chatty aliases for fetch, store functions.
389 .globl fubyte, fusword, fuword, subyte, susword, suword
! .set makes the historic names pure aliases of the sized versions.
391 .set fusword, fuword16
392 .set fuword, fuword64
394 .set susword, suword16
395 .set suword, suword64
397 .globl casuword32, casuword, fuptr, suptr
398 .set casuword, casuword64
403 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
! Compare-and-swap in the user address space: if *p == %o1 then
! *p = %o2, old value returned in %o2.
406 casa [%o0] ASI_AIUP, %o1, %o2
412 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
415 casxa [%o0] ASI_AIUP, %o1, %o2
421 * int fuword8(const void *base)
! Each fuwordN is one alternate-space load of the given width from the
! user primary address space; result replaces %o0.
425 lduba [%o0] ASI_AIUP, %o0
429 * int fuword16(const void *base)
433 lduha [%o0] ASI_AIUP, %o0
437 * int32_t fuword32(const void *base)
441 lduwa [%o0] ASI_AIUP, %o0
445 * int64_t fuword64(const void *base)
449 ldxa [%o0] ASI_AIUP, %o0
453 * int suword8(const void *base, int word)
! Each suwordN is one alternate-space store of %o1 to the user address.
456 stba %o1, [%o0] ASI_AIUP
462 * int suword16(const void *base, int word)
465 stha %o1, [%o0] ASI_AIUP
471 * int suword32(const void *base, int32_t word)
474 stwa %o1, [%o0] ASI_AIUP
480 * int suword64(const void *base, int64_t word)
483 stxa %o1, [%o0] ASI_AIUP
488 .globl fs_nofault_intr_begin
489 fs_nofault_intr_begin:
493 * int fuswintr(const void *base)
! Interrupt-level variants of fusword/susword — same instructions,
! separate nofault range (presumably handled differently on fault).
497 lduha [%o0] ASI_AIUP, %o0
501 * int suswintr(const void *base, int word)
504 stha %o1, [%o0] ASI_AIUP
509 .globl fs_nofault_intr_end
513 .globl fs_nofault_end
! fas_nofault_begin/fas_nofault_end bracket the fetch-from-arbitrary-ASI
! helpers: caller supplies the ASI in %o0 (presumably written to %asi on
! a line missing from this chunk), the value is loaded from [%o1] into
! %o3 and stored out through *val — confirm against the full source.
522 .globl fas_nofault_begin
526 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
531 lduba [%o1] %asi, %o3
539 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
544 lduha [%o1] %asi, %o3
552 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
557 lduwa [%o1] %asi, %o3
564 .globl fas_nofault_end
! fpu_fault_begin marks the start of the region in which FPU-disabled
! faults are expected (the VIS block routines below use the FP register
! file as a copy buffer).
574 .globl fpu_fault_begin
579 * void spitfire_block_copy(void *src, void *dst, size_t len)
! Block copy using UltraSPARC (Spitfire) 64-byte VIS block loads/stores.
! NOTE(review): interior lines (branches, pstate save, retl) are missing
! from this chunk.
581 ENTRY(spitfire_block_copy)
! Run on the normal (alternate-globals off) pstate while touching the
! pcb/trapframe, select the block-store ASI, and enable the FPU.
583 wrpr %g0, PSTATE_NORMAL, %pstate
585 wr %g0, ASI_BLK_S, %asi
586 wr %g0, FPRS_FEF, %fprs
! Locate the trapframe below the pcb and test whether the interrupted
! context had the FPU enabled (FPRS_FEF set in its saved %fprs).
588 sub PCB_REG, TF_SIZEOF, %o4
589 ldx [%o4 + TF_FPRS], %o5
590 andcc %o5, FPRS_FEF, %g0
! If so, spill the user's FP registers (%f0-%f62, four 64-byte blocks)
! into the pcb's user-FP save area before clobbering them.
593 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
594 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
595 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
596 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
! Clear FPRS_FEF in the saved %fprs and flag the pcb — presumably
! PCB_FEF-style bookkeeping so the FP state is restored on return;
! confirm against the full source.
599 andn %o5, FPRS_FEF, %o5
600 stx %o5, [%o4 + TF_FPRS]
601 ldx [PCB_REG + PCB_FLAGS], %o4
603 stx %o4, [PCB_REG + PCB_FLAGS]
! Restore the caller's pstate (saved in %o3 on a missing line).
605 1: wrpr %o3, 0, %pstate
608 add %o0, VIS_BLOCKSIZE, %o0
609 sub %o2, VIS_BLOCKSIZE, %o2
! Main loop: double-buffered through %f16/%f32 — load one 64-byte block
! while storing the previous one, advancing src/dst by VIS_BLOCKSIZE.
611 2: ldda [%o0] %asi, %f16
620 stda %f32, [%o1] %asi
621 add %o0, VIS_BLOCKSIZE, %o0
622 subcc %o2, VIS_BLOCKSIZE, %o2
624 add %o1, VIS_BLOCKSIZE, %o1
! Tail: drain the last buffered block(s).
634 stda %f32, [%o1] %asi
635 add %o0, VIS_BLOCKSIZE, %o0
636 sub %o2, VIS_BLOCKSIZE, %o2
638 add %o1, VIS_BLOCKSIZE, %o1
642 stda %f16, [%o1] %asi
647 END(spitfire_block_copy)
650 * void zeus_block_copy(void *src, void *dst, size_t len)
! Prefetch-heavy block-copy variant (SPARC64-V "Zeus"): same FP
! save/restore preamble as spitfire_block_copy, but the copy loop uses
! plain 8-byte ldd's into %f0-%f14 with aggressive software prefetch
! instead of block loads.  NOTE(review): interior lines (branches,
! fmovd shuffles into %f32.., retl, END) are missing from this chunk.
652 ENTRY(zeus_block_copy)
653 prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0
656 wrpr %g0, PSTATE_NORMAL, %pstate
658 wr %g0, ASI_BLK_S, %asi
659 wr %g0, FPRS_FEF, %fprs
! Spill the interrupted context's FP registers to the pcb if it had the
! FPU enabled (same sequence as spitfire_block_copy above).
661 sub PCB_REG, TF_SIZEOF, %o4
662 ldx [%o4 + TF_FPRS], %o5
663 andcc %o5, FPRS_FEF, %g0
666 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
667 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
668 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
669 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
672 andn %o5, FPRS_FEF, %o5
673 stx %o5, [%o4 + TF_FPRS]
674 ldx [PCB_REG + PCB_FLAGS], %o4
676 stx %o4, [PCB_REG + PCB_FLAGS]
678 1: wrpr %o3, 0, %pstate
! Prologue of the pipelined loop: load the first block 8 bytes at a
! time, issuing near (read) and far (write-intent) prefetches.
680 ldd [%o0 + (0 * 8)], %f0
681 prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
682 ldd [%o0 + (1 * 8)], %f2
683 prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
685 ldd [%o0 + (2 * 8)], %f4
686 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
688 ldd [%o0 + (3 * 8)], %f6
689 prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
691 ldd [%o0 + (4 * 8)], %f8
692 prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
694 ldd [%o0 + (5 * 8)], %f10
695 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
697 ldd [%o0 + (6 * 8)], %f12
698 prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
700 ldd [%o0 + (7 * 8)], %f14
701 ldd [%o0 + (8 * 8)], %f0
702 sub %o2, VIS_BLOCKSIZE, %o2
703 add %o0, VIS_BLOCKSIZE, %o0
704 prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
706 prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
! Steady-state loop: loads the next block into %f2-%f14/%f0 while the
! previous block (staged in %f32..) is written with one block store.
709 2: ldd [%o0 + (1 * 8)], %f2
711 ldd [%o0 + (2 * 8)], %f4
713 stda %f32, [%o1] %asi
714 ldd [%o0 + (3 * 8)], %f6
716 ldd [%o0 + (4 * 8)], %f8
718 ldd [%o0 + (5 * 8)], %f10
720 ldd [%o0 + (6 * 8)], %f12
722 ldd [%o0 + (7 * 8)], %f14
724 ldd [%o0 + (8 * 8)], %f0
726 sub %o2, VIS_BLOCKSIZE, %o2
727 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
728 add %o1, VIS_BLOCKSIZE, %o1
729 prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
730 add %o0, VIS_BLOCKSIZE, %o0
! Loop until fewer than one full block plus the 8-byte lookahead remain.
731 cmp %o2, VIS_BLOCKSIZE + 8
733 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
! Epilogue: drain the pipeline for the final block(s).
734 ldd [%o0 + (1 * 8)], %f2
736 ldd [%o0 + (2 * 8)], %f4
738 stda %f32, [%o1] %asi
739 ldd [%o0 + (3 * 8)], %f6
741 ldd [%o0 + (4 * 8)], %f8
743 ldd [%o0 + (5 * 8)], %f10
745 ldd [%o0 + (6 * 8)], %f12
747 ldd [%o0 + (7 * 8)], %f14
749 add %o1, VIS_BLOCKSIZE, %o1
753 stda %f32, [%o1] %asi
761 * void spitfire_block_zero(void *dst, size_t len)
762 * void zeus_block_zero(void *dst, size_t len)
! Block zero: same FP save preamble as the block copies, then stores
! %f0 (zeroed on a line missing from this chunk — presumably via
! fzero/fsrc) over the destination, four 64-byte blocks per iteration.
! zeus_block_zero is an alternate entry to the identical code.
764 ALTENTRY(zeus_block_zero)
765 ENTRY(spitfire_block_zero)
767 wrpr %g0, PSTATE_NORMAL, %pstate
769 wr %g0, ASI_BLK_S, %asi
770 wr %g0, FPRS_FEF, %fprs
! Spill the interrupted context's FP registers to the pcb if it had the
! FPU enabled (same sequence as the block-copy routines above).
772 sub PCB_REG, TF_SIZEOF, %o4
773 ldx [%o4 + TF_FPRS], %o5
774 andcc %o5, FPRS_FEF, %g0
777 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
778 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
779 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
780 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
783 andn %o5, FPRS_FEF, %o5
784 stx %o5, [%o4 + TF_FPRS]
785 ldx [PCB_REG + PCB_FLAGS], %o4
787 stx %o4, [PCB_REG + PCB_FLAGS]
789 1: wrpr %o3, 0, %pstate
! Main loop: 256 bytes (4 blocks) of zeros per iteration.
800 1: stda %f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
801 stda %f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
802 stda %f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
803 stda %f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
804 sub %o1, (4 * VIS_BLOCKSIZE), %o1
806 add %o0, (4 * VIS_BLOCKSIZE), %o0
811 END(spitfire_block_zero)
! fpu_fault_size lets the trap code size the FPU-fault-expected region
! without knowing its layout.
817 .globl fpu_fault_size
818 .set fpu_fault_size, fpu_fault_end - fpu_fault_begin
! Fragments of the kernel longjmp/setjmp pair: longjmp reloads the
! saved %fp/%sp/%pc from the jmpbuf in %g1 (validation branches are on
! lines missing from this chunk) and panics on a corrupt buffer;
! setjmp stores the current %sp/%o7/%fp into the jmpbuf in %o0.
824 ldx [%g1 + _JB_FP], %g2
829 ldx [%g1 + _JB_SP], %o2
833 ldx [%g1 + _JB_PC], %o7
836 2: PANIC("longjmp botch", %l1)
840 stx %sp, [%o0 + _JB_SP]
841 stx %o7, [%o0 + _JB_PC]
842 stx %fp, [%o0 + _JB_FP]
848 * void ofw_entry(cell_t args[])
! ofw_entry: call into the Open Firmware client interface (ofw_vec)
! with interrupts and 32-bit address masking disabled, switching to the
! PROM-compatible window state around the call.  NOTE(review): the
! actual jmpl to %l7 and several pstate/wstate reads are on lines
! missing from this chunk.
851 save %sp, -CCFSZ, %sp
852 SET(ofw_vec, %l7, %l6)
855 andn %l7, PSTATE_AM | PSTATE_IE, %l5
857 SET(tba_taken_over, %l5, %l4)
860 andn %l5, WSTATE_PROM_MASK, %l3
861 wrpr %l3, WSTATE_PROM_KMIX, %wstate
866 wrpr %g0, %l5, %wstate
867 1: wrpr %l7, 0, %pstate
! Return OFW's result (%o0 of the callee window) to our caller.
869 restore %o0, %g0, %o0
873 * void ofw_exit(cell_t args[])
! ofw_exit: hand control back to the firmware permanently — restores
! the OFW trap table, sets up a stack in a locked page, and forces
! primary DMMU context 0 and trap level 0 before entering the PROM.
876 save %sp, -CCFSZ, %sp
878 SET(ofw_tba, %l7, %l5)
881 andn %l7, PSTATE_AM | PSTATE_IE, %l7
884 andn %l7, WSTATE_PROM_MASK, %l7
885 wrpr %l7, WSTATE_PROM_KMIX, %wstate
886 wrpr %l5, 0, %tba ! restore the OFW trap table
887 SET(ofw_vec, %l7, %l6)
889 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
890 sub %l0, SPOFF, %fp ! setup a stack in a locked page
891 sub %l0, SPOFF + CCFSZ, %sp
892 mov AA_DMMU_PCXR, %l3 ! force primary DMMU context 0
893 sethi %hi(KERNBASE), %l5
894 stxa %g0, [%l3] ASI_DMMU
896 wrpr %g0, 0, %tl ! force trap level 0
920 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
! These duplicate sys/gmon.h's GMON_PROF_* state values because that
! header cannot be pulled through genassym (see XXX above); they must
! be kept in sync with it manually.
924 #define GMON_PROF_OFF 3
925 #define GMON_PROF_HIRES 4
! _mcount is an alias: GCC's -finstrument-functions emits calls to
! __cyg_profile_func_enter, which dispatches to mcount below.
928 .set _mcount, __cyg_profile_func_enter
! Profiling entry hook: skip entirely when profiling is off, otherwise
! tail into mcount.  NOTE(review): the branch, delay slot, and the jmp
! to %o3 are on lines missing from this chunk.
930 ENTRY(__cyg_profile_func_enter)
931 SET(_gmonparam, %o3, %o2)
932 lduw [%o2 + GM_STATE], %o3
933 cmp %o3, GMON_PROF_OFF
936 SET(mcount, %o3, %o2)
941 END(__cyg_profile_func_enter)
! Profiling exit hook: only does work in high-resolution mode
! (GMON_PROF_HIRES), dispatching to mexitcount.  NOTE(review): the
! branch and jmp are on lines missing from this chunk.
945 ENTRY(__cyg_profile_func_exit)
946 SET(_gmonparam, %o3, %o2)
947 lduw [%o2 + GM_STATE], %o3
948 cmp %o3, GMON_PROF_HIRES
951 SET(mexitcount, %o3, %o2)
956 END(__cyg_profile_func_exit)