2 * Copyright (c) 2001 Jake Burkholder.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <machine/asm.h>
28 __FBSDID("$FreeBSD$");
30 #include "opt_kstack_pages.h"
32 #include <sys/errno.h>
34 #include <machine/asi.h>
35 #include <machine/asmacros.h>
36 #include <machine/fsr.h>
37 #include <machine/intr_machdep.h>
38 #include <machine/pcb.h>
39 #include <machine/pstate.h>
40 #include <machine/wstate.h>
/*
 * Declare to the assembler that this file uses the global registers
 * %g2, %g3 and %g6; each is given the #ignore attribute.
 */
44 .register %g2, #ignore
45 .register %g3, #ignore
46 .register %g6, #ignore
49 * Common code for copy routines.
51 * We use large macros to generate functions for each of the copy routines.
52 * This allows the load and store instructions to be generated for the right
53 * operation, asi or not. It is possible to write an asi independent function
54 * but this would require 2 expensive wrs in the main loop to switch %asi.
55 * It would also screw up profiling (if we ever get it), but may save some I$.
56 * We assume that either one of dasi and sasi is empty, or that they are both
57 * the same (empty or non-empty). It is up to the caller to set %asi.
61 * ASI independent implementation of copystr(9).
62 * Used to implement copyinstr() and copystr().
64 * Return value is in %g1.
/*
 * _COPYSTR(src, dst, len, done, sa, sasi, da, dasi)
 *
 * src, dst, len and done are register operands.  sa/sasi are pasted into
 * the LD() invocation and da/dasi into the ST() invocation, selecting the
 * access form and its ASI operand for the source and destination side;
 * the users in this file pass either "EMPTY, EMPTY" or "a, %asi".
 *
 * Steps that can be read off the body: one byte is loaded from [src] into
 * %g1 and stored to [dst]; label 2 puts ENAMETOOLONG into %g1; label 3
 * replaces %g2 with src - %g2 (presumably the number of bytes consumed,
 * assuming %g2 was seeded with the initial src -- TODO confirm).
 *
 * Uses %g1 and %g2 as scratch; the result code is left in %g1.
 */
66 #define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
72 LD(ub, sa) [src] sasi, %g1 ; \
73 ST(b, da) %g1, [dst] dasi ; \
78 2: mov ENAMETOOLONG, %g1 ; \
79 3: sub src, %g2, %g2 ; \
85 * ASI independent implementation of memset(3).
86 * Used to implement bzero(), memset() and aszero().
88 * If the pattern is non-zero, duplicate it to fill 64 bits.
89 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
90 * It has yet to be determined how much unrolling is beneficial.
91 * Could also read and compare before writing to minimize snoop traffic.
93 * XXX bzero() should be implemented as
94 * #define bzero(dst, len) (void)memset((dst), 0, (len))
/*
 * _MEMSET(dst, pat, len, da, dasi)
 *
 * dst, pat and len are register operands; da/dasi are pasted into every
 * ST() to choose the store form and its ASI operand ("EMPTY, EMPTY" or
 * "a, %asi" in this file).
 *
 * pat is overwritten: it is first masked to its low 8 bits, and %g1
 * receives copies of it shifted left by 16 and by 32 (presumably merged
 * back into pat to replicate the byte across 64 bits -- TODO confirm).
 *
 * The stores appear in this order: a single-byte store, a run of four
 * 8-byte stores covering [dst] through [dst + 24], a single 8-byte store,
 * and a final single-byte store.
 */
97 #define _MEMSET(dst, pat, len, da, dasi) \
99 and pat, 0xff, pat ; \
103 sllx pat, 16, %g1 ; \
105 sllx pat, 32, %g1 ; \
113 ST(b, da) pat, [dst] dasi ; \
120 ST(x, da) pat, [dst] dasi ; \
121 ST(x, da) pat, [dst + 8] dasi ; \
122 ST(x, da) pat, [dst + 16] dasi ; \
123 ST(x, da) pat, [dst + 24] dasi ; \
130 ST(x, da) pat, [dst] dasi ; \
137 ST(b, da) pat, [dst] dasi ; \
143 * ASI independent implementation of memcpy(3).
144 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
145 * ascopyfrom() and ascopyto().
147 * Transfer bytes until dst is 8-byte aligned. If src is then also 8-byte
148 * aligned, transfer 8 bytes, otherwise finish with bytes. The unaligned
149 * case could be optimized, but it is expected that this is the uncommon
150 * case and of questionable value. The code to do so is also rather large
151 * and ugly. It has yet to be determined how much unrolling is beneficial.
153 * XXX bcopy() must also check for overlap. This is stupid.
154 * XXX bcopy() should be implemented as
155 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
/*
 * _MEMCPY(dst, src, len, da, dasi, sa, sasi)
 *
 * Note the argument order: destination first, then source, then length.
 * da/dasi are pasted into every ST() and sa/sasi into every LD(), so the
 * store side and the load side can independently be plain accesses
 * ("EMPTY, EMPTY") or %asi-qualified ones ("a, %asi").
 *
 * The transfers appear in this order: a byte copy through %g1; a 32-byte
 * group in which four 8-byte loads into %g1-%g4 are all issued before the
 * four matching stores; a single 8-byte copy through %g1; and a trailing
 * byte copy through %g1.
 *
 * Uses %g1, %g2, %g3 and %g4 as scratch.
 */
158 #define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \
164 LD(ub, sa) [src] sasi, %g1 ; \
165 ST(b, da) %g1, [dst] dasi ; \
178 LD(x, sa) [src] sasi, %g1 ; \
179 LD(x, sa) [src + 8] sasi, %g2 ; \
180 LD(x, sa) [src + 16] sasi, %g3 ; \
181 LD(x, sa) [src + 24] sasi, %g4 ; \
182 ST(x, da) %g1, [dst] dasi ; \
183 ST(x, da) %g2, [dst + 8] dasi ; \
184 ST(x, da) %g3, [dst + 16] dasi ; \
185 ST(x, da) %g4, [dst + 24] dasi ; \
193 LD(x, sa) [src] sasi, %g1 ; \
194 ST(x, da) %g1, [dst] dasi ; \
202 LD(ub, sa) [src] sasi, %g1 ; \
203 ST(b, da) %g1, [dst] dasi ; \
210 * Extension of _MEMCPY dealing with overlap, but unaware of ASIs.
211 * Used for bcopy() and memmove().
/*
 * _MEMMOVE(dst, src, len)
 *
 * dst, src and len are register operands.  %g1 receives dst - src, which
 * feeds the overlap test; the unsigned "greater or equal" branch to label
 * 2 selects the forward path.  On the backward path both src and dst are
 * first advanced by len so that they point just past the end of their
 * buffers.  The forward path at label 2 is a plain _MEMCPY expansion with
 * every ASI argument EMPTY, i.e. ordinary loads and stores.
 *
 * Uses %g1 as scratch in addition to whatever _MEMCPY uses.
 */
213 #define _MEMMOVE(dst, src, len) \
214 /* Check for overlap, and copy backwards if so. */ \
215 sub dst, src, %g1 ; \
217 bgeu,a,pt %xcc, 2f ; \
219 /* Copy backwards. */ \
220 add src, len, src ; \
221 add dst, len, dst ; \
229 2: /* Do the fast version. */ \
230 _MEMCPY(dst, src, len, EMPTY, EMPTY, EMPTY, EMPTY) ; \
234 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
/*
 * _MEMCPY takes (dst, src, len): dst = %o2, src = %o1, len = %o3.
 * Both the store side and the load side are given "a, %asi", so every
 * access in the copy carries the %asi operand.
 */
238 _MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
244 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
/*
 * dst = %o2, src = %o1, len = %o3.  Only the loads are %asi-qualified;
 * the stores to dst are plain (EMPTY, EMPTY).
 */
248 _MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
254 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
/*
 * dst = %o2, src = %o0, len = %o3.  Only the stores are %asi-qualified;
 * the loads from src are plain (EMPTY, EMPTY).
 */
258 _MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
264 * void aszero(u_long asi, vm_offset_t pa, size_t len)
/*
 * dst = %o1, len = %o2.  The pattern operand is %g0, so zeroes are
 * stored; every store is %asi-qualified.
 */
268 _MEMSET(%o1, %g0, %o2, a, %asi)
274 * int bcmp(const void *b1, const void *b2, size_t len)
/*
 * Fetch the byte at offset %o3 from each buffer (%o0 and %o1) into %o4
 * and %o5 respectively; %o3 is the running index shared by both buffers.
 */
279 1: ldub [%o0 + %o3], %o4
280 ldub [%o1 + %o3], %o5
292 * void bcopy(const void *src, void *dst, size_t len)
/*
 * bcopy() takes (src, dst, len) but _MEMMOVE takes (dst, src, len), hence
 * the swapped operands: dst = %o1, src = %o0, len = %o2.
 */
295 _MEMMOVE(%o1, %o0, %o2)
301 * void bzero(void *b, size_t len)
/* dst = %o0, len = %o1; pattern %g0 stores zeroes with plain stores. */
304 _MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
310 * int copystr(const void *src, void *dst, size_t len, size_t *done)
/*
 * src = %o0, dst = %o1, len = %o2, done = %o3, with plain loads and
 * stores on both sides (all ASI arguments EMPTY).
 */
313 _COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
319 * void *memcpy(void *dst, const void *src, size_t len)
/*
 * dst = %o3, src = %o1, len = %o2, plain loads and stores.  The copy works
 * on %o3 rather than %o0 (presumably a copy of the dst argument, so that
 * %o0 can be returned unchanged -- TODO confirm).
 */
323 _MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
329 * void *memmove(void *dst, const void *src, size_t len)
/*
 * dst = %o3, src = %o1, len = %o2.  The move works on %o3 rather than %o0
 * (presumably a copy of the dst argument, so that %o0 can be returned
 * unchanged -- TODO confirm).
 */
333 _MEMMOVE(%o3, %o1, %o2)
339 * void *memset(void *b, int c, size_t len)
/*
 * dst = %o3, pattern = %o1 (the c argument), len = %o2, plain stores.
 * The fill works on %o3 rather than %o0 (presumably a copy of the b
 * argument, so that %o0 can be returned unchanged -- TODO confirm).
 */
343 _MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
348 .globl copy_nofault_begin
353 * int copyin(const void *uaddr, void *kaddr, size_t len)
/* Select ASI_AIUP for the %asi-qualified accesses of the copy below. */
356 wr %g0, ASI_AIUP, %asi
/*
 * dst = %o1 (kaddr), src = %o0 (uaddr), len = %o2.  Only the loads from
 * uaddr go through %asi; the stores to kaddr are plain.
 */
357 _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
363 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
/* Select ASI_AIUP for the %asi-qualified loads of the string copy below. */
366 wr %g0, ASI_AIUP, %asi
/*
 * src = %o0 (uaddr), dst = %o1 (kaddr), len = %o2, done = %o3.  Only the
 * loads from uaddr go through %asi; the stores to kaddr are plain.
 */
367 _COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
373 * int copyout(const void *kaddr, void *uaddr, size_t len)
/* Select ASI_AIUP for the %asi-qualified stores of the copy below. */
376 wr %g0, ASI_AIUP, %asi
/*
 * dst = %o1 (uaddr), src = %o0 (kaddr), len = %o2.  Only the stores to
 * uaddr go through %asi; the loads from kaddr are plain.
 */
377 _MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
382 .globl copy_nofault_end
391 .globl fs_nofault_begin
396 * Chatty aliases for fetch, store functions.
/*
 * Export the short names and bind them to the width-specific routines:
 * fusword/susword are the 16-bit variants, and fuword/suword/casuword are
 * the 64-bit variants.
 */
398 .globl fubyte, fusword, fuword, subyte, susword, suword
400 .set fusword, fuword16
401 .set fuword, fuword64
403 .set susword, suword16
404 .set suword, suword64
406 .globl casuword32, casuword, fuptr, suptr
407 .set casuword, casuword64
412 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
/*
 * 32-bit compare-and-swap on the word at [%o0], accessed through
 * ASI_AIUP: %o1 is the comparison value (e) and %o2 the replacement (s).
 * casa leaves the previous memory contents in %o2.
 */
415 casa [%o0] ASI_AIUP, %o1, %o2
421 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
/*
 * 64-bit compare-and-swap on the doubleword at [%o0], accessed through
 * ASI_AIUP: %o1 is the comparison value (e) and %o2 the replacement (s).
 * casxa leaves the previous memory contents in %o2.
 */
424 casxa [%o0] ASI_AIUP, %o1, %o2
430 * int fuword8(const void *base)
/*
 * fuword8/16/32/64: each routine loads from the address in %o0 through
 * ASI_AIUP and overwrites %o0 with the value.  The 8-, 16- and 32-bit
 * forms use the unsigned (zero-extending) loads; the 64-bit form uses ldxa.
 */
434 lduba [%o0] ASI_AIUP, %o0
438 * int fuword16(const void *base)
442 lduha [%o0] ASI_AIUP, %o0
446 * int32_t fuword32(const void *base)
450 lduwa [%o0] ASI_AIUP, %o0
454 * int64_t fuword64(const void *base)
458 ldxa [%o0] ASI_AIUP, %o0
462 * int suword8(const void *base, int word)
/*
 * suword8/16/32/64: each routine stores the low 8, 16 or 32 bits, or all
 * 64 bits, of %o1 to the address in %o0 through ASI_AIUP.
 */
465 stba %o1, [%o0] ASI_AIUP
471 * int suword16(const void *base, int word)
474 stha %o1, [%o0] ASI_AIUP
480 * int suword32(const void *base, int32_t word)
483 stwa %o1, [%o0] ASI_AIUP
489 * int suword64(const void *base, int64_t word)
492 stxa %o1, [%o0] ASI_AIUP
497 .globl fs_nofault_end
506 .globl fas_nofault_begin
510 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
/*
 * fasword8/16/32: each routine loads an unsigned 8-, 16- or 32-bit value
 * from the address in %o1 using the ASI currently in %asi (presumably
 * loaded from the asi argument in %o0 -- TODO confirm) and leaves it in
 * %o3.
 */
515 lduba [%o1] %asi, %o3
523 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
528 lduha [%o1] %asi, %o3
536 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
541 lduwa [%o1] %asi, %o3
548 .globl fas_nofault_end
558 .globl fpu_fault_begin
563 * void spitfire_block_copy(void *src, void *dst, size_t len)
/*
 * Block copy using the floating-point registers and block load/store
 * accesses.  Register roles follow the prototype: %o0 = src, %o1 = dst,
 * %o2 = len; src and len are stepped in units of VIS_BLOCKSIZE.
 */
565 ENTRY(spitfire_block_copy)
/*
 * Run the set-up below with %pstate = PSTATE_NORMAL (presumably the prior
 * %pstate was read into %o3 just before -- TODO confirm).
 */
567 wrpr %g0, PSTATE_NORMAL, %pstate
/* %asi selects ASI_BLK_S for ldda/stda; enable the FPU via FPRS_FEF. */
569 wr %g0, ASI_BLK_S, %asi
570 wr %g0, FPRS_FEF, %fprs
/*
 * %o4 = PCB_REG - TF_SIZEOF (presumably the trapframe located just below
 * the pcb -- TODO confirm).  Test FPRS_FEF in the fprs value saved there.
 */
572 sub PCB_REG, TF_SIZEOF, %o4
573 ldx [%o4 + TF_FPRS], %o5
574 andcc %o5, FPRS_FEF, %g0
/*
 * Save the four floating-point register groups starting at %f0, %f16,
 * %f32 and %f48 to consecutive blocks at PCB_REG + PCB_UFP.
 */
577 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
578 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
579 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
580 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
/* Clear FPRS_FEF in the saved fprs value and write it back. */
583 andn %o5, FPRS_FEF, %o5
584 stx %o5, [%o4 + TF_FPRS]
/* Read-modify-write of the pcb flags word at PCB_REG + PCB_FLAGS. */
585 ldx [PCB_REG + PCB_FLAGS], %o4
587 stx %o4, [PCB_REG + PCB_FLAGS]
/* Write %o3 back to %pstate. */
589 1: wrpr %o3, 0, %pstate
/* Account for one source block: advance src, reduce len. */
592 add %o0, VIS_BLOCKSIZE, %o0
593 sub %o2, VIS_BLOCKSIZE, %o2
/*
 * Copy loop: source blocks are loaded into the %f16 group, destination
 * blocks are stored from the %f32 group; src, dst and len are stepped by
 * one block after each store.
 */
595 2: ldda [%o0] %asi, %f16
604 stda %f32, [%o1] %asi
605 add %o0, VIS_BLOCKSIZE, %o0
606 subcc %o2, VIS_BLOCKSIZE, %o2
608 add %o1, VIS_BLOCKSIZE, %o1
618 stda %f32, [%o1] %asi
619 add %o0, VIS_BLOCKSIZE, %o0
620 sub %o2, VIS_BLOCKSIZE, %o2
622 add %o1, VIS_BLOCKSIZE, %o1
/* Final block is stored straight from the %f16 group. */
626 stda %f16, [%o1] %asi
631 END(spitfire_block_copy)
634 * void zeus_block_copy(void *src, void *dst, size_t len)
/*
 * Block copy variant that reads the source with ordinary 8-byte
 * floating-point loads plus software prefetches, and writes the
 * destination with block stores.  Register roles follow the prototype:
 * %o0 = src, %o1 = dst, %o2 = len.
 */
636 ENTRY(zeus_block_copy)
/* Start fetching the first source block right away. */
637 prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0
/*
 * Run the set-up below with %pstate = PSTATE_NORMAL (presumably the prior
 * %pstate was read into %o3 just before -- TODO confirm).
 */
640 wrpr %g0, PSTATE_NORMAL, %pstate
/* %asi selects ASI_BLK_S for stda; enable the FPU via FPRS_FEF. */
642 wr %g0, ASI_BLK_S, %asi
643 wr %g0, FPRS_FEF, %fprs
/*
 * %o4 = PCB_REG - TF_SIZEOF (presumably the trapframe located just below
 * the pcb -- TODO confirm).  Test FPRS_FEF in the fprs value saved there.
 */
645 sub PCB_REG, TF_SIZEOF, %o4
646 ldx [%o4 + TF_FPRS], %o5
647 andcc %o5, FPRS_FEF, %g0
/*
 * Save the four floating-point register groups starting at %f0, %f16,
 * %f32 and %f48 to consecutive blocks at PCB_REG + PCB_UFP.
 */
650 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
651 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
652 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
653 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
/* Clear FPRS_FEF in the saved fprs value and write it back. */
656 andn %o5, FPRS_FEF, %o5
657 stx %o5, [%o4 + TF_FPRS]
/* Read-modify-write of the pcb flags word at PCB_REG + PCB_FLAGS. */
658 ldx [PCB_REG + PCB_FLAGS], %o4
660 stx %o4, [PCB_REG + PCB_FLAGS]
/* Write %o3 back to %pstate. */
662 1: wrpr %o3, 0, %pstate
/*
 * Prime the pipeline: load doublewords 0-7 of the source into %f0-%f14,
 * interleaved with prefetches for source blocks further ahead.
 */
664 ldd [%o0 + (0 * 8)], %f0
665 prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
666 ldd [%o0 + (1 * 8)], %f2
667 prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
669 ldd [%o0 + (2 * 8)], %f4
670 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
672 ldd [%o0 + (3 * 8)], %f6
673 prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
675 ldd [%o0 + (4 * 8)], %f8
676 prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
678 ldd [%o0 + (5 * 8)], %f10
679 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
681 ldd [%o0 + (6 * 8)], %f12
682 prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
684 ldd [%o0 + (7 * 8)], %f14
/* Doubleword 8 (byte offset 64) is fetched into %f0 before src advances. */
685 ldd [%o0 + (8 * 8)], %f0
686 sub %o2, VIS_BLOCKSIZE, %o2
687 add %o0, VIS_BLOCKSIZE, %o0
688 prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
690 prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
/*
 * Main loop: reload doublewords 1-8 into %f2-%f14 and %f0, block-store
 * the %f32 group to dst, then step len, dst and src by one block and keep
 * prefetching ahead.
 */
693 2: ldd [%o0 + (1 * 8)], %f2
695 ldd [%o0 + (2 * 8)], %f4
697 stda %f32, [%o1] %asi
698 ldd [%o0 + (3 * 8)], %f6
700 ldd [%o0 + (4 * 8)], %f8
702 ldd [%o0 + (5 * 8)], %f10
704 ldd [%o0 + (6 * 8)], %f12
706 ldd [%o0 + (7 * 8)], %f14
708 ldd [%o0 + (8 * 8)], %f0
710 sub %o2, VIS_BLOCKSIZE, %o2
711 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
712 add %o1, VIS_BLOCKSIZE, %o1
713 prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
714 add %o0, VIS_BLOCKSIZE, %o0
/* Loop condition: compare the remaining length with one block plus 8 bytes. */
715 cmp %o2, VIS_BLOCKSIZE + 8
717 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
/*
 * Tail: load doublewords 1-7 only (no read of doubleword 8 this time),
 * store the %f32 group, advance dst, and store the last block.
 */
718 ldd [%o0 + (1 * 8)], %f2
720 ldd [%o0 + (2 * 8)], %f4
722 stda %f32, [%o1] %asi
723 ldd [%o0 + (3 * 8)], %f6
725 ldd [%o0 + (4 * 8)], %f8
727 ldd [%o0 + (5 * 8)], %f10
729 ldd [%o0 + (6 * 8)], %f12
731 ldd [%o0 + (7 * 8)], %f14
733 add %o1, VIS_BLOCKSIZE, %o1
737 stda %f32, [%o1] %asi
745 * void spitfire_block_zero(void *dst, size_t len)
746 * void zeus_block_zero(void *dst, size_t len)
/*
 * Block zero using block stores from the floating-point registers.  One
 * body serves both entry points.  Register roles follow the prototype:
 * %o0 = dst, %o1 = len.
 */
748 ALTENTRY(zeus_block_zero)
749 ENTRY(spitfire_block_zero)
/*
 * Run the set-up below with %pstate = PSTATE_NORMAL (presumably the prior
 * %pstate was read into %o3 just before -- TODO confirm).
 */
751 wrpr %g0, PSTATE_NORMAL, %pstate
/* %asi selects ASI_BLK_S for stda; enable the FPU via FPRS_FEF. */
753 wr %g0, ASI_BLK_S, %asi
754 wr %g0, FPRS_FEF, %fprs
/*
 * %o4 = PCB_REG - TF_SIZEOF (presumably the trapframe located just below
 * the pcb -- TODO confirm).  Test FPRS_FEF in the fprs value saved there.
 */
756 sub PCB_REG, TF_SIZEOF, %o4
757 ldx [%o4 + TF_FPRS], %o5
758 andcc %o5, FPRS_FEF, %g0
/*
 * Save the four floating-point register groups starting at %f0, %f16,
 * %f32 and %f48 to consecutive blocks at PCB_REG + PCB_UFP.
 */
761 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
762 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
763 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
764 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
/* Clear FPRS_FEF in the saved fprs value and write it back. */
767 andn %o5, FPRS_FEF, %o5
768 stx %o5, [%o4 + TF_FPRS]
/* Read-modify-write of the pcb flags word at PCB_REG + PCB_FLAGS. */
769 ldx [PCB_REG + PCB_FLAGS], %o4
771 stx %o4, [PCB_REG + PCB_FLAGS]
/* Write %o3 back to %pstate. */
773 1: wrpr %o3, 0, %pstate
/*
 * Store loop, four blocks per iteration: the %f0 group (presumably zeroed
 * beforehand -- TODO confirm) is block-stored to four consecutive
 * destination blocks, then len is reduced and dst advanced by four blocks.
 */
784 1: stda %f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
785 stda %f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
786 stda %f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
787 stda %f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
788 sub %o1, (4 * VIS_BLOCKSIZE), %o1
790 add %o0, (4 * VIS_BLOCKSIZE), %o0
795 END(spitfire_block_zero)
/*
 * Exported absolute symbol holding the byte distance between the
 * fpu_fault_begin and fpu_fault_end labels.
 */
801 .globl fpu_fault_size
802 .set fpu_fault_size, fpu_fault_end - fpu_fault_begin
/*
 * The jump buffer is addressed through %g1 here.  Fetch the saved frame
 * pointer into %g2, the saved stack pointer into %o2 and the saved
 * program counter into %o7.
 */
808 ldx [%g1 + _JB_FP], %g2
813 ldx [%g1 + _JB_SP], %o2
817 ldx [%g1 + _JB_PC], %o7
/* Failure path: panic rather than return through an unusable buffer. */
820 2: PANIC("longjmp botch", %l1)
/*
 * Record the caller's context in the jump buffer at %o0: stack pointer,
 * return address (%o7) as the saved pc, and frame pointer.
 */
824 stx %sp, [%o0 + _JB_SP]
825 stx %o7, [%o0 + _JB_PC]
826 stx %fp, [%o0 + _JB_FP]
832 * void ofw_entry(cell_t args[])
/* Open a new register window with a CCFSZ-byte frame. */
835 save %sp, -CCFSZ, %sp
/* %l6 = address of ofw_vec (%l7 is the scratch register for SET). */
836 SET(ofw_vec, %l7, %l6)
/* %l5 = %l7 with the PSTATE_AM and PSTATE_IE bits cleared. */
839 andn %l7, PSTATE_AM | PSTATE_IE, %l5
/* %l4 = address of tba_taken_over (%l5 is the scratch register here). */
841 SET(tba_taken_over, %l5, %l4)
/*
 * Clear the WSTATE_PROM_MASK bits of %l5 and write the result, XORed
 * with WSTATE_PROM_KMIX by wrpr, to %wstate.
 */
844 andn %l5, WSTATE_PROM_MASK, %l3
845 wrpr %l3, WSTATE_PROM_KMIX, %wstate
/* Put the value held in %l5 back into %wstate. */
850 wrpr %g0, %l5, %wstate
/* Put the value held in %l7 back into %pstate. */
851 1: wrpr %l7, 0, %pstate
/* Pop the window, handing this window's %o0 to the caller as its %o0. */
853 restore %o0, %g0, %o0
857 * void ofw_exit(cell_t args[])
/* Open a new register window with a CCFSZ-byte frame. */
860 save %sp, -CCFSZ, %sp
/* %l5 = address of ofw_tba (%l7 is the scratch register for SET). */
862 SET(ofw_tba, %l7, %l5)
/* Clear the PSTATE_AM and PSTATE_IE bits in %l7. */
865 andn %l7, PSTATE_AM | PSTATE_IE, %l7
/*
 * Clear the WSTATE_PROM_MASK bits in %l7 and write the result, XORed
 * with WSTATE_PROM_KMIX by wrpr, to %wstate.
 */
868 andn %l7, WSTATE_PROM_MASK, %l7
869 wrpr %l7, WSTATE_PROM_KMIX, %wstate
870 wrpr %l5, 0, %tba ! restore the OFW trap table
/* %l6 = address of ofw_vec. */
871 SET(ofw_vec, %l7, %l6)
/*
 * %l0 = kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, i.e. PCB_SIZEOF
 * bytes below the end of kstack0; %fp and %sp are derived from it with
 * the SPOFF bias (and a CCFSZ frame for %sp).
 */
873 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
874 sub %l0, SPOFF, %fp ! setup a stack in a locked page
875 sub %l0, SPOFF + CCFSZ, %sp
876 mov AA_DMMU_PCXR, %l3 ! force primary DMMU context 0
/* %l5 = upper bits of KERNBASE. */
877 sethi %hi(KERNBASE), %l5
/* Zero is written to the register selected by %l3 in ASI_DMMU. */
878 stxa %g0, [%l3] ASI_DMMU
880 wrpr %g0, 0, %tl ! force trap level 0
904 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
/*
 * Profiling state values compared against the GM_STATE word of
 * _gmonparam by the __cyg_profile_func_* hooks in this file (presumably
 * kept in step with sys/gmon.h by hand -- TODO confirm).
 */
908 #define GMON_PROF_OFF 3
909 #define GMON_PROF_HIRES 4
/* _mcount is an alias for the function-entry profiling hook. */
912 .set _mcount, __cyg_profile_func_enter
914 ENTRY(__cyg_profile_func_enter)
/* %o3 = the GM_STATE word of _gmonparam (%o2 holds its address). */
915 SET(_gmonparam, %o3, %o2)
916 lduw [%o2 + GM_STATE], %o3
/* Compare the state with GMON_PROF_OFF. */
917 cmp %o3, GMON_PROF_OFF
/*
 * %o2 = address of mcount (presumably the target of a jump taken when
 * profiling is not off -- TODO confirm).
 */
920 SET(mcount, %o3, %o2)
925 END(__cyg_profile_func_enter)
929 ENTRY(__cyg_profile_func_exit)
/* %o3 = the GM_STATE word of _gmonparam (%o2 holds its address). */
930 SET(_gmonparam, %o3, %o2)
931 lduw [%o2 + GM_STATE], %o3
/* Compare the state with GMON_PROF_HIRES. */
932 cmp %o3, GMON_PROF_HIRES
/*
 * %o2 = address of mexitcount (presumably the target of a jump taken
 * when the state matches -- TODO confirm).
 */
935 SET(mexitcount, %o3, %o2)
940 END(__cyg_profile_func_exit)