2 * Copyright (c) 2001 Jake Burkholder.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <machine/asm.h>
28 __FBSDID("$FreeBSD$");
30 #include "opt_kstack_pages.h"
32 #include <sys/errno.h>
34 #include <machine/asi.h>
35 #include <machine/asmacros.h>
36 #include <machine/fsr.h>
37 #include <machine/intr_machdep.h>
38 #include <machine/pcb.h>
39 #include <machine/pstate.h>
40 #include <machine/wstate.h>
/*
 * Tell the assembler that the global registers %g2, %g3 and %g6 are
 * used intentionally in this file, suppressing .register warnings.
 */
44 .register %g2, #ignore
45 .register %g3, #ignore
46 .register %g6, #ignore
49 * Common code for copy routines.
51 * We use large macros to generate functions for each of the copy routines.
52 * This allows the load and store instructions to be generated for the right
53 * operation, asi or not. It is possible to write an asi independent function
54 * but this would require 2 expensive wrs in the main loop to switch %asi.
55 * It would also screw up profiling (if we ever get it), but may save some I$.
56 * We assume that either one of dasi and sasi is empty, or that they are both
57 * the same (empty or non-empty). It is up to the caller to set %asi.
61 * ASI independent implementation of copystr(9).
62 * Used to implement copyinstr() and copystr().
64 * Return value is in %g1.
/*
 * _COPYSTR: byte-at-a-time string copy used by copystr() and copyinstr().
 * NOTE(review): this macro body is fragmentary here — the loop control,
 * NUL test and the `done` store appear to be on lines that are missing.
 * Visible behavior: load a byte from [src] via sasi, store it to [dst]
 * via dasi; label 2 sets %g1 = ENAMETOOLONG (len exhausted); label 3
 * computes src - %g2 into %g2 (presumably bytes copied — confirm against
 * the full source).
 */
66 #define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
72 LD(ub, sa) [src] sasi, %g1 ; \
73 ST(b, da) %g1, [dst] dasi ; \
78 2: mov ENAMETOOLONG, %g1 ; \
79 3: sub src, %g2, %g2 ; \
85 * ASI independent implementation of memset(3).
86 * Used to implement bzero(), memset() and aszero().
88 * If the pattern is non-zero, duplicate it to fill 64 bits.
89 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
90 * It has yet to be determined how much unrolling is beneficial.
91 * Could also read and compare before writing to minimize snoop traffic.
93 * XXX bzero() should be implemented as
94 * #define bzero(dst, len) (void)memset((dst), 0, (len))
/*
 * _MEMSET: ASI-parameterized memset body.
 * NOTE(review): fragmentary — alignment tests and branches are missing.
 * Visible behavior: mask the pattern to one byte, widen it with shifts
 * (the OR steps that merge the shifted copies are not visible), store
 * single bytes until dst is presumably 8-byte aligned, then store in an
 * unrolled 4 x 8-byte loop, then one 8-byte store, then trailing bytes.
 */
97 #define _MEMSET(dst, pat, len, da, dasi) \
99 and pat, 0xff, pat ; \
103 sllx pat, 16, %g1 ; \
105 sllx pat, 32, %g1 ; \
113 ST(b, da) pat, [dst] dasi ; \
120 ST(x, da) pat, [dst] dasi ; \
121 ST(x, da) pat, [dst + 8] dasi ; \
122 ST(x, da) pat, [dst + 16] dasi ; \
123 ST(x, da) pat, [dst + 24] dasi ; \
130 ST(x, da) pat, [dst] dasi ; \
137 ST(b, da) pat, [dst] dasi ; \
143 * ASI independent implementation of memcpy(3).
144 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
145 * ascopyfrom() and ascopyto().
147 * Transfer bytes until dst is 8-byte aligned. If src is then also 8 byte
148 * aligned, transfer 8 bytes, otherwise finish with bytes. The unaligned
149 * case could be optimized, but it is expected that this is the uncommon
150 * case and of questionable value. The code to do so is also rather large
151 * and ugly. It has yet to be determined how much unrolling is beneficial.
153 * XXX bcopy() must also check for overlap. This is stupid.
154 * XXX bcopy() should be implemented as
155 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
/*
 * _MEMCPY: ASI-parameterized memcpy body; LD/ST expand to plain or
 * alternate-space loads/stores depending on the sa/sasi and da/dasi args.
 * NOTE(review): fragmentary — alignment checks and loop branches are on
 * missing lines. Visible phases: byte copy (head, until dst is aligned),
 * unrolled 4 x 8-byte copy through %g1-%g4, single 8-byte copy, and a
 * final byte-copy tail.
 */
158 #define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \
164 LD(ub, sa) [src] sasi, %g1 ; \
165 ST(b, da) %g1, [dst] dasi ; \
178 LD(x, sa) [src] sasi, %g1 ; \
179 LD(x, sa) [src + 8] sasi, %g2 ; \
180 LD(x, sa) [src + 16] sasi, %g3 ; \
181 LD(x, sa) [src + 24] sasi, %g4 ; \
182 ST(x, da) %g1, [dst] dasi ; \
183 ST(x, da) %g2, [dst + 8] dasi ; \
184 ST(x, da) %g3, [dst + 16] dasi ; \
185 ST(x, da) %g4, [dst + 24] dasi ; \
193 LD(x, sa) [src] sasi, %g1 ; \
194 ST(x, da) %g1, [dst] dasi ; \
202 LD(ub, sa) [src] sasi, %g1 ; \
203 ST(b, da) %g1, [dst] dasi ; \
/*
 * ascopy: copy len bytes from src to dst, both accessed through %asi
 * on both sides (the wr of %o0 into %asi is presumably on a missing
 * line — confirm against the full source).
 */
210 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
214 _MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
/*
 * ascopyfrom: copy from an alternate-space source (loads via %asi) to a
 * normal kernel-address destination (plain stores, EMPTY asi args).
 */
220 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
224 _MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
/*
 * ascopyto: copy from a normal kernel-address source (plain loads) to
 * an alternate-space destination (stores via %asi). Note src is %o0
 * here because the asi argument occupies %o1.
 */
230 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
234 _MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
/*
 * aszero: zero len bytes at pa through %asi (pattern is %g0, i.e. 0).
 */
240 * void aszero(u_long asi, vm_offset_t pa, size_t len)
244 _MEMSET(%o1, %g0, %o2, a, %asi)
/*
 * bcmp: byte-wise comparison loop.
 * NOTE(review): fragmentary — the index setup, compare and branch lines
 * are missing. Visible: load one byte from each buffer at offset %o3
 * into %o4/%o5 per iteration.
 */
250 * int bcmp(const void *b1, const void *b2, size_t len)
255 1: ldub [%o0 + %o3], %o4
256 ldub [%o1 + %o3], %o5
/*
 * bcopy: note the (src, dst) argument order, reversed relative to
 * memcpy. The overlap check and backwards-copy path mentioned in the
 * comment are on missing lines; only the fast forward _MEMCPY at label
 * 3 is visible here.
 */
268 * void bcopy(const void *src, void *dst, size_t len)
272 * Check for overlap, and copy backwards if so.
296 * Do the fast version.
298 3: _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
/*
 * bzero: zero len bytes at b via _MEMSET with a zero pattern, plain
 * (non-ASI) stores.
 */
304 * void bzero(void *b, size_t len)
307 _MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
/*
 * copystr: kernel-to-kernel string copy; plain loads and stores on
 * both sides (all ASI arguments EMPTY).
 */
313 * int copystr(const void *src, void *dst, size_t len, size_t *done)
316 _COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
/*
 * memcpy: note dst is copied through %o3, not %o0 — %o0 must survive
 * as the return value (presumably moved on a missing line; confirm).
 */
322 * void *memcpy(void *dst, const void *src, size_t len)
326 _MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
/*
 * memset: like memcpy above, the buffer pointer is worked on in %o3 so
 * that %o0 can be returned unchanged (setup presumably on a missing
 * line).
 */
332 * void *memset(void *b, int c, size_t len)
336 _MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
/*
 * copyin/copyinstr/copyout: user<->kernel copies. The region between
 * copy_nofault_begin and copy_nofault_end is recognized by the fault
 * handler so that user-address faults here are recovered rather than
 * panicking. Each routine loads ASI_AIUP (address space: as-if-user,
 * primary) into %asi and directs the user side of the transfer
 * through it.
 */
341 .globl copy_nofault_begin
346 * int copyin(const void *uaddr, void *kaddr, size_t len)
/* User side is the source: loads via %asi, plain kernel stores. */
349 wr %g0, ASI_AIUP, %asi
350 _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
356 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
359 wr %g0, ASI_AIUP, %asi
360 _COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
366 * int copyout(const void *kaddr, void *uaddr, size_t len)
/* User side is the destination: plain kernel loads, stores via %asi. */
369 wr %g0, ASI_AIUP, %asi
370 _MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
375 .globl copy_nofault_end
/*
 * Historic short aliases for the fetch/store primitives: the unsized
 * names resolve to the explicit-width implementations via .set
 * (e.g. fuword == fuword64 on this 64-bit platform).
 */
384 .globl fs_nofault_begin
389 * Chatty aliases for fetch, store functions.
391 .globl fubyte, fusword, fuword, subyte, susword, suword
393 .set fusword, fuword16
394 .set fuword, fuword64
396 .set susword, suword16
397 .set suword, suword64
399 .globl casuword32, casuword, fuptr, suptr
400 .set casuword, casuword64
/*
 * Single-access user-space fetch/store/CAS primitives. Every access
 * uses ASI_AIUP so the hardware performs the user-context translation;
 * they sit inside the fs_nofault region so faults return an error
 * instead of panicking (the error-return paths are on missing lines).
 */
405 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
/* 32-bit compare-and-swap on a user address: if *p == %o1, swap in %o2. */
408 casa [%o0] ASI_AIUP, %o1, %o2
414 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
417 casxa [%o0] ASI_AIUP, %o1, %o2
423 * int fuword8(const void *base)
/* Zero-extending user loads of 8/16/32/64 bits; result in %o0. */
427 lduba [%o0] ASI_AIUP, %o0
431 * int fuword16(const void *base)
435 lduha [%o0] ASI_AIUP, %o0
439 * int32_t fuword32(const void *base)
443 lduwa [%o0] ASI_AIUP, %o0
447 * int64_t fuword64(const void *base)
451 ldxa [%o0] ASI_AIUP, %o0
455 * int suword8(const void *base, int word)
/* User stores of 8/16/32/64 bits; %o1 holds the value to store. */
458 stba %o1, [%o0] ASI_AIUP
464 * int suword16(const void *base, int word)
467 stha %o1, [%o0] ASI_AIUP
473 * int suword32(const void *base, int32_t word)
476 stwa %o1, [%o0] ASI_AIUP
482 * int suword64(const void *base, int64_t word)
485 stxa %o1, [%o0] ASI_AIUP
490 .globl fs_nofault_end
/*
 * fasword8/16/32: fetch a word from an arbitrary caller-supplied ASI
 * (addr in %o1, value stored out through *val in %o2 — the wr of %o0
 * into %asi and the store to *val are on missing lines). Covered by
 * the fas_nofault fault-recovery region.
 */
499 .globl fas_nofault_begin
503 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
508 lduba [%o1] %asi, %o3
516 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
521 lduha [%o1] %asi, %o3
529 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
534 lduwa [%o1] %asi, %o3
541 .globl fas_nofault_end
/* Start of the FPU-using region covered by the fpu_fault handler. */
551 .globl fpu_fault_begin
/*
 * Block copy through the FP register file using the block-transfer ASI.
 * NOTE(review): this listing is fragmentary — conditional branches and
 * several setup lines are missing; comments describe only what the
 * visible instructions establish.
 */
556 * void spitfire_block_copy(void *src, void *dst, size_t len)
558 ENTRY(spitfire_block_copy)
/* Switch to PSTATE_NORMAL while manipulating FPU state (previous
 * %pstate is presumably saved in %o3 — see label 1 below; confirm). */
560 wrpr %g0, PSTATE_NORMAL, %pstate
/* Select the secondary block-transfer ASI and enable the FPU (FEF). */
562 wr %g0, ASI_BLK_S, %asi
563 wr %g0, FPRS_FEF, %fprs
/* The trapframe sits TF_SIZEOF below the PCB; test whether the user
 * FPU was enabled there (the branch on the result is not visible). */
565 sub PCB_REG, TF_SIZEOF, %o4
566 ldx [%o4 + TF_FPRS], %o5
567 andcc %o5, FPRS_FEF, %g0
/* Save the user's FP registers into the PCB user-FP save area. */
570 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
571 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
572 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
573 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
/* Clear FEF in the trapframe so the user FPU is seen as disabled... */
576 andn %o5, FPRS_FEF, %o5
577 stx %o5, [%o4 + TF_FPRS]
/* ...and update the PCB flags (the flag bits OR'ed in are not visible). */
578 ldx [PCB_REG + PCB_FLAGS], %o4
580 stx %o4, [PCB_REG + PCB_FLAGS]
/* Restore the caller's %pstate from %o3. */
582 1: wrpr %o3, 0, %pstate
/* Prime the software pipeline: advance src, drop one block from len. */
585 add %o0, VIS_BLOCKSIZE, %o0
586 sub %o2, VIS_BLOCKSIZE, %o2
/* Main loop: block-load the next block while storing the previous one
 * (loads alternate between %f16 and %f32 register banks). */
588 2: ldda [%o0] %asi, %f16
597 stda %f32, [%o1] %asi
598 add %o0, VIS_BLOCKSIZE, %o0
599 subcc %o2, VIS_BLOCKSIZE, %o2
601 add %o1, VIS_BLOCKSIZE, %o1
/* Drain the pipeline: flush the remaining buffered blocks. */
611 stda %f32, [%o1] %asi
612 add %o0, VIS_BLOCKSIZE, %o0
613 sub %o2, VIS_BLOCKSIZE, %o2
615 add %o1, VIS_BLOCKSIZE, %o1
619 stda %f16, [%o1] %asi
624 END(spitfire_block_copy)
/*
 * zeus_block_copy: block copy tuned for SPARC64-V ("Zeus") class CPUs —
 * same user-FPU save preamble as spitfire_block_copy, but the copy loop
 * uses 8 x 8-byte ldd loads per block with aggressive software
 * prefetching instead of block loads.
 * NOTE(review): fragmentary listing; branches and some lines missing.
 */
627 * void zeus_block_copy(void *src, void *dst, size_t len)
629 ENTRY(zeus_block_copy)
/* Prefetch the first source block before touching FPU state. */
630 prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0
633 wrpr %g0, PSTATE_NORMAL, %pstate
/* Block-store ASI for the destination; enable the FPU. */
635 wr %g0, ASI_BLK_S, %asi
636 wr %g0, FPRS_FEF, %fprs
/* Save the user's FP state into the PCB if it was live (same sequence
 * as spitfire_block_copy; the branch on FEF is not visible). */
638 sub PCB_REG, TF_SIZEOF, %o4
639 ldx [%o4 + TF_FPRS], %o5
640 andcc %o5, FPRS_FEF, %g0
643 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
644 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
645 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
646 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
649 andn %o5, FPRS_FEF, %o5
650 stx %o5, [%o4 + TF_FPRS]
651 ldx [PCB_REG + PCB_FLAGS], %o4
653 stx %o4, [PCB_REG + PCB_FLAGS]
/* Restore the caller's %pstate from %o3. */
655 1: wrpr %o3, 0, %pstate
/* Prologue of the copy pipeline: load the first block into %f0-%f14,
 * interleaved with near (hint 0) and far (hint 1) prefetches. */
657 ldd [%o0 + (0 * 8)], %f0
658 prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
659 ldd [%o0 + (1 * 8)], %f2
660 prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
662 ldd [%o0 + (2 * 8)], %f4
663 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
665 ldd [%o0 + (3 * 8)], %f6
666 prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
668 ldd [%o0 + (4 * 8)], %f8
669 prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
671 ldd [%o0 + (5 * 8)], %f10
672 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
674 ldd [%o0 + (6 * 8)], %f12
675 prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
677 ldd [%o0 + (7 * 8)], %f14
678 ldd [%o0 + (8 * 8)], %f0
679 sub %o2, VIS_BLOCKSIZE, %o2
680 add %o0, VIS_BLOCKSIZE, %o0
681 prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
683 prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
/* Steady-state loop: refill %f2-%f14 while the previous block is
 * block-stored from the other bank (%f32...) to the destination. */
686 2: ldd [%o0 + (1 * 8)], %f2
688 ldd [%o0 + (2 * 8)], %f4
690 stda %f32, [%o1] %asi
691 ldd [%o0 + (3 * 8)], %f6
693 ldd [%o0 + (4 * 8)], %f8
695 ldd [%o0 + (5 * 8)], %f10
697 ldd [%o0 + (6 * 8)], %f12
699 ldd [%o0 + (7 * 8)], %f14
701 ldd [%o0 + (8 * 8)], %f0
703 sub %o2, VIS_BLOCKSIZE, %o2
704 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
705 add %o1, VIS_BLOCKSIZE, %o1
706 prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
707 add %o0, VIS_BLOCKSIZE, %o0
/* Loop test: keep iterating while more than one block (+8) remains. */
708 cmp %o2, VIS_BLOCKSIZE + 8
710 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
/* Epilogue: load and store the final pipelined blocks. */
711 ldd [%o0 + (1 * 8)], %f2
713 ldd [%o0 + (2 * 8)], %f4
715 stda %f32, [%o1] %asi
716 ldd [%o0 + (3 * 8)], %f6
718 ldd [%o0 + (4 * 8)], %f8
720 ldd [%o0 + (5 * 8)], %f10
722 ldd [%o0 + (6 * 8)], %f12
724 ldd [%o0 + (7 * 8)], %f14
726 add %o1, VIS_BLOCKSIZE, %o1
730 stda %f32, [%o1] %asi
/*
 * Block zero: one implementation serves both CPU families (ALTENTRY
 * aliases zeus_block_zero onto spitfire_block_zero). Saves live user
 * FP state into the PCB exactly like the block-copy routines, then
 * block-stores a zeroed %f0 bank 4 blocks per iteration.
 * NOTE(review): fragmentary listing — branches and the %f0-clearing
 * instructions are on missing lines.
 */
738 * void spitfire_block_zero(void *dst, size_t len)
739 * void zeus_block_zero(void *dst, size_t len)
741 ALTENTRY(zeus_block_zero)
742 ENTRY(spitfire_block_zero)
743 wrpr %g0, PSTATE_NORMAL, %pstate
746 wr %g0, ASI_BLK_S, %asi
747 wr %g0, FPRS_FEF, %fprs
/* Check the trapframe's FPRS for a live user FPU; save it if so. */
749 sub PCB_REG, TF_SIZEOF, %o4
750 ldx [%o4 + TF_FPRS], %o5
751 andcc %o5, FPRS_FEF, %g0
754 stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
755 stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
756 stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
757 stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
760 andn %o5, FPRS_FEF, %o5
761 stx %o5, [%o4 + TF_FPRS]
762 ldx [PCB_REG + PCB_FLAGS], %o4
764 stx %o4, [PCB_REG + PCB_FLAGS]
/* Restore the caller's %pstate from %o3. */
766 1: wrpr %o3, 0, %pstate
/* Main loop: store 4 blocks of zeros (from %f0, presumably cleared on
 * a missing line) per iteration, then advance dst and shrink len. */
777 1: stda %f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
778 stda %f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
779 stda %f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
780 stda %f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
781 sub %o1, (4 * VIS_BLOCKSIZE), %o1
783 add %o0, (4 * VIS_BLOCKSIZE), %o0
788 END(spitfire_block_zero)
/*
 * Export the byte size of the fpu_fault region so the fault handler
 * can test whether a faulting PC lies within it.
 */
794 .globl fpu_fault_size
795 .set fpu_fault_size, fpu_fault_end - fpu_fault_begin
/*
 * NOTE(review): longjmp fragment — the ENTRY, the window restore and
 * validation branches are on missing lines. Visible: reload the saved
 * frame pointer, stack pointer and return address from the jmp_buf in
 * %g1, and panic at label 2 if the buffer fails validation.
 */
801 ldx [%g1 + _JB_FP], %g2
806 ldx [%g1 + _JB_SP], %o2
810 ldx [%g1 + _JB_PC], %o7
813 2: PANIC("longjmp botch", %l1)
/*
 * NOTE(review): setjmp fragment — ENTRY and the return sequence are on
 * missing lines. Visible: record the caller's %sp, return address (%o7)
 * and %fp into the jmp_buf at %o0 for a later longjmp.
 */
817 stx %sp, [%o0 + _JB_SP]
818 stx %o7, [%o0 + _JB_PC]
819 stx %fp, [%o0 + _JB_FP]
/*
 * ofw_entry: trampoline into the Open Firmware client interface.
 * NOTE(review): fragmentary — the rdpr of the current %pstate and some
 * branches are missing. Visible: open a register window, fetch the OFW
 * client-interface vector, mask address-masking and interrupt-enable
 * bits out of the pstate value, and adjust %wstate so spills/fills use
 * the PROM-compatible window state while inside the firmware.
 */
825 * void ofw_entry(cell_t args[])
828 save %sp, -CCFSZ, %sp
829 SET(ofw_vec, %l7, %l6)
832 andn %l7, PSTATE_AM | PSTATE_IE, %l5
834 SET(tba_taken_over, %l5, %l4)
837 andn %l5, WSTATE_PROM_MASK, %l3
838 wrpr %l3, WSTATE_PROM_KMIX, %wstate
/* On return: restore the original %wstate and %pstate, then return the
 * firmware's result (%o0) to the caller through the restore. */
843 wrpr %g0, %l5, %wstate
844 1: wrpr %l7, 0, %pstate
846 restore %o0, %g0, %o0
/*
 * ofw_exit: hand control back to the Open Firmware (does not return).
 * NOTE(review): fragmentary — the actual jump into ofw_vec is on a
 * missing line. Visible: restore the firmware's trap table, set up a
 * PROM-compatible window state, build a stack inside kstack0 (a locked
 * page, so no faults can occur during the handoff), and reset the DMMU
 * primary context and trap level.
 */
850 * void ofw_exit(cell_t args[])
853 save %sp, -CCFSZ, %sp
855 SET(ofw_tba, %l7, %l5)
/* Disable address masking and interrupts in the pstate value. */
858 andn %l7, PSTATE_AM | PSTATE_IE, %l7
861 andn %l7, WSTATE_PROM_MASK, %l7
862 wrpr %l7, WSTATE_PROM_KMIX, %wstate
863 wrpr %l5, 0, %tba ! restore the OFW trap table
864 SET(ofw_vec, %l7, %l6)
866 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
867 sub %l0, SPOFF, %fp ! setup a stack in a locked page
868 sub %l0, SPOFF + CCFSZ, %sp
869 mov AA_DMMU_PCXR, %l3 ! force primary DMMU context 0
870 sethi %hi(KERNBASE), %l5
871 stxa %g0, [%l3] ASI_DMMU
873 wrpr %g0, 0, %tl ! force trap level 0
/*
 * Profiling glue: the GMON_PROF_* state values are duplicated here by
 * hand because sys/gmon.h cannot be pulled through genassym.c (see the
 * XXX note) — keep them in sync with sys/gmon.h. GCC emits calls to
 * _mcount; alias it to the instrumented-entry hook below.
 */
897 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
901 #define GMON_PROF_OFF 3
902 #define GMON_PROF_HIRES 4
905 .set _mcount, __cyg_profile_func_enter
/*
 * Function-entry profiling hook: if profiling is not in state
 * GMON_PROF_OFF, tail into mcount (the compare's branch and the jump
 * are on missing lines). Reads _gmonparam's GM_STATE field.
 */
907 ENTRY(__cyg_profile_func_enter)
908 SET(_gmonparam, %o3, %o2)
909 lduw [%o2 + GM_STATE], %o3
910 cmp %o3, GMON_PROF_OFF
913 SET(mcount, %o3, %o2)
918 END(__cyg_profile_func_enter)
/*
 * Function-exit profiling hook: only active in the high-resolution
 * profiling state (GMON_PROF_HIRES); then tails into mexitcount
 * (branch/jump on missing lines).
 */
922 ENTRY(__cyg_profile_func_exit)
923 SET(_gmonparam, %o3, %o2)
924 lduw [%o2 + GM_STATE], %o3
925 cmp %o3, GMON_PROF_HIRES
928 SET(mexitcount, %o3, %o2)
933 END(__cyg_profile_func_exit)