2 * Copyright (c) 2001 Jake Burkholder.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <machine/asm.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/errno.h>
32 #include <machine/asi.h>
33 #include <machine/asmacros.h>
34 #include <machine/fsr.h>
35 #include <machine/intr_machdep.h>
36 #include <machine/ktr.h>
37 #include <machine/pcb.h>
38 #include <machine/pstate.h>
39 #include <machine/wstate.h>
43 .register %g2, #ignore
44 .register %g3, #ignore
45 .register %g6, #ignore
48 * Common code for copy routines.
50 * We use large macros to generate functions for each of the copy routines.
51 * This allows the load and store instructions to be generated for the right
52 * operation, asi or not. It is possible to write an asi independent function
53 * but this would require 2 expensive wrs in the main loop to switch %asi.
54 * It would also screw up profiling (if we ever get it), but may save some I$.
55 * We assume that either one of dasi and sasi is empty, or that they are both
56 * the same (empty or non-empty). It is up to the caller to set %asi.
60 * ASI independent implementation of copystr(9).
61 * Used to implement copyinstr() and copystr().
63 * Return value is in %g1.
65 #define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
71 LD(ub, sa) [src] sasi, %g1 ; \
72 ST(b, da) %g1, [dst] dasi ; \
77 2: mov ENAMETOOLONG, %g1 ; \
78 3: sub src, %g2, %g2 ; \
84 * ASI independent implementation of memset(3).
85 * Used to implement bzero(), memset() and aszero().
87 * If the pattern is non-zero, duplicate it to fill 64 bits.
88 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
89 * It has yet to be determined how much unrolling is beneficial.
90 * Could also read and compare before writing to minimize snoop traffic.
92 * XXX bzero() should be implemented as
93 * #define bzero(dst, len) (void)memset((dst), 0, (len))
96 #define _MEMSET(dst, pat, len, da, dasi) \
98 and pat, 0xff, pat ; \
102 sllx pat, 16, %g1 ; \
104 sllx pat, 32, %g1 ; \
112 ST(b, da) pat, [dst] dasi ; \
119 ST(x, da) pat, [dst] dasi ; \
120 ST(x, da) pat, [dst + 8] dasi ; \
121 ST(x, da) pat, [dst + 16] dasi ; \
122 ST(x, da) pat, [dst + 24] dasi ; \
129 ST(x, da) pat, [dst] dasi ; \
136 ST(b, da) pat, [dst] dasi ; \
142 * ASI independent implementation of memcpy(3).
143 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
144 * ascopyfrom() and ascopyto().
146 * Transfer bytes until dst is 8-byte aligned. If src is then also 8-byte
147 * aligned, transfer 8 bytes, otherwise finish with bytes. The unaligned
148 * case could be optimized, but it is expected that this is the uncommon
149 * case and of questionable value. The code to do so is also rather large
150 * and ugly. It has yet to be determined how much unrolling is beneficial.
152 * XXX bcopy() must also check for overlap. This is stupid.
153 * XXX bcopy() should be implemented as
154 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
157 #define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \
163 LD(ub, sa) [src] sasi, %g1 ; \
164 ST(b, da) %g1, [dst] dasi ; \
177 LD(x, sa) [src] sasi, %g1 ; \
178 LD(x, sa) [src + 8] sasi, %g2 ; \
179 LD(x, sa) [src + 16] sasi, %g3 ; \
180 LD(x, sa) [src + 24] sasi, %g4 ; \
181 ST(x, da) %g1, [dst] dasi ; \
182 ST(x, da) %g2, [dst + 8] dasi ; \
183 ST(x, da) %g3, [dst + 16] dasi ; \
184 ST(x, da) %g4, [dst + 24] dasi ; \
192 LD(x, sa) [src] sasi, %g1 ; \
193 ST(x, da) %g1, [dst] dasi ; \
201 LD(ub, sa) [src] sasi, %g1 ; \
202 ST(b, da) %g1, [dst] dasi ; \
209 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
213 _MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
219 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
223 _MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
229 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
233 _MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
239 * void aszero(u_long asi, vm_offset_t pa, size_t len)
243 _MEMSET(%o1, %g0, %o2, a, %asi)
249 * int bcmp(const void *b1, const void *b2, size_t len)
254 1: ldub [%o0 + %o3], %o4
255 ldub [%o1 + %o3], %o5
267 * void bcopy(const void *src, void *dst, size_t len)
271 * Check for overlap, and copy backwards if so.
295 * Do the fast version.
297 3: _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
303 * void bzero(void *b, size_t len)
306 _MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
312 * int copystr(const void *src, void *dst, size_t len, size_t *done)
315 _COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
321 * void *memcpy(void *dst, const void *src, size_t len)
325 _MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
331 * void *memset(void *b, int c, size_t len)
335 _MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
340 .globl copy_nofault_begin
345 * int copyin(const void *uaddr, void *kaddr, size_t len)
348 wr %g0, ASI_AIUP, %asi
349 _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
355 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
358 wr %g0, ASI_AIUP, %asi
359 _COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
365 * int copyout(const void *kaddr, void *uaddr, size_t len)
368 wr %g0, ASI_AIUP, %asi
369 _MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
374 .globl copy_nofault_end
383 .globl fs_nofault_begin
388 * Chatty aliases for fetch, store functions.
/*
 * The width-less names are exported and bound to fixed-width routines:
 * the "sword" forms resolve to the 16-bit routines and the plain "word"
 * forms resolve to the 64-bit routines.
 */
390 .globl fubyte, fusword, fuword, subyte, susword, suword
392 .set fusword, fuword16
393 .set fuword, fuword64
395 .set susword, suword16
396 .set suword, suword64
/* casuword is likewise bound to the 64-bit compare-and-swap routine. */
398 .globl casuword32, casuword, fuptr, suptr
399 .set casuword, casuword64
404 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
407 casa [%o0] ASI_AIUP, %o1, %o2
413 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
416 casxa [%o0] ASI_AIUP, %o1, %o2
422 * int fuword8(const void *base)
426 lduba [%o0] ASI_AIUP, %o0
430 * int fuword16(const void *base)
434 lduha [%o0] ASI_AIUP, %o0
438 * int32_t fuword32(const void *base)
442 lduwa [%o0] ASI_AIUP, %o0
446 * int64_t fuword64(const void *base)
450 ldxa [%o0] ASI_AIUP, %o0
454 * int suword8(const void *base, int word)
457 stba %o1, [%o0] ASI_AIUP
463 * int suword16(const void *base, int word)
466 stha %o1, [%o0] ASI_AIUP
472 * int suword32(const void *base, int32_t word)
475 stwa %o1, [%o0] ASI_AIUP
481 * int suword64(const void *base, int64_t word)
484 stxa %o1, [%o0] ASI_AIUP
489 .globl fs_nofault_intr_begin
490 fs_nofault_intr_begin:
494 * int fuswintr(const void *base)
498 lduha [%o0] ASI_AIUP, %o0
502 * int suswintr(const void *base, int word)
505 stha %o1, [%o0] ASI_AIUP
510 .globl fs_nofault_intr_end
514 .globl fs_nofault_end
523 .globl fas_nofault_begin
527 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
532 lduba [%o1] %asi, %o3
540 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
545 lduha [%o1] %asi, %o3
553 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
558 lduwa [%o1] %asi, %o3
565 .globl fas_nofault_end
575 .globl fpu_fault_begin
580 * void spitfire_block_copy(void *src, void *dst, size_t len)
582 ENTRY(spitfire_block_copy)
/*
 * Copy using the floating-point registers and block loads/stores.
 * The block loads below read through %o0 (src) and the block stores
 * write through %o1 (dst).
 * NOTE(review): len presumably arrives in %o2 per the prototype; confirm.
 */
/* Switch %pstate to PSTATE_NORMAL while the FP state is being handled. */
584 wrpr %g0, PSTATE_NORMAL, %pstate
/* Select ASI_BLK_S for the %asi-relative ldda/stda that follow. */
586 wr %g0, ASI_BLK_S, %asi
/* Set FPRS_FEF in %fprs so that the FP registers may be accessed. */
587 wr %g0, FPRS_FEF, %fprs
/*
 * %o4 = PCB_REG - TF_SIZEOF is used as the base for the TF_FPRS access.
 * NOTE(review): presumably the trapframe sits directly below the pcb;
 * confirm.
 */
589 sub PCB_REG, TF_SIZEOF, %o4
590 ldx [%o4 + TF_FPRS], %o5
/* Test whether FPRS_FEF is set in the saved TF_FPRS value. */
591 andcc %o5, FPRS_FEF, %g0
/*
 * Store the four FP register groups starting at %f0, %f16, %f32 and %f48
 * into consecutive 64-byte slots at PCB_UFP in the pcb.
 */
594 stda %f0, [PCB_REG + PCB_UFP + (0 * 64)] %asi
595 stda %f16, [PCB_REG + PCB_UFP + (1 * 64)] %asi
596 stda %f32, [PCB_REG + PCB_UFP + (2 * 64)] %asi
597 stda %f48, [PCB_REG + PCB_UFP + (3 * 64)] %asi
/* Clear FPRS_FEF in the saved TF_FPRS value and write it back. */
600 andn %o5, FPRS_FEF, %o5
601 stx %o5, [%o4 + TF_FPRS]
/*
 * Load PCB_FLAGS and store it back.
 * NOTE(review): presumably records that the FP state was saved to the
 * pcb; confirm which flag is involved.
 */
602 ldx [PCB_REG + PCB_FLAGS], %o4
604 stx %o4, [PCB_REG + PCB_FLAGS]
/*
 * Write %o3 to %pstate.
 * NOTE(review): %o3 presumably holds the %pstate value read on entry;
 * confirm.
 */
606 1: wrpr %o3, 0, %pstate
/* Block load from src into the register group starting at %f16. */
612 2: ldda [%o0] %asi, %f16
/* Block stores to dst from the %f32 and %f16 register groups. */
621 stda %f32, [%o1] %asi
635 stda %f32, [%o1] %asi
643 stda %f16, [%o1] %asi
648 END(spitfire_block_copy)
651 * void zeus_block_copy(void *src, void *dst, size_t len)
653 ENTRY(zeus_block_copy)
/*
 * Copy using 8-byte FP loads from src (%o0), software prefetching of
 * upcoming source lines and block stores to dst (%o1).
 * NOTE(review): len presumably arrives in %o2 per the prototype; confirm.
 */
/* Start prefetching the first 64-byte source line (prefetch function 0). */
654 prefetch [%o0 + (0 * 64)], 0
/* Switch %pstate to PSTATE_NORMAL while the FP state is being handled. */
657 wrpr %g0, PSTATE_NORMAL, %pstate
/* Select ASI_BLK_S for the %asi-relative stda that follow. */
659 wr %g0, ASI_BLK_S, %asi
/* Set FPRS_FEF in %fprs so that the FP registers may be accessed. */
660 wr %g0, FPRS_FEF, %fprs
/*
 * %o4 = PCB_REG - TF_SIZEOF is used as the base for the TF_FPRS access.
 * NOTE(review): presumably the trapframe sits directly below the pcb;
 * confirm.
 */
662 sub PCB_REG, TF_SIZEOF, %o4
663 ldx [%o4 + TF_FPRS], %o5
/* Test whether FPRS_FEF is set in the saved TF_FPRS value. */
664 andcc %o5, FPRS_FEF, %g0
/*
 * Store the four FP register groups starting at %f0, %f16, %f32 and %f48
 * into consecutive 64-byte slots at PCB_UFP in the pcb.
 */
667 stda %f0, [PCB_REG + PCB_UFP + (0 * 64)] %asi
668 stda %f16, [PCB_REG + PCB_UFP + (1 * 64)] %asi
669 stda %f32, [PCB_REG + PCB_UFP + (2 * 64)] %asi
670 stda %f48, [PCB_REG + PCB_UFP + (3 * 64)] %asi
/* Clear FPRS_FEF in the saved TF_FPRS value and write it back. */
673 andn %o5, FPRS_FEF, %o5
674 stx %o5, [%o4 + TF_FPRS]
/*
 * Load PCB_FLAGS and store it back.
 * NOTE(review): presumably records that the FP state was saved to the
 * pcb; confirm which flag is involved.
 */
675 ldx [PCB_REG + PCB_FLAGS], %o4
677 stx %o4, [PCB_REG + PCB_FLAGS]
/*
 * Write %o3 to %pstate.
 * NOTE(review): %o3 presumably holds the %pstate value read on entry;
 * confirm.
 */
679 1: wrpr %o3, 0, %pstate
/*
 * Load the first 64 source bytes into %f0 - %f14, eight bytes at a time,
 * interleaved with prefetches of later source lines.  The nearer lines
 * use prefetch function 0, the farther ones prefetch function 1.
 */
681 ldd [%o0 + (0 * 8)], %f0
682 prefetch [%o0 + (1 * 64)], 0
683 ldd [%o0 + (1 * 8)], %f2
684 prefetch [%o0 + (2 * 64)], 0
686 ldd [%o0 + (2 * 8)], %f4
687 prefetch [%o0 + (3 * 64)], 0
689 ldd [%o0 + (3 * 8)], %f6
690 prefetch [%o0 + (4 * 64)], 1
692 ldd [%o0 + (4 * 8)], %f8
693 prefetch [%o0 + (8 * 64)], 1
695 ldd [%o0 + (5 * 8)], %f10
696 prefetch [%o0 + (12 * 64)], 1
698 ldd [%o0 + (6 * 8)], %f12
699 prefetch [%o0 + (16 * 64)], 1
701 ldd [%o0 + (7 * 8)], %f14
/* Offset 8 * 8 is the first doubleword of the following 64-byte line. */
702 ldd [%o0 + (8 * 8)], %f0
705 prefetch [%o0 + (19 * 64)], 1
707 prefetch [%o0 + (23 * 64)], 1
/*
 * Loop body: reload %f2 - %f14 from the source line, block store the
 * %f32 register group to dst, and fetch the first doubleword of the
 * following line into %f0.
 */
710 2: ldd [%o0 + (1 * 8)], %f2
712 ldd [%o0 + (2 * 8)], %f4
714 stda %f32, [%o1] %asi
715 ldd [%o0 + (3 * 8)], %f6
717 ldd [%o0 + (4 * 8)], %f8
719 ldd [%o0 + (5 * 8)], %f10
721 ldd [%o0 + (6 * 8)], %f12
723 ldd [%o0 + (7 * 8)], %f14
725 ldd [%o0 + (8 * 8)], %f0
728 prefetch [%o0 + (3 * 64)], 0
730 prefetch [%o0 + (24 * 64)], 1
734 prefetch [%o0 + (12 * 64)], 1
/* Same load and block store sequence, without fetching a following line. */
735 ldd [%o0 + (1 * 8)], %f2
737 ldd [%o0 + (2 * 8)], %f4
739 stda %f32, [%o1] %asi
740 ldd [%o0 + (3 * 8)], %f6
742 ldd [%o0 + (4 * 8)], %f8
744 ldd [%o0 + (5 * 8)], %f10
746 ldd [%o0 + (6 * 8)], %f12
748 ldd [%o0 + (7 * 8)], %f14
/* Block store of the %f32 register group to dst. */
754 stda %f32, [%o1] %asi
762 * void spitfire_block_zero(void *dst, size_t len)
763 * void zeus_block_zero(void *dst, size_t len)
/*
 * zeus_block_zero and spitfire_block_zero are two entry points to the
 * same code.  The block stores below write through %o0 (dst).
 * NOTE(review): len presumably arrives in %o1 per the prototypes; confirm.
 */
765 ALTENTRY(zeus_block_zero)
766 ENTRY(spitfire_block_zero)
/* Switch %pstate to PSTATE_NORMAL while the FP state is being handled. */
768 wrpr %g0, PSTATE_NORMAL, %pstate
/* Select ASI_BLK_S for the %asi-relative stda that follow. */
770 wr %g0, ASI_BLK_S, %asi
/* Set FPRS_FEF in %fprs so that the FP registers may be accessed. */
771 wr %g0, FPRS_FEF, %fprs
/*
 * %o4 = PCB_REG - TF_SIZEOF is used as the base for the TF_FPRS access.
 * NOTE(review): presumably the trapframe sits directly below the pcb;
 * confirm.
 */
773 sub PCB_REG, TF_SIZEOF, %o4
774 ldx [%o4 + TF_FPRS], %o5
/* Test whether FPRS_FEF is set in the saved TF_FPRS value. */
775 andcc %o5, FPRS_FEF, %g0
/*
 * Store the four FP register groups starting at %f0, %f16, %f32 and %f48
 * into consecutive 64-byte slots at PCB_UFP in the pcb.
 */
778 stda %f0, [PCB_REG + PCB_UFP + (0 * 64)] %asi
779 stda %f16, [PCB_REG + PCB_UFP + (1 * 64)] %asi
780 stda %f32, [PCB_REG + PCB_UFP + (2 * 64)] %asi
781 stda %f48, [PCB_REG + PCB_UFP + (3 * 64)] %asi
/* Clear FPRS_FEF in the saved TF_FPRS value and write it back. */
784 andn %o5, FPRS_FEF, %o5
785 stx %o5, [%o4 + TF_FPRS]
/*
 * Load PCB_FLAGS and store it back.
 * NOTE(review): presumably records that the FP state was saved to the
 * pcb; confirm which flag is involved.
 */
786 ldx [PCB_REG + PCB_FLAGS], %o4
788 stx %o4, [PCB_REG + PCB_FLAGS]
/*
 * Write %o3 to %pstate.
 * NOTE(review): %o3 presumably holds the %pstate value read on entry;
 * confirm.
 */
790 1: wrpr %o3, 0, %pstate
/*
 * Store the %f0 register group to four consecutive 64-byte blocks,
 * covering 256 bytes of dst.
 * NOTE(review): the %f0 group is presumably zeroed beforehand; confirm.
 */
801 1: stda %f0, [%o0] %asi
802 stda %f0, [%o0 + 64] %asi
803 stda %f0, [%o0 + 128] %asi
804 stda %f0, [%o0 + 192] %asi
812 END(spitfire_block_zero)
/*
 * Export the distance from fpu_fault_begin to fpu_fault_end, i.e. the
 * size of the code region bracketed by those two labels.
 */
818 .globl fpu_fault_size
819 .set fpu_fault_size, fpu_fault_end - fpu_fault_begin
825 ldx [%g1 + _JB_FP], %g2
830 ldx [%g1 + _JB_SP], %o2
834 ldx [%g1 + _JB_PC], %o7
837 2: PANIC("longjmp botch", %l1)
841 stx %sp, [%o0 + _JB_SP]
842 stx %o7, [%o0 + _JB_PC]
843 stx %fp, [%o0 + _JB_FP]
849 * void ofw_entry(cell_t args[])
852 save %sp, -CCFSZ, %sp
853 SET(ofw_vec, %l7, %l6)
856 andn %l7, PSTATE_AM | PSTATE_IE, %l5
858 SET(tba_taken_over, %l5, %l4)
861 andn %l5, WSTATE_PROM_MASK, %l3
862 wrpr %l3, WSTATE_PROM_KMIX, %wstate
867 wrpr %g0, %l5, %wstate
868 1: wrpr %l7, 0, %pstate
870 restore %o0, %g0, %o0
874 * void ofw_exit(cell_t args[])
877 save %sp, -CCFSZ, %sp
879 SET(ofw_tba, %l7, %l5)
882 andn %l7, PSTATE_AM | PSTATE_IE, %l7
885 andn %l7, WSTATE_PROM_MASK, %l7
886 wrpr %l7, WSTATE_PROM_KMIX, %wstate
887 wrpr %l5, 0, %tba ! restore the OFW trap table
888 SET(ofw_vec, %l7, %l6)
890 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
891 sub %l0, SPOFF, %fp ! setup a stack in a locked page
892 sub %l0, SPOFF + CCFSZ, %sp
893 mov AA_DMMU_PCXR, %l3 ! force primary DMMU context 0
894 sethi %hi(KERNBASE), %l5
895 stxa %g0, [%l3] ASI_DMMU
897 wrpr %g0, 0, %tl ! force trap level 0
921 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
/*
 * Profiling state values that the __cyg_profile_func_* entry points
 * compare against the GM_STATE word of _gmonparam.
 * NOTE(review): these must match the definitions in sys/gmon.h; confirm.
 */
925 #define GMON_PROF_OFF 3
926 #define GMON_PROF_HIRES 4
/* _mcount is an alias for __cyg_profile_func_enter. */
929 .set _mcount, __cyg_profile_func_enter
931 ENTRY(__cyg_profile_func_enter)
/* %o2 = &_gmonparam (%o3 is the scratch register passed to SET). */
932 SET(_gmonparam, %o3, %o2)
/* Fetch the 32-bit GM_STATE word and compare it with GMON_PROF_OFF. */
933 lduw [%o2 + GM_STATE], %o3
934 cmp %o3, GMON_PROF_OFF
/*
 * %o2 = address of mcount.
 * NOTE(review): presumably the target of a jump taken when profiling is
 * not off; confirm.
 */
937 SET(mcount, %o3, %o2)
942 END(__cyg_profile_func_enter)
946 ENTRY(__cyg_profile_func_exit)
/* %o2 = &_gmonparam (%o3 is the scratch register passed to SET). */
947 SET(_gmonparam, %o3, %o2)
/* Fetch the 32-bit GM_STATE word and compare it with GMON_PROF_HIRES. */
948 lduw [%o2 + GM_STATE], %o3
949 cmp %o3, GMON_PROF_HIRES
/*
 * %o2 = address of mexitcount.
 * NOTE(review): presumably the target of a jump taken only in the
 * high-resolution profiling state; confirm.
 */
952 SET(mexitcount, %o3, %o2)
957 END(__cyg_profile_func_exit)