2 * Copyright (c) 2001 Jake Burkholder.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <machine/asm.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/errno.h>
32 #include <machine/asi.h>
33 #include <machine/asmacros.h>
34 #include <machine/fsr.h>
35 #include <machine/intr_machdep.h>
36 #include <machine/ktr.h>
37 #include <machine/pcb.h>
38 #include <machine/pstate.h>
39 #include <machine/wstate.h>
/*
 * Tell the assembler that we deliberately use these application-reserved
 * global registers, so it does not warn about them.
 */
43 .register %g2, #ignore
44 .register %g3, #ignore
45 .register %g6, #ignore
50 * Generate load and store instructions for the corresponding width and asi
51 * (or not). Note that we want to evaluate the macro args before
52 * concatenating, so that E really turns into nothing.
54 #define _LD(w, a) ld ## w ## a	/* e.g. _LD(ub, a) -> lduba */
55 #define _ST(w, a) st ## w ## a	/* e.g. _ST(x, a) -> stxa */
57 #define LD(w, a) _LD(w, a)	/* expand args first, then paste, */
58 #define ST(w, a) _ST(w, a)	/* so an empty "a" (E) disappears */
61 * Common code for copy routines.
63 * We use large macros to generate functions for each of the copy routines.
64 * This allows the load and store instructions to be generated for the right
65 * operation, asi or not. It is possible to write an asi independent function
66 * but this would require 2 expensive wrs in the main loop to switch %asi.
67 * It would also screw up profiling (if we ever get it), but may save some I$.
68 * We assume that either one of dasi and sasi is empty, or that they are both
69 * the same (empty or non-empty). It is up to the caller to set %asi.
73 * ASI independent implementation of copystr(9).
74 * Used to implement copyinstr() and copystr().
 *
 * Byte-at-a-time copy of a NUL-terminated string: %g1 carries each byte
 * and finally the return value (ENAMETOOLONG when len is exhausted before
 * the terminator). NOTE(review): only a fragment of the macro body is
 * visible here; the loop control, the success path (which presumably sets
 * %g1 to 0) and the "done" store via %g2 are elided — confirm against the
 * full source before relying on the details.
76 * Return value is in %g1.
78 #define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
84 LD(ub, sa) [src] sasi, %g1 ; \
85 ST(b, da) %g1, [dst] dasi ; \
90 2: mov ENAMETOOLONG, %g1 ; \
91 3: sub src, %g2, %g2 ; \
97 * ASI independent implementation of memset(3).
98 * Used to implement bzero(), memset() and aszero().
100 * If the pattern is non-zero, duplicate it to fill 64 bits.
101 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
102 * It has yet to be determined how much unrolling is beneficial.
103 * Could also read and compare before writing to minimize snoop traffic.
105 * XXX bzero() should be implemented as
106 * #define bzero(dst, len) (void)memset((dst), 0, (len))
 *
 * NOTE(review): fragment only.  The visible shifts replicate the low byte
 * of pat across 16 and then 32 bits (the intervening "or pat, %g1, pat"
 * steps are elided); the four ST(x,...) stores at offsets 0/8/16/24 are
 * the unrolled 32-byte inner loop, with single-stx and single-byte tails
 * for the remainders — confirm against the full source.
109 #define _MEMSET(dst, pat, len, da, dasi) \
111 and pat, 0xff, pat ; \
115 sllx pat, 16, %g1 ; \
117 sllx pat, 32, %g1 ; \
125 ST(b, da) pat, [dst] dasi ; \
132 ST(x, da) pat, [dst] dasi ; \
133 ST(x, da) pat, [dst + 8] dasi ; \
134 ST(x, da) pat, [dst + 16] dasi ; \
135 ST(x, da) pat, [dst + 24] dasi ; \
142 ST(x, da) pat, [dst] dasi ; \
149 ST(b, da) pat, [dst] dasi ; \
155 * ASI independent implementation of memcpy(3).
156 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
157 * ascopyfrom() and ascopyto().
159 * Transfer bytes until dst is 8-byte aligned. If src is then also 8 byte
160 * aligned, transfer 8 bytes, otherwise finish with bytes. The unaligned
161 * case could be optimized, but it is expected that this is the uncommon
162 * case and of questionable value. The code to do so is also rather large
163 * and ugly. It has yet to be determined how much unrolling is beneficial.
165 * XXX bcopy() must also check for overlap. This is stupid.
166 * XXX bcopy() should be implemented as
167 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
 *
 * NOTE(review): fragment only.  Visible here: the byte-at-a-time head
 * (align dst), a 4-way unrolled 32-byte doubleword loop staged through
 * %g1-%g4 (loads first, then stores, to pipeline the transfers), a
 * single-doubleword tail and a final byte tail.  The alignment checks and
 * loop control between these pieces are elided — confirm against the full
 * source.
170 #define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \
176 LD(ub, sa) [src] sasi, %g1 ; \
177 ST(b, da) %g1, [dst] dasi ; \
190 LD(x, sa) [src] sasi, %g1 ; \
191 LD(x, sa) [src + 8] sasi, %g2 ; \
192 LD(x, sa) [src + 16] sasi, %g3 ; \
193 LD(x, sa) [src + 24] sasi, %g4 ; \
194 ST(x, da) %g1, [dst] dasi ; \
195 ST(x, da) %g2, [dst + 8] dasi ; \
196 ST(x, da) %g3, [dst + 16] dasi ; \
197 ST(x, da) %g4, [dst + 24] dasi ; \
205 LD(x, sa) [src] sasi, %g1 ; \
206 ST(x, da) %g1, [dst] dasi ; \
214 LD(ub, sa) [src] sasi, %g1 ; \
215 ST(b, da) %g1, [dst] dasi ; \
/*
 * Alternate-address-space copy/zero entry points.  NOTE(review): the
 * ENTRY()/END() brackets and the setup that writes the asi argument
 * (%o0 or %o1) into %asi are elided from this view — confirm that each
 * routine loads %asi before its macro expansion.
 */
222 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
226 _MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)	! both sides via %asi
232 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
236 _MEMCPY(%o2, %o1, %o3, E, E, a, %asi)	! kernel dst, alternate-space src
242 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
246 _MEMCPY(%o2, %o0, %o3, a, %asi, E, E)	! alternate-space dst, kernel src
252 * void aszero(u_long asi, vm_offset_t pa, size_t len)
256 _MEMSET(%o1, %g0, %o2, a, %asi)	! %g0 pattern = fill with zeroes
262 * int bcmp(const void *b1, const void *b2, size_t len)
 *
 * Fragment of the byte-compare loop: fetch one byte from each buffer at
 * offset %o3.  NOTE(review): the counter setup, the comparison of
 * %o4/%o5 and the loop branch are elided from this view.
267 1: ldub [%o0 + %o3], %o4
268 ldub [%o1 + %o3], %o5
280 * void bcopy(const void *src, void *dst, size_t len)
284 * Check for overlap, and copy backwards if so.
 *
 * NOTE(review): the overlap test and the backwards byte-copy path are
 * elided from this view; only the non-overlapping fast path is shown.
308 * Do the fast version.
310 3: _MEMCPY(%o1, %o0, %o2, E, E, E, E)	! dst=%o1, src=%o0 (bcopy arg order)
316 * void bzero(void *b, size_t len)
319 _MEMSET(%o0, %g0, %o1, E, E)	! plain (kernel) accesses, %g0 = zero pattern
325 * int copystr(const void *src, void *dst, size_t len, size_t *done)
328 _COPYSTR(%o0, %o1, %o2, %o3, E, E, E, E)	! kernel-to-kernel string copy
334 * void *memcpy(void *dst, const void *src, size_t len)
 * NOTE(review): dst is %o3, not %o0 — elided code presumably copies %o0
 * to %o3 so the original dst survives in %o0 as the return value; the
 * same applies to memset() below.  Confirm against the full source.
338 _MEMCPY(%o3, %o1, %o2, E, E, E, E)
344 * void *memset(void *b, int c, size_t len)
348 _MEMSET(%o3, %o1, %o2, E, E)
353 .globl copy_nofault_begin
/*
 * copy_nofault_begin/copy_nofault_end bracket the user copy routines.
 * NOTE(review): faults on instructions inside this range are presumably
 * recognized by the trap handler and converted into an error return
 * rather than a panic — confirm against the trap code.
 */
358 * int copyin(const void *uaddr, void *kaddr, size_t len)
361 wr %g0, ASI_AIUP, %asi	! %asi = user primary address space (machine/asi.h)
362 _MEMCPY(%o1, %o0, %o2, E, E, a, %asi)	! kernel dst, user src
368 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
371 wr %g0, ASI_AIUP, %asi
372 _COPYSTR(%o0, %o1, %o2, %o3, a, %asi, E, E)	! user src, kernel dst
378 * int copyout(const void *kaddr, void *uaddr, size_t len)
381 wr %g0, ASI_AIUP, %asi
382 _MEMCPY(%o1, %o0, %o2, a, %asi, E, E)	! user dst, kernel src
387 .globl copy_nofault_end
396 .globl fs_nofault_begin
401 * Chatty aliases for fetch, store functions.
403 .globl fubyte, fusword, fuword, subyte, susword, suword
405 .set fusword, fuword16
406 .set fuword, fuword64	! "word" here is the 64-bit native long
408 .set susword, suword16
409 .set suword, suword64
411 .globl casuword32, casuword, fuptr, suptr
412 .set casuword, casuword64	! pointers/longs are 64-bit on sparc64
/*
 * User-space fetch/store primitives.  All access user memory through
 * ASI_AIUP (as-if-user primary).  NOTE(review): the ENTRY()/END()
 * brackets and the success/failure return sequences are elided from this
 * view — the visible lines are only the memory-access cores.
 */
417 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
420 casa [%o0] ASI_AIUP, %o1, %o2	! 32-bit compare-and-swap: if *p == %o1, swap with %o2
426 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
429 casxa [%o0] ASI_AIUP, %o1, %o2	! 64-bit compare-and-swap in user space
435 * int fuword8(const void *base)
439 lduba [%o0] ASI_AIUP, %o0	! zero-extending 8-bit user load
443 * int fuword16(const void *base)
447 lduha [%o0] ASI_AIUP, %o0	! zero-extending 16-bit user load
451 * int32_t fuword32(const void *base)
455 lduwa [%o0] ASI_AIUP, %o0	! zero-extending 32-bit user load
459 * int64_t fuword64(const void *base)
463 ldxa [%o0] ASI_AIUP, %o0	! 64-bit user load
467 * int suword8(const void *base, int word)
470 stba %o1, [%o0] ASI_AIUP	! 8-bit user store
476 * int suword16(const void *base, int word)
479 stha %o1, [%o0] ASI_AIUP	! 16-bit user store
485 * int suword32(const void *base, int32_t word)
488 stwa %o1, [%o0] ASI_AIUP	! 32-bit user store
494 * int suword64(const void *base, int64_t word)
497 stxa %o1, [%o0] ASI_AIUP	! 64-bit user store
502 .globl fs_nofault_intr_begin
503 fs_nofault_intr_begin:
/*
 * 16-bit user fetch/store variants in a separately-bracketed nofault
 * region.  NOTE(review): the "intr" markers presumably tell the trap
 * handler these may run in interrupt context and must fail immediately
 * on a fault instead of paging — confirm against the trap code.
 */
507 * int fuswintr(const void *base)
511 lduha [%o0] ASI_AIUP, %o0	! zero-extending 16-bit user load
515 * int suswintr(const void *base, int word)
518 stha %o1, [%o0] ASI_AIUP	! 16-bit user store
523 .globl fs_nofault_intr_end
527 .globl fs_nofault_end
536 .globl fas_nofault_begin
/*
 * Fetch from an arbitrary address space given by the asi argument.
 * NOTE(review): elided code presumably writes the asi argument (%o0)
 * into %asi and stores the fetched value through *val (%o2), returning
 * 0/-1 — confirm against the full source.
 */
540 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
545 lduba [%o1] %asi, %o3	! fetch byte via caller-supplied %asi
553 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
558 lduha [%o1] %asi, %o3	! fetch halfword
566 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
571 lduwa [%o1] %asi, %o3	! fetch word
578 .globl fas_nofault_end
588 .globl fpu_fault_begin
/*
 * Block copy using the UltraSPARC (spitfire) 64-byte block-transfer ASI
 * through the FP register file.  NOTE(review): many interior lines
 * (branches, loop control, the FP-state restore and return path) are
 * elided from this view — the comments below describe only what is
 * visible.
 */
593 * void spitfire_block_copy(void *src, void *dst, size_t len)
595 ENTRY(spitfire_block_copy)
597 wrpr %g0, PIL_TICK, %pil	! raise PIL while FP state is juggled
599 wr %g0, ASI_BLK_S, %asi	! 64-byte block-transfer ASI (machine/asi.h)
600 wr %g0, FPRS_FEF, %fprs	! enable the FPU
602 sub PCB_REG, TF_SIZEOF, %o4	! %o4 = trap frame below the pcb — confirm layout
603 ldx [%o4 + TF_FPRS], %o5
604 andcc %o5, FPRS_FEF, %g0	! did the interrupted context have FP enabled?
607 stda %f0, [PCB_REG + PCB_UFP + (0 * 64)] %asi	! save user FP regs
608 stda %f16, [PCB_REG + PCB_UFP + (1 * 64)] %asi	! into the pcb save
609 stda %f32, [PCB_REG + PCB_UFP + (2 * 64)] %asi	! area, 64 bytes per
610 stda %f48, [PCB_REG + PCB_UFP + (3 * 64)] %asi	! block store
613 andn %o5, FPRS_FEF, %o5	! clear FEF in the saved %fprs
614 stx %o5, [%o4 + TF_FPRS]
615 ldx [PCB_REG + PCB_FLAGS], %o4
617 stx %o4, [PCB_REG + PCB_FLAGS]
625 2: ldda [%o0] %asi, %f16	! 64-byte block load from src
634 stda %f32, [%o1] %asi	! 64-byte block store to dst
648 stda %f32, [%o1] %asi
656 stda %f16, [%o1] %asi
661 END(spitfire_block_copy)
/*
 * Block zero via the spitfire block-transfer ASI; same FP save dance as
 * spitfire_block_copy above.  NOTE(review): loop control and the
 * FP-state restore/return path are elided from this view.
 */
664 * void spitfire_block_zero(void *dst, size_t len)
666 ENTRY(spitfire_block_zero)
668 wrpr %g0, PIL_TICK, %pil	! raise PIL while FP state is juggled
670 wr %g0, ASI_BLK_S, %asi	! 64-byte block-transfer ASI
671 wr %g0, FPRS_FEF, %fprs	! enable the FPU
673 sub PCB_REG, TF_SIZEOF, %o4	! %o4 = trap frame below the pcb — confirm layout
674 ldx [%o4 + TF_FPRS], %o5
675 andcc %o5, FPRS_FEF, %g0	! did the interrupted context have FP enabled?
678 stda %f0, [PCB_REG + PCB_UFP + (0 * 64)] %asi	! save user FP regs
679 stda %f16, [PCB_REG + PCB_UFP + (1 * 64)] %asi	! into the pcb save
680 stda %f32, [PCB_REG + PCB_UFP + (2 * 64)] %asi	! area
681 stda %f48, [PCB_REG + PCB_UFP + (3 * 64)] %asi
684 andn %o5, FPRS_FEF, %o5	! clear FEF in the saved %fprs
685 stx %o5, [%o4 + TF_FPRS]
686 ldx [PCB_REG + PCB_FLAGS], %o4
688 stx %o4, [PCB_REG + PCB_FLAGS]
701 1: stda %f0, [%o0] %asi	! four 64-byte block stores of
702 stda %f0, [%o0 + 64] %asi	! (presumably zeroed) %f0-%f15
703 stda %f0, [%o0 + 128] %asi	! = 256 bytes per iteration;
704 stda %f0, [%o0 + 192] %asi	! the zeroing of %f0 is elided
712 END(spitfire_block_zero)
/* Export the size of the FP-fault region for the trap handler's range check. */
718 .globl fpu_fault_size
719 .set fpu_fault_size, fpu_fault_end - fpu_fault_begin
/*
 * Fragments of longjmp (restore %fp/%sp/%pc from the jmp_buf in %g1) and
 * setjmp (save %sp/%o7/%fp into the jmp_buf in %o0).  NOTE(review): the
 * ENTRY()/END() brackets, the window-unwinding loop and the sanity
 * checks feeding the PANIC below are elided from this view.
 */
725 ldx [%g1 + _JB_FP], %g2	! saved frame pointer
730 ldx [%g1 + _JB_SP], %o2	! saved stack pointer
734 ldx [%g1 + _JB_PC], %o7	! saved return address
737 2: PANIC("longjmp botch", %l1)	! jmp_buf failed validation
741 stx %sp, [%o0 + _JB_SP]
742 stx %o7, [%o0 + _JB_PC]
743 stx %fp, [%o0 + _JB_FP]
/*
 * Call into the Open Firmware client interface with the window and
 * processor state the PROM expects.  NOTE(review): several lines (the
 * %pstate/%wstate reads, the actual call through the vector, the
 * restores) are elided from this view — register roles below are partly
 * inferred and should be confirmed against the full source.
 */
749 * void ofw_entry(cell_t args[])
752 save %sp, -CCFSZ, %sp	! new window + minimal C call frame
753 SET(ofw_vec, %l7, %l6)	! %l6 = &ofw_vec (SET(sym, tmp, dst)) — confirm
756 andn %l7, PSTATE_AM | PSTATE_IE, %l5	! clear 32-bit mode + interrupt enable
758 SET(tba_taken_over, %l5, %l4)
761 andn %l5, WSTATE_PROM_MASK, %l3
762 wrpr %l3, WSTATE_PROM_KMIX, %wstate	! switch to PROM window state
767 wrpr %g0, %l5, %wstate	! restore our window state
768 1: wrpr %l7, 0, %pstate	! restore saved %pstate — presumably in %l7
770 restore %o0, %g0, %o0	! pop window, propagate OFW result in %o0
/*
 * Hand control back to the Open Firmware: restore the OFW trap table,
 * set up a safe stack in a locked page and reset MMU/trap state.
 * NOTE(review): presumably does not return; several lines are elided
 * from this view — confirm against the full source.
 */
774 * void ofw_exit(cell_t args[])
777 save %sp, -CCFSZ, %sp
779 SET(ofw_tba, %l7, %l5)	! %l5 = &ofw_tba (saved PROM %tba) — confirm
782 andn %l7, PSTATE_AM | PSTATE_IE, %l7	! clear 32-bit mode + interrupt enable
785 andn %l7, WSTATE_PROM_MASK, %l7
786 wrpr %l7, WSTATE_PROM_KMIX, %wstate	! switch to PROM window state
787 wrpr %l5, 0, %tba ! restore the OFW trap table
788 SET(ofw_vec, %l7, %l6)
790 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
791 sub %l0, SPOFF, %fp ! setup a stack in a locked page
792 sub %l0, SPOFF + CCFSZ, %sp
793 mov AA_DMMU_PCXR, %l3 ! force primary DMMU context 0
794 sethi %hi(KERNBASE), %l5
795 stxa %g0, [%l3] ASI_DMMU
797 wrpr %g0, 0, %tl ! force trap level 0
821 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
/* Local duplicates of the gmon state values; keep in sync with sys/gmon.h. */
825 #define GMON_PROF_OFF 3
826 #define GMON_PROF_HIRES 4
/* Compiler-emitted profiling hooks are aliased onto the kernel's mcount. */
829 .set _mcount, __cyg_profile_func_enter
/*
 * Function-entry profiling hook: tail into mcount() unless profiling is
 * off.  NOTE(review): the branches and the jump through %o2 are elided
 * from this view.
 */
831 ENTRY(__cyg_profile_func_enter)
832 SET(_gmonparam, %o3, %o2)	! %o2 = &_gmonparam
833 lduw [%o2 + GM_STATE], %o3	! current profiling state
834 cmp %o3, GMON_PROF_OFF	! skip the call when profiling is off
837 SET(mcount, %o3, %o2)
842 END(__cyg_profile_func_enter)
/*
 * Function-exit hook: only call mexitcount() in high-resolution mode.
 */
846 ENTRY(__cyg_profile_func_exit)
847 SET(_gmonparam, %o3, %o2)	! %o2 = &_gmonparam
848 lduw [%o2 + GM_STATE], %o3
849 cmp %o3, GMON_PROF_HIRES	! exits are only counted in high-res mode
852 SET(mexitcount, %o3, %o2)
857 END(__cyg_profile_func_exit)