2 * Copyright (c) 2001 Jake Burkholder.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <machine/asm.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/errno.h>
32 #include <machine/asi.h>
33 #include <machine/asmacros.h>
34 #include <machine/fsr.h>
35 #include <machine/intr_machdep.h>
36 #include <machine/ktr.h>
37 #include <machine/pcb.h>
38 #include <machine/pstate.h>
42 .register %g2, #ignore
43 .register %g3, #ignore
44 .register %g6, #ignore
/*
 * NOTE(review): .register declares %g2/%g3/%g6 as scratch here, which
 * keeps the SPARC V9 assembler from warning about use of the
 * application-reserved global registers in the routines below.
 */
49 * Generate load and store instructions for the corresponding width and asi
50 * (or not). Note that we want to evaluate the macro args before
51 * concatenating, so that E really turns into nothing.
/*
 * The two-level expansion (LD -> _LD, ST -> _ST) forces the preprocessor
 * to expand the arguments before token pasting, so passing the empty
 * token E as `a' pastes to a plain "ld<w>"/"st<w>", while passing `a'
 * yields the ASI-qualified "ld<w>a"/"st<w>a" form.
 */
53 #define _LD(w, a) ld ## w ## a
54 #define _ST(w, a) st ## w ## a
56 #define LD(w, a) _LD(w, a)
57 #define ST(w, a) _ST(w, a)
60 * Common code for copy routines.
62 * We use large macros to generate functions for each of the copy routines.
63 * This allows the load and store instructions to be generated for the right
64 * operation, asi or not. It is possible to write an asi independent function
65 * but this would require 2 expensive wrs in the main loop to switch %asi.
66 * It would also screw up profiling (if we ever get it), but may save some I$.
67 * We assume that either one of dasi and sasi is empty, or that they are both
68 * the same (empty or non-empty). It is up to the caller to set %asi.
72 * ASI independent implementation of copystr(9).
73 * Used to implement copyinstr() and copystr().
75 * Return value is in %g1.
/*
 * _COPYSTR core: copies bytes from src to dst through the LD/ST
 * wrappers, so either side may go through an alternate address space.
 * The result code is left in %g1 (see the header comment above): label 2
 * loads ENAMETOOLONG for the len-exhausted case, and label 3 computes a
 * byte count via `sub src, %g2, %g2'.
 * NOTE(review): the copy loop, NUL test, and the store through `done'
 * are elided from this excerpt — presumably %g2 initially holds the
 * starting src so the sub yields the transferred length; confirm
 * against the full source.
 */
77 #define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
83 LD(ub, sa) [src] sasi, %g1 ; \
84 ST(b, da) %g1, [dst] dasi ; \
89 2: mov ENAMETOOLONG, %g1 ; \
90 3: sub src, %g2, %g2 ; \
96 * ASI independent implementation of memset(3).
97 * Used to implement bzero(), memset() and aszero().
99 * If the pattern is non-zero, duplicate it to fill 64 bits.
100 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
101 * It has yet to be determined how much unrolling is beneficial.
102 * Could also read and compare before writing to minimize snoop traffic.
104 * XXX bzero() should be implemented as
105 * #define bzero(dst, len) (void)memset((dst), 0, (len))
/*
 * _MEMSET core.  Visible structure: `and pat, 0xff' isolates the fill
 * byte and the sllx steps (partially elided) replicate it across 64
 * bits; byte stores run until dst is 8-byte aligned; a 4-way unrolled
 * stx loop then fills 32 bytes per iteration; single stx and final stb
 * stores handle the remainder.
 * NOTE(review): branch/decrement loop control is elided from this
 * excerpt — confirm iteration conditions against the full source.
 */
108 #define _MEMSET(dst, pat, len, da, dasi) \
110 and pat, 0xff, pat ; \
114 sllx pat, 16, %g1 ; \
116 sllx pat, 32, %g1 ; \
124 ST(b, da) pat, [dst] dasi ; \
131 ST(x, da) pat, [dst] dasi ; \
132 ST(x, da) pat, [dst + 8] dasi ; \
133 ST(x, da) pat, [dst + 16] dasi ; \
134 ST(x, da) pat, [dst + 24] dasi ; \
141 ST(x, da) pat, [dst] dasi ; \
148 ST(b, da) pat, [dst] dasi ; \
154 * ASI independent implementation of memcpy(3).
155 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
156 * ascopyfrom() and ascopyto().
158 * Transfer bytes until dst is 8-byte aligned. If src is then also 8 byte
159 * aligned, transfer 8 bytes, otherwise finish with bytes. The unaligned
160 * case could be optimized, but it is expected that this is the uncommon
161 * case and of questionable value. The code to do so is also rather large
162 * and ugly. It has yet to be determined how much unrolling is beneficial.
164 * XXX bcopy() must also check for overlap. This is stupid.
165 * XXX bcopy() should be implemented as
166 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
/*
 * _MEMCPY core.  Visible structure: byte copies staged through %g1
 * until dst is 8-byte aligned; when src is then also 8-byte aligned, a
 * 4-way unrolled ldx/stx loop moves 32 bytes per iteration using
 * %g1-%g4 as staging registers; single 8-byte transfers and a final
 * byte tail finish up.
 * NOTE(review): the alignment tests and loop control are elided from
 * this excerpt — confirm against the full source.
 */
169 #define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \
175 LD(ub, sa) [src] sasi, %g1 ; \
176 ST(b, da) %g1, [dst] dasi ; \
189 LD(x, sa) [src] sasi, %g1 ; \
190 LD(x, sa) [src + 8] sasi, %g2 ; \
191 LD(x, sa) [src + 16] sasi, %g3 ; \
192 LD(x, sa) [src + 24] sasi, %g4 ; \
193 ST(x, da) %g1, [dst] dasi ; \
194 ST(x, da) %g2, [dst + 8] dasi ; \
195 ST(x, da) %g3, [dst + 16] dasi ; \
196 ST(x, da) %g4, [dst + 24] dasi ; \
204 LD(x, sa) [src] sasi, %g1 ; \
205 ST(x, da) %g1, [dst] dasi ; \
213 LD(ub, sa) [src] sasi, %g1 ; \
214 ST(b, da) %g1, [dst] dasi ; \
/*
 * Entry points below are generated from the _MEMCPY/_MEMSET/_COPYSTR
 * macros.  The a/%asi argument pairs select ASI-qualified accesses for
 * the alternate-space side of each routine; E/E selects plain loads and
 * stores.  NOTE(review): the ENTRY()/END() brackets and the `wr ...
 * %asi' setup for the as* routines are elided from this excerpt.
 */
221 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
225 _MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
231 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
235 _MEMCPY(%o2, %o1, %o3, E, E, a, %asi)
241 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
245 _MEMCPY(%o2, %o0, %o3, a, %asi, E, E)
251 * void aszero(u_long asi, vm_offset_t pa, size_t len)
255 _MEMSET(%o1, %g0, %o2, a, %asi)
261 * int bcmp(const void *b1, const void *b2, size_t len)
/* bcmp inner loop: fetch byte i of each buffer, indexed via %o3. */
266 1: ldub [%o0 + %o3], %o4	! %o4 = b1[i]
267 ldub [%o1 + %o3], %o5	! %o5 = b2[i]
279 * void bcopy(const void *src, void *dst, size_t len)
283 * Check for overlap, and copy backwards if so.
307 * Do the fast version.
/* Non-overlapping fast path: plain forward memcpy. */
309 3: _MEMCPY(%o1, %o0, %o2, E, E, E, E)
315 * void bzero(void *b, size_t len)
318 _MEMSET(%o0, %g0, %o1, E, E)
324 * int copystr(const void *src, void *dst, size_t len, size_t *done)
327 _COPYSTR(%o0, %o1, %o2, %o3, E, E, E, E)
333 * void *memcpy(void *dst, const void *src, size_t len)
/*
 * NOTE(review): dst is taken from %o3 here — presumably %o0 was copied
 * aside (elided) so the original dst can be returned; confirm.
 */
337 _MEMCPY(%o3, %o1, %o2, E, E, E, E)
343 * void *memset(void *b, int c, size_t len)
347 _MEMSET(%o3, %o1, %o2, E, E)
352 .globl copy_nofault_begin
/*
 * copy_nofault_begin/copy_nofault_end bracket the user<->kernel copy
 * code so that faults taken inside this range can be recovered (the
 * routines then return an error instead of panicking).
 * NOTE(review): the fault-recovery setup itself is elided from this
 * excerpt; confirm against the trap handlers.
 */
357 * int copyin(const void *uaddr, void *kaddr, size_t len)
360 wr %g0, ASI_AIUP, %asi	! %asi = user primary address space
361 _MEMCPY(%o1, %o0, %o2, E, E, a, %asi)
367 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
370 wr %g0, ASI_AIUP, %asi	! source side is user space
371 _COPYSTR(%o0, %o1, %o2, %o3, a, %asi, E, E)
377 * int copyout(const void *kaddr, void *uaddr, size_t len)
380 wr %g0, ASI_AIUP, %asi	! destination side is user space
381 _MEMCPY(%o1, %o0, %o2, a, %asi, E, E)
386 .globl copy_nofault_end
395 .globl fs_nofault_begin
400 * Chatty aliases for fetch, store functions.
402 .globl fubyte, fusword, fuword, subyte, susword, suword
/* Map the historic fetch(9)/store(9) names onto the fixed-width forms. */
404 .set fusword, fuword16
405 .set fuword, fuword64	! LP64: plain fuword is the 64-bit fetch
407 .set susword, suword16
408 .set suword, suword64	! LP64: plain suword is the 64-bit store
410 .globl casuword32, casuword, fuptr, suptr
411 .set casuword, casuword64
/*
 * Fetch/store primitives.  Each accesses user memory through ASI_AIUP
 * ("as if user, primary"); faults inside the fs_nofault range are
 * recovered by the trap handlers.  NOTE(review): ENTRY()/END() brackets
 * and the retl/delay-slot returns are elided from this excerpt.
 */
416 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
419 casa [%o0] ASI_AIUP, %o1, %o2	! 32-bit compare-and-swap in user space
425 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
428 casxa [%o0] ASI_AIUP, %o1, %o2	! 64-bit compare-and-swap in user space
434 * int fuword8(const void *base)
438 lduba [%o0] ASI_AIUP, %o0	! zero-extending user byte fetch
442 * int fuword16(const void *base)
446 lduha [%o0] ASI_AIUP, %o0	! zero-extending user halfword fetch
450 * int32_t fuword32(const void *base)
454 lduwa [%o0] ASI_AIUP, %o0	! zero-extending user word fetch
458 * int64_t fuword64(const void *base)
462 ldxa [%o0] ASI_AIUP, %o0	! user doubleword fetch
466 * int suword8(const void *base, int word)
469 stba %o1, [%o0] ASI_AIUP	! user byte store
475 * int suword16(const void *base, int word)
478 stha %o1, [%o0] ASI_AIUP	! user halfword store
484 * int suword32(const void *base, int32_t word)
487 stwa %o1, [%o0] ASI_AIUP	! user word store
493 * int suword64(const void *base, int64_t word)
496 stxa %o1, [%o0] ASI_AIUP	! user doubleword store
501 .globl fs_nofault_intr_begin
502 fs_nofault_intr_begin:
/*
 * NOTE(review): the *intr variants are presumably the forms that may be
 * used from interrupt context — the separate fs_nofault_intr markers
 * let the fault handler treat their faults specially; confirm against
 * the trap-recovery code.
 */
506 * int fuswintr(const void *base)
510 lduha [%o0] ASI_AIUP, %o0	! user halfword fetch
514 * int suswintr(const void *base, int word)
517 stha %o1, [%o0] ASI_AIUP	! user halfword store
522 .globl fs_nofault_intr_end
526 .globl fs_nofault_end
535 .globl fas_nofault_begin
/*
 * fasword* fetch a value of the given width from `addr' through the
 * caller-supplied ASI (arg 0) into *val.  NOTE(review): the `wr' that
 * loads %asi from %o0, the store through %o2 (val) and the return-code
 * handling are elided from this excerpt.
 */
539 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
544 lduba [%o1] %asi, %o3	! fetch byte via caller's ASI
552 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
557 lduha [%o1] %asi, %o3	! fetch halfword via caller's ASI
565 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
570 lduwa [%o1] %asi, %o3	! fetch word via caller's ASI
577 .globl fas_nofault_end
587 .globl fpu_fault_begin
592 * void spitfire_block_copy(void *src, void *dst, size_t len)
594 ENTRY(spitfire_block_copy)
596 wrpr %g0, PIL_TICK, %pil	! raise PIL while the FPU is borrowed
598 wr %g0, ASI_BLK_S, %asi	! select the 64-byte block-transfer ASI
599 wr %g0, FPRS_FEF, %fprs	! enable the FPU
601 sub PCB_REG, TF_SIZEOF, %o4	! %o4 = trapframe (just below the PCB)
602 ldx [%o4 + TF_FPRS], %o5
603 andcc %o5, FPRS_FEF, %g0	! was the FPU in use by this thread?
/* If so, spill the user FP register file into the PCB before use. */
606 stda %f0, [PCB_REG + PCB_UFP + (0 * 64)] %asi
607 stda %f16, [PCB_REG + PCB_UFP + (1 * 64)] %asi
608 stda %f32, [PCB_REG + PCB_UFP + (2 * 64)] %asi
609 stda %f48, [PCB_REG + PCB_UFP + (3 * 64)] %asi
612 andn %o5, FPRS_FEF, %o5	! mark the FPU clean in the trapframe
613 stx %o5, [%o4 + TF_FPRS]
614 ldx [PCB_REG + PCB_FLAGS], %o4
616 stx %o4, [PCB_REG + PCB_FLAGS]
/*
 * Main copy: stream 64-byte blocks through %f16/%f32 with ldda/stda.
 * NOTE(review): the loop control, the double-buffering branches and the
 * FPU/PIL restore are elided from this excerpt.
 */
624 2: ldda [%o0] %asi, %f16
633 stda %f32, [%o1] %asi
647 stda %f32, [%o1] %asi
655 stda %f16, [%o1] %asi
660 END(spitfire_block_copy)
663 * void spitfire_block_zero(void *dst, size_t len)
665 ENTRY(spitfire_block_zero)
667 wrpr %g0, PIL_TICK, %pil	! raise PIL while the FPU is borrowed
669 wr %g0, ASI_BLK_S, %asi	! select the 64-byte block-transfer ASI
670 wr %g0, FPRS_FEF, %fprs	! enable the FPU
672 sub PCB_REG, TF_SIZEOF, %o4	! %o4 = trapframe (just below the PCB)
673 ldx [%o4 + TF_FPRS], %o5
674 andcc %o5, FPRS_FEF, %g0	! was the FPU in use by this thread?
/* If so, spill the user FP register file into the PCB before use. */
677 stda %f0, [PCB_REG + PCB_UFP + (0 * 64)] %asi
678 stda %f16, [PCB_REG + PCB_UFP + (1 * 64)] %asi
679 stda %f32, [PCB_REG + PCB_UFP + (2 * 64)] %asi
680 stda %f48, [PCB_REG + PCB_UFP + (3 * 64)] %asi
683 andn %o5, FPRS_FEF, %o5	! mark the FPU clean in the trapframe
684 stx %o5, [%o4 + TF_FPRS]
685 ldx [PCB_REG + PCB_FLAGS], %o4
687 stx %o4, [PCB_REG + PCB_FLAGS]
/*
 * Main loop: block-store 256 bytes per iteration from the (zeroed) %f0
 * register group.  NOTE(review): the zeroing of %f0-%f14, the loop
 * control and the FPU/PIL restore are elided from this excerpt.
 */
700 1: stda %f0, [%o0] %asi
701 stda %f0, [%o0 + 64] %asi
702 stda %f0, [%o0 + 128] %asi
703 stda %f0, [%o0 + 192] %asi
711 END(spitfire_block_zero)
717 .globl fpu_fault_size
/* Length of the fault-recoverable FPU region, for the trap handler. */
718 .set fpu_fault_size, fpu_fault_end - fpu_fault_begin
/*
 * longjmp(9) fragment: reload the saved frame pointer, stack pointer
 * and return address from the jmpbuf in %g1; label 2 panics on a
 * corrupt buffer.  NOTE(review): ENTRY()/END() and the validity checks
 * are elided from this excerpt.
 */
724 ldx [%g1 + _JB_FP], %g2	! saved frame pointer
729 ldx [%g1 + _JB_SP], %o2	! saved stack pointer
733 ldx [%g1 + _JB_PC], %o7	! saved return address
736 2: PANIC("longjmp botch", %l1)
/* setjmp(9) fragment: record the caller's sp/return pc/fp. */
740 stx %sp, [%o0 + _JB_SP]
741 stx %o7, [%o0 + _JB_PC]
742 stx %fp, [%o0 + _JB_FP]
748 * void ofw_entry(cell_t args[])
/* Call into OpenFirmware via the saved client-interface vector. */
751 save %sp, -CCFSZ, %sp
752 SET(ofw_vec, %l7, %l6)	! %l6 = OFW client interface entry
755 wrpr %g0, PIL_TICK, %pil	! raise PIL around the firmware call
760 restore %o0, %g0, %o0	! propagate OFW's return value
764 * void ofw_exit(cell_t args[])
/*
 * Hand control back to OpenFirmware for good: restore its trap table,
 * switch to a stack in a locked page, and reset MMU context/trap level
 * so the firmware sees a sane machine state.
 */
767 save %sp, -CCFSZ, %sp
769 wrpr %g0, PIL_TICK, %pil
770 SET(ofw_tba, %l7, %l5)	! %l5 = firmware's saved trap-table base
772 wrpr %l5, 0, %tba ! restore the OFW trap table
773 SET(ofw_vec, %l7, %l6)
775 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
776 sub %l0, SPOFF, %fp ! setup a stack in a locked page
777 sub %l0, SPOFF + CCFSZ, %sp
778 mov AA_DMMU_PCXR, %l3 ! force primary DMMU context 0
779 sethi %hi(KERNBASE), %l5
780 stxa %g0, [%l3] ASI_DMMU
782 wrpr %g0, 0, %tl ! force trap level 0
806 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
/* Local mirror of the GMON_PROF_* state values (see the XXX above). */
810 #define GMON_PROF_OFF 3
811 #define GMON_PROF_HIRES 4
/* gcc -finstrument-functions hooks double as the mcount entry points. */
814 .set _mcount, __cyg_profile_func_enter
816 ENTRY(__cyg_profile_func_enter)
817 SET(_gmonparam, %o3, %o2)	! %o2 = &_gmonparam
818 lduw [%o2 + GM_STATE], %o3
819 cmp %o3, GMON_PROF_OFF	! skip if profiling is off
822 SET(mcount, %o3, %o2)
827 END(__cyg_profile_func_enter)
831 ENTRY(__cyg_profile_func_exit)
832 SET(_gmonparam, %o3, %o2)	! %o2 = &_gmonparam
833 lduw [%o2 + GM_STATE], %o3
834 cmp %o3, GMON_PROF_HIRES	! exit counts only in high-res mode
837 SET(mexitcount, %o3, %o2)
842 END(__cyg_profile_func_exit)