4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
28 #include <machine/asm.h>
31 #include <machine/asi.h>
32 #include <machine/asmacros.h>
33 #include <machine/ktr.h>
34 #include <machine/pstate.h>
35 #include <machine/trap.h>
36 #include <machine/tstate.h>
37 #include <machine/wstate.h>
38 #include <machine/hypervisorvar.h>
47 * This define is to align data for the unaligned source cases.
48 * The data1, data2 and data3 is merged into data1 and data2.
49 * The data3 is preserved for next merge.
!
! ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp):
!   shifts data1/data2 left by the source misalignment (lshift bits) and
!   ORs in the high bits of the following dword (rshift = 64 - lshift),
!   producing two realigned dwords; data3 carries over to the next merge.
! NOTE(review): this excerpt is sampled -- the final "or data2, tmp, data2"
! continuation line of the macro is not visible here; confirm against the
! full source before editing.
!
51 #define ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp) \
52 sllx data1, lshift, data1 ;\
53 srlx data2, rshift, tmp ;\
54 or data1, tmp, data1 ;\
55 sllx data2, lshift, data2 ;\
56 srlx data3, rshift, tmp ;\
59 * This macro is to align the data. Basically it merges
60 * data1 and data2 to form double word.
!
! ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp): single-dword variant of
! ALIGN_DATA -- shift data1 left by the misalignment, pull the needed high
! bits of data2 into tmp via the complementary right shift.
! NOTE(review): sampled excerpt -- the closing "or data1, tmp, data1" merge
! line of this macro is elided here; verify against the full source.
!
62 #define ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp) \
63 sllx data1, lshift, data1 ;\
64 srlx data2, rshift, tmp ;\
72 * DGDEF and DGDEF2 provide global data declarations.
74 * DGDEF provides a word aligned word of storage.
76 * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This
77 * implies this macro is best used for byte arrays.
79 * DGDEF3 allocates "sz" bytes of storage with "algn" alignment.
!
! NOTE(review): sampled excerpt -- the .global/.section/.align continuation
! lines of DGDEF2/DGDEF3 are elided; only the .type lines are visible.
! DGDEF(name) expands to DGDEF3(name, 4, 4): a word-sized, word-aligned cell.
!
81 #define DGDEF2(name, sz) \
84 .type name, @object ; \
88 #define DGDEF3(name, sz, algn) \
92 .type name, @object ; \
96 #define DGDEF(name) DGDEF3(name, 4, 4)
!
! Tunable per-alignment thresholds (in bytes) deciding when the HW-assisted
! block-copy path is worth using; indexed by copy alignment (1/2/4/8).
! These are read by the copyin dispatch code below (sethi/ld of
! hw_copy_limit_1/2/4/8) before branching to big_copyin.
!
99 DGDEF(hw_copy_limit_1)
101 DGDEF(hw_copy_limit_2)
103 DGDEF(hw_copy_limit_4)
105 DGDEF(hw_copy_limit_8)
!
! ovbcopy: overlap-tolerant byte copy (memmove-like).
!   %o0 = from, %o1 = to, %o2 = count
! If count <= |from - to| the regions cannot interfere, so it punts to the
! plain bcopy path (novbcopy).  Otherwise it copies forward when from > to
! and backward (from the last byte down) when from < to, so overlapping
! bytes are always read before they are overwritten.
! NOTE(review): sampled excerpt -- ENTRY directive and the ov_fwd/ov_bkwd
! label lines themselves are elided; branch targets named in the comments.
!
115 ovbcopy(const void *from, void *to, size_t count)
121 tst %o2 ! check count
122 bgu,a %xcc, 1f ! nothing to do or bad arguments
123 subcc %o0, %o1, %o3 ! difference of from and to address
129 neg %o3 ! if < 0, make it positive
130 2: cmp %o2, %o3 ! cmp size and abs(from - to)
131 bleu %xcc, novbcopy ! if size <= abs(diff): use bcopy,
133 cmp %o0, %o1 ! compare from and to addresses
134 blu %xcc, ov_bkwd ! if from < to, copy backwards
! forward copy loop: one byte per iteration, %o1 bumped in the delay slot
140 ldub [%o0], %o3 ! read from address
141 inc %o0 ! inc from address
142 stb %o3, [%o1] ! write to address
143 deccc %o2 ! dec count
144 bgu %xcc, ov_fwd ! loop till done
145 inc %o1 ! inc to address
! backward copy loop: %o2 counts down and doubles as the index from the end
153 deccc %o2 ! dec count
154 ldub [%o0 + %o2], %o3 ! get byte at end of src
155 bgu %xcc, ov_bkwd ! loop till done
156 stb %o3, [%o1 + %o2] ! delay slot, store at end of dst
167 * Copy a block of storage - must not overlap (from + len <= to).
!
! bcopy dispatch (sampled excerpt; ENTRY and most label lines are elided):
!   - count < 12:            bytecp (plain byte loop)
!   - count < 128 or
!     use_hw_bcopy disabled:  bcb_punt (word/double software paths below)
!   - otherwise:             64-byte block copy via ASI_LDSTBI_P, with one
!     of three inner loops chosen by the source offset within a quadword
!     (%i1 & 0xf): offset == 8 (dword aligned, no merging), offset < 8, and
!     offset > 8 -- the latter two shift/merge with ALIGN_DATA.
! After the src/dst swap noted below, %i0 = dst, %i1 = src, %i2 = residue
! count, %i3 = block-multiple byte count.
!
171 save %sp, -SA(MINFRAME), %sp
174 cmp %i2, 12 ! for small counts
175 blu %xcc, bytecp ! just copy bytes
178 cmp %i2, 128 ! for less than 128 bytes
179 blu,pn %xcc, bcb_punt ! no block st/quad ld
182 set use_hw_bcopy, %o2
193 * Compare against 256 since we should be checking block addresses
194 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
195 * src = dest + (64 * 3) + 63.
198 blu,pn %xcc, bcb_punt
202 * Copy that reach here have at least 2 blocks of data to copy.
205 ! Swap src/dst since the code below is memcpy code
206 ! and memcpy/bcopy have different calling sequences
211 andcc %i0, 0x3f, %i3 ! is dst aligned on a 64 bytes
212 bz %xcc, chksrc ! dst is already double aligned
214 neg %i3 ! bytes till dst 64 bytes aligned
215 sub %i2, %i3, %i2 ! update i2 with new count
224 ! Now Destination is block (64 bytes) aligned
226 andn %i2, 0x3f, %i3 ! %i3 count is multiple of block size
227 sub %i2, %i3, %i2 ! Residue bytes in %i2
229 wr %g0, ASI_LDSTBI_P, %asi
231 andcc %i1, 0xf, %o2 ! is src quadword aligned
232 bz,pn %xcc, blkcpy ! src offset in %o2
!
! loop0: src offset == 8 within the quadword -- dword aligned, so each ldda
! pair lands already aligned and no shift/merge is needed.
!
240 ! Falls through when source offset is equal to 8 i.e.
241 ! source is double word aligned.
242 ! In this case no shift/merge of data is required
243 sub %i1, %o2, %i1 ! align the src at 16 bytes.
244 andn %i1, 0x3f, %l0 ! %l0 has block aligned source
245 prefetch [%l0+0x0], #one_read
246 ldda [%i1+0x0]%asi, %l2
248 ldda [%i1+0x10]%asi, %l4
249 prefetch [%l0+0x40], #one_read
251 stxa %l3, [%i0+0x0]%asi
252 stxa %l4, [%i0+0x8]%asi
254 ldda [%i1+0x20]%asi, %l2
255 stxa %l5, [%i0+0x10]%asi
256 stxa %l2, [%i0+0x18]%asi
258 ldda [%i1+0x30]%asi, %l4
259 stxa %l3, [%i0+0x20]%asi
260 stxa %l4, [%i0+0x28]%asi
262 ldda [%i1+0x40]%asi, %l2
263 stxa %l5, [%i0+0x30]%asi
264 stxa %l2, [%i0+0x38]%asi
272 add %i1, %o2, %i1 ! increment the source by src offset
273 ! the src offset was stored in %o2
!
! loop1: src offset < 8 -- every 16-byte ldda straddles the copy boundary,
! so consecutive dwords are merged with ALIGN_DATA using the shift pair
! computed below (%o0 = offset*8, %o1 = 64 - %o0).
!
276 sub %i1, %o2, %i1 ! align the src at 16 bytes.
277 sll %o2, 3, %o0 ! %o0 left shift
279 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift)
280 andn %i1, 0x3f, %l0 ! %l0 has block aligned source
281 prefetch [%l0+0x0], #one_read
282 ldda [%i1+0x0]%asi, %l2 ! partial data in %l2 and %l3 has
285 ldda [%i1+0x10]%asi, %l4 ! %l4 has partial data for this read.
286 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) ! merge %l2, %l3 and %l4
288 prefetch [%l0+0x40], #one_read
289 stxa %l2, [%i0+0x0]%asi
290 stxa %l3, [%i0+0x8]%asi
292 ldda [%i1+0x20]%asi, %l2
293 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) ! merge %l2 with %l5 and
294 stxa %l4, [%i0+0x10]%asi ! %l4 from previous read
295 stxa %l5, [%i0+0x18]%asi ! into %l4 and %l5
297 ! Repeat the same for next 32 bytes.
299 ldda [%i1+0x30]%asi, %l4
300 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)
301 stxa %l2, [%i0+0x20]%asi
302 stxa %l3, [%i0+0x28]%asi
304 ldda [%i1+0x40]%asi, %l2
305 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)
306 stxa %l4, [%i0+0x30]%asi
307 stxa %l5, [%i0+0x38]%asi
315 add %i1, %o2, %i1 ! increment the source by src offset
316 ! the src offset was stored in %o2
!
! loop2: src offset > 8 -- same merging idea as loop1 but the register
! rotation starts one dword later (%l3 first), offset reduced by 8 before
! the shift computation.
!
319 sub %i1, %o2, %i1 ! align the src at 16 bytes.
322 sll %o0, 3, %o0 ! %o0 left shift
324 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift)
325 andn %i1, 0x3f, %l0 ! %l0 has block aligned source
326 prefetch [%l0+0x0], #one_read
327 ldda [%i1+0x0]%asi, %l2 ! partial data in %l3 for this read and
330 ldda [%i1+0x10]%asi, %l4 ! %l4 has complete data and %l5 has
332 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) ! merge %l3, %l4 and %l5
334 prefetch [%l0+0x40], #one_read
335 stxa %l3, [%i0+0x0]%asi
336 stxa %l4, [%i0+0x8]%asi
338 ldda [%i1+0x20]%asi, %l2
339 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) ! merge %l2 and %l3 with
340 stxa %l5, [%i0+0x10]%asi ! %l5 from previous read
341 stxa %l2, [%i0+0x18]%asi ! into %l5 and %l2
343 ! Repeat the same for next 32 bytes.
345 ldda [%i1+0x30]%asi, %l4
346 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)
347 stxa %l3, [%i0+0x20]%asi
348 stxa %l4, [%i0+0x28]%asi
350 ldda [%i1+0x40]%asi, %l2
351 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)
352 stxa %l5, [%i0+0x30]%asi
353 stxa %l2, [%i0+0x38]%asi
361 add %i1, %o2, %i1 ! increment the source by src offset
362 ! the src offset was stored in %o2
!
! blkcpy: both src and dst 64-byte aligned -- straight 64-byte block moves,
! no merging.
!
365 ! Both Source and Destination are block aligned.
366 ! Do fast copy using ASI_LDSTBI_P
368 prefetch [%i1+0x0], #one_read
370 ldda [%i1+0x0]%asi, %l0
371 ldda [%i1+0x10]%asi, %l2
372 prefetch [%i1+0x40], #one_read
374 stxa %l0, [%i0+0x0]%asi
375 ldda [%i1+0x20]%asi, %l4
376 ldda [%i1+0x30]%asi, %l6
378 stxa %l1, [%i0+0x8]%asi
379 stxa %l2, [%i0+0x10]%asi
380 stxa %l3, [%i0+0x18]%asi
381 stxa %l4, [%i0+0x20]%asi
382 stxa %l5, [%i0+0x28]%asi
383 stxa %l6, [%i0+0x30]%asi
384 stxa %l7, [%i0+0x38]%asi
405 membar #Sync ! sync error barrier
!
! bcb_punt: software fallback used when the block path is not applicable.
! Register roles for this whole section are documented in the original
! comments just below.
!
411 ! use aligned transfers where possible
413 xor %i0, %i1, %o4 ! xor from and to address
414 btst 7, %o4 ! if lower three bits zero
415 bz aldoubcp ! can align on double boundary
416 nop ! assembler complaints about label
418 xor %i0, %i1, %o4 ! xor from and to address
419 btst 3, %o4 ! if lower two bits zero
420 bz alwordcp ! can align on word boundary
421 btst 3, %i0 ! delay slot, from address unaligned?
423 ! use aligned reads and writes where possible
424 ! this differs from wordcp in that it copes
425 ! with odd alignment between source and destination
426 ! using word reads and writes with the proper shifts
427 ! in between to align transfers to and from memory
428 ! i0 - src address, i1 - dest address, i2 - count
429 ! i3, i4 - tmps for used generating complete word
431 ! l0 size in bits of upper part of source word (US)
432 ! l1 size in bits of lower part of source word (LS = 32 - US)
433 ! l2 size in bits of upper part of destination word (UD)
434 ! l3 size in bits of lower part of destination word (LD = 32 - UD)
435 ! l4 number of bytes leftover after aligned transfers complete
438 mov 32, %l5 ! load an oft-needed constant
440 btst 3, %i1 ! is destination address aligned?
441 clr %i4 ! clear registers used in either case
445 ! both source and destination addresses are unaligned
448 ldub [%i0], %i3 ! read a byte from source address
449 add %i0, 1, %i0 ! increment source address
450 or %i4, %i3, %i4 ! or in with previous bytes (if any)
451 btst 3, %i0 ! is source aligned?
452 add %l0, 8, %l0 ! increment size of upper source (US)
454 sll %i4, 8, %i4 ! make room for next byte
456 sub %l5, %l0, %l1 ! generate shift left count (LS)
457 sll %i4, %l1, %i4 ! prepare to get rest
458 ld [%i0], %i3 ! read a word
459 add %i0, 4, %i0 ! increment source address
460 srl %i3, %l0, %i5 ! upper src bits into lower dst bits
461 or %i4, %i5, %i5 ! merge
462 mov 24, %l3 ! align destination
464 srl %i5, %l3, %i4 ! prepare to write a single byte
465 stb %i4, [%i1] ! write a byte
466 add %i1, 1, %i1 ! increment destination address
467 sub %i2, 1, %i2 ! decrement count
468 btst 3, %i1 ! is destination aligned?
470 sub %l3, 8, %l3 ! delay slot, decrement shift count (LD)
471 sub %l5, %l3, %l2 ! generate shift left count (UD)
472 sll %i5, %l2, %i5 ! move leftover into upper bytes
473 cmp %l2, %l0 ! cmp # reqd to fill dst w old src left
474 bgu %xcc, more_needed ! need more to fill than we have
477 sll %i3, %l1, %i3 ! clear upper used byte(s)
479 ! get the odd bytes between alignments
480 sub %l0, %l2, %l0 ! regenerate shift count
481 sub %l5, %l0, %l1 ! generate new shift left count (LS)
482 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0
483 andn %i2, 3, %i2 ! # of aligned bytes that can be moved
486 st %i5, [%i1] ! write a word
487 subcc %i2, 4, %i2 ! decrement count
489 add %i1, 4, %i1 ! increment destination address
492 sll %i3, %l1, %i5 ! get leftover into upper bits
494 sll %i3, %l0, %i3 ! save remaining byte(s)
496 sub %l2, %l0, %l1 ! regenerate shift count
497 sub %l5, %l1, %l0 ! generate new shift left count
498 sll %i3, %l1, %i4 ! move to fill empty space
500 or %i5, %i4, %i5 ! merge to complete word
502 ! the source address is aligned and destination is not
505 ld [%i0], %i4 ! read a word
506 add %i0, 4, %i0 ! increment source address
507 mov 24, %l0 ! initial shift alignment count
509 srl %i4, %l0, %i3 ! prepare to write a single byte
510 stb %i3, [%i1] ! write a byte
511 add %i1, 1, %i1 ! increment destination address
512 sub %i2, 1, %i2 ! decrement count
513 btst 3, %i1 ! is destination aligned?
515 sub %l0, 8, %l0 ! delay slot, decrement shift count
517 sub %l5, %l0, %l1 ! generate shift left count
518 sll %i4, %l1, %i5 ! get leftover
520 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0
521 andn %i2, 3, %i2 ! # of aligned bytes that can be moved
523 ld [%i0], %i3 ! read a source word
524 add %i0, 4, %i0 ! increment source address
525 srl %i3, %l0, %i4 ! upper src bits into lower dst bits
526 or %i5, %i4, %i5 ! merge with upper dest bits (leftover)
527 st %i5, [%i1] ! write a destination word
528 subcc %i2, 4, %i2 ! decrement count
529 bz %xcc, unalign_out ! check if done
530 add %i1, 4, %i1 ! increment destination address
532 sll %i3, %l1, %i5 ! get leftover
534 tst %l4 ! any bytes leftover?
538 sub %l0, 8, %l0 ! decrement shift
539 srl %i3, %l0, %i4 ! upper src byte into lower dst byte
540 stb %i4, [%i1] ! write a byte
541 subcc %l4, 1, %l4 ! decrement count
542 bz %xcc, cpdone ! done?
543 add %i1, 1, %i1 ! increment destination
544 tst %l0 ! any more previously read bytes
545 bnz %xcc, 1b ! we have leftover bytes
546 mov %l4, %i2 ! delay slot, mv cnt where dbytecp wants
547 b dbytecp ! let dbytecp do the rest
548 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
550 ! the destination address is aligned and the source is not
553 ldub [%i0], %i3 ! read a byte from source address
554 add %i0, 1, %i0 ! increment source address
555 or %i4, %i3, %i4 ! or in with previous bytes (if any)
556 btst 3, %i0 ! is source aligned?
557 add %l0, 8, %l0 ! increment shift count (US)
559 sll %i4, 8, %i4 ! make room for next byte
562 ! if from address unaligned for double-word moves,
563 ! move bytes till it is, if count is < 56 it could take
564 ! longer to align the thing than to do the transfer
565 ! in word size chunks right away
568 cmp %i2, 56 ! if count < 56, use wordcp, it takes
569 blu,a %xcc, alwordcp ! longer to align doubles than words
570 mov 3, %o0 ! mask for word alignment
571 call alignit ! copy bytes until aligned
572 mov 7, %o0 ! mask for double alignment
574 ! source and destination are now double-word aligned
575 ! i3 has aligned count returned by alignit
577 and %i2, 7, %i2 ! unaligned leftover count
578 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
580 ldx [%i0+%i1], %o4 ! read from address
581 stx %o4, [%i1] ! write at destination address
582 subcc %i3, 8, %i3 ! dec count
584 add %i1, 8, %i1 ! delay slot, inc to address
585 cmp %i2, 4 ! see if we can copy a word
586 blu %xcc, dbytecp ! if 3 or less bytes use bytecp
589 ! for leftover bytes we fall into wordcp, if needed
592 and %i2, 3, %i2 ! unaligned leftover count
594 ld [%i0+%i1], %o4 ! read from address
595 st %o4, [%i1] ! write at destination address
596 subcc %i3, 4, %i3 ! dec count
598 add %i1, 4, %i1 ! delay slot, inc to address
601 ! we come here to align copies on word boundaries
603 call alignit ! go word-align it
604 mov 3, %o0 ! bits that must be zero to be aligned
606 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
609 ! byte copy, works with any alignment
613 sub %i0, %i1, %i0 ! i0 gets difference of src and dst
616 ! differenced byte copy, works with any alignment
617 ! assumes dest in %i1 and (source - dest) in %i0
620 stb %o4, [%i1] ! write to address
621 inc %i1 ! inc to address
623 deccc %i2 ! dec count
624 bgeu,a %xcc, 1b ! loop till done
625 ldub [%i0+%i1], %o4 ! read from address
627 membar #Sync ! sync error barrier
629 restore %g0, 0, %o0 ! return (0)
632 * Common code used to align transfers on word and doubleword
633 * boundaries. Aligns source and destination and returns a count
634 * of aligned bytes to transfer in %i3
!
! alignit: helper called from bcopy's aldoubcp/alwordcp paths.
!   %o0 = alignment mask (3 for word, 7 for double), %i0 = src, %i1 = dst,
!   %i2 = count.  Copies single bytes until (src & mask) == 0, then returns
!   the mask-rounded count in %i3.
! NOTE(review): sampled excerpt -- the loop branch and return lines between
! these instructions are elided.
!
638 stb %o4, [%i1] ! write a byte
642 btst %o0, %i0 ! %o0 is bit mask to check for alignment
644 ldub [%i0], %o4 ! read next byte
647 andn %i2, %o0, %i3 ! return size of aligned bytes
652 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
653 * longer than 256 bytes in length using Niagara's block stores/quad store.
654 * If the criteria for using this routine are not met then it calls bzero
655 * and returns 1. Otherwise 0 is returned indicating success.
656 * Caller is responsible for ensuring use_hw_bzero is true and that
657 * kpreempt_disable() has been called.
662 hwblkclr(void *addr, size_t len)
667 ! %i0 - start address
668 ! %i1 - length of region (multiple of 64)
671 save %sp, -SA(MINFRAME), %sp
673 ! Must be block-aligned
678 ! ... and must be 256 bytes or more
683 ! ... and length must be a multiple of 64
685 bz,pn %xcc, pz_doblock
686 wr %g0, ASI_LDSTBI_P, %asi
688 1: ! punt, call bzero but notify the caller that bzero was used
693 restore %g0, 1, %o0 ! return (1) - did not use block operations
!
! pz_doblock: main loop clears 256 bytes per iteration.  The first store to
! each 64-byte block goes to offset 0x0/0x40/0x80/0xc0 up front (block-init
! store zeroes the rest of the line on allocation); the remaining seven
! dword stores then fill in each block.
!
695 ! Already verified that there are at least 256 bytes to set
697 stxa %g0, [%i0+0x0]%asi
698 stxa %g0, [%i0+0x40]%asi
699 stxa %g0, [%i0+0x80]%asi
700 stxa %g0, [%i0+0xc0]%asi
702 stxa %g0, [%i0+0x8]%asi
703 stxa %g0, [%i0+0x10]%asi
704 stxa %g0, [%i0+0x18]%asi
705 stxa %g0, [%i0+0x20]%asi
706 stxa %g0, [%i0+0x28]%asi
707 stxa %g0, [%i0+0x30]%asi
708 stxa %g0, [%i0+0x38]%asi
710 stxa %g0, [%i0+0x48]%asi
711 stxa %g0, [%i0+0x50]%asi
712 stxa %g0, [%i0+0x58]%asi
713 stxa %g0, [%i0+0x60]%asi
714 stxa %g0, [%i0+0x68]%asi
715 stxa %g0, [%i0+0x70]%asi
716 stxa %g0, [%i0+0x78]%asi
718 stxa %g0, [%i0+0x88]%asi
719 stxa %g0, [%i0+0x90]%asi
720 stxa %g0, [%i0+0x98]%asi
721 stxa %g0, [%i0+0xa0]%asi
722 stxa %g0, [%i0+0xa8]%asi
723 stxa %g0, [%i0+0xb0]%asi
724 stxa %g0, [%i0+0xb8]%asi
726 stxa %g0, [%i0+0xc8]%asi
727 stxa %g0, [%i0+0xd0]%asi
728 stxa %g0, [%i0+0xd8]%asi
729 stxa %g0, [%i0+0xe0]%asi
730 stxa %g0, [%i0+0xe8]%asi
731 stxa %g0, [%i0+0xf0]%asi
732 stxa %g0, [%i0+0xf8]%asi
736 bgu,pt %xcc, pz_doblock
!
! Tail: clear any final single 64-byte block.
!
740 ! Check if more than 64 bytes to set
746 stxa %g0, [%i0+0x0]%asi
747 stxa %g0, [%i0+0x8]%asi
748 stxa %g0, [%i0+0x10]%asi
749 stxa %g0, [%i0+0x18]%asi
750 stxa %g0, [%i0+0x20]%asi
751 stxa %g0, [%i0+0x28]%asi
752 stxa %g0, [%i0+0x30]%asi
753 stxa %g0, [%i0+0x38]%asi
762 restore %g0, 0, %o0 ! return (bzero or not)
!
! bzero(addr, count): %o0 = addr, %o1 = count.
! Strategy visible in this excerpt: byte-fill until 8-byte aligned; if
! >= 128 bytes and use_hw_bzero is set, block-clear 64-byte lines via the
! block-init ASI (256-byte unrolled loop, then a single-block tail), then
! finish with dword, word, and byte stores.  The ASI is selected between
! ASI_LDSTBI_P and ASI_LDSTBI_AIUS (kernel vs. user secondary) and undone
! before the non-block tail.
! NOTE(review): sampled excerpt -- ENTRY, several branches, and label lines
! (blkalign, bzero_blk, bzero_small, bzero_blk_done) are elided.
!
770 bzero(void *addr, size_t count)
786 andcc %o0, 7, %o3 ! is add aligned on a 8 byte bound
787 bz,pt %xcc, blkalign ! already double aligned
788 sub %o3, 8, %o3 ! -(bytes till double aligned)
789 add %o1, %o3, %o1 ! update o1 with new count
797 ! Now address is double aligned
799 cmp %o1, 0x80 ! check if there are 128 bytes to set
800 blu,pn %xcc, bzero_small
803 sethi %hi(use_hw_bzero), %o2
804 ld [%o2 + %lo(use_hw_bzero)], %o2
810 wr %g0, ASI_LDSTBI_P, %asi
813 wr %g0, ASI_LDSTBI_AIUS, %asi
816 andcc %o0, 0x3f, %o3 ! is block aligned?
817 bz,pt %xcc, bzero_blk
818 sub %o3, 0x40, %o3 ! -(bytes till block aligned)
819 add %o1, %o3, %o1 ! o1 is the remainder
821 ! Clear -(%o3) bytes till block aligned
829 and %o1, 0x3f, %o3 ! calc bytes left after blk clear
830 andn %o1, 0x3f, %o4 ! calc size of blocks in bytes
832 cmp %o4, 0x100 ! 256 bytes or more
!
! 256-byte unrolled block-clear loop (same first-store-per-block pattern as
! hwblkclr above).
!
837 stxa %g0, [%o0+0x0]%asi
838 stxa %g0, [%o0+0x40]%asi
839 stxa %g0, [%o0+0x80]%asi
840 stxa %g0, [%o0+0xc0]%asi
842 stxa %g0, [%o0+0x8]%asi
843 stxa %g0, [%o0+0x10]%asi
844 stxa %g0, [%o0+0x18]%asi
845 stxa %g0, [%o0+0x20]%asi
846 stxa %g0, [%o0+0x28]%asi
847 stxa %g0, [%o0+0x30]%asi
848 stxa %g0, [%o0+0x38]%asi
850 stxa %g0, [%o0+0x48]%asi
851 stxa %g0, [%o0+0x50]%asi
852 stxa %g0, [%o0+0x58]%asi
853 stxa %g0, [%o0+0x60]%asi
854 stxa %g0, [%o0+0x68]%asi
855 stxa %g0, [%o0+0x70]%asi
856 stxa %g0, [%o0+0x78]%asi
858 stxa %g0, [%o0+0x88]%asi
859 stxa %g0, [%o0+0x90]%asi
860 stxa %g0, [%o0+0x98]%asi
861 stxa %g0, [%o0+0xa0]%asi
862 stxa %g0, [%o0+0xa8]%asi
863 stxa %g0, [%o0+0xb0]%asi
864 stxa %g0, [%o0+0xb8]%asi
866 stxa %g0, [%o0+0xc8]%asi
867 stxa %g0, [%o0+0xd0]%asi
868 stxa %g0, [%o0+0xd8]%asi
869 stxa %g0, [%o0+0xe0]%asi
870 stxa %g0, [%o0+0xe8]%asi
871 stxa %g0, [%o0+0xf0]%asi
872 stxa %g0, [%o0+0xf8]%asi
880 ! ... check if 64 bytes to set
882 blu %xcc, bzero_blk_done
886 stxa %g0, [%o0+0x0]%asi
887 stxa %g0, [%o0+0x8]%asi
888 stxa %g0, [%o0+0x10]%asi
889 stxa %g0, [%o0+0x18]%asi
890 stxa %g0, [%o0+0x20]%asi
891 stxa %g0, [%o0+0x28]%asi
892 stxa %g0, [%o0+0x30]%asi
893 stxa %g0, [%o0+0x38]%asi
902 ! Undo asi register setting.
906 cmp %o4, ASI_LDSTBI_P
907 bne,a %xcc, bzero_small
908 wr %g0, ASI_AIUS, %asi
911 ! Set the remaining doubles
912 subcc %o3, 8, %o3 ! Can we store any doubles?
914 and %o1, 7, %o1 ! calc bytes left after doubles
917 stxa %g0, [%o0]%asi ! Clear the doubles
!
! bzero_small tail: word-align with byte stores, clear words, then bytes.
!
926 andcc %o0, 3, %o3 ! is add aligned on a word boundary
928 andn %o1, 3, %o3 ! create word sized count in %o3
930 dec %o1 ! decrement count
931 stba %g0, [%o0]%asi ! clear a byte
936 sta %g0, [%o0]%asi ! 4-byte clearing loop
941 and %o1, 3, %o1 ! leftover count, if any
944 ! Set the leftover bytes
949 deccc %o1 ! byte clearing loop
!
! copyin(uaddr, kaddr, count): copy user memory (%o0, via ASI_AIUS loads)
! to kernel memory (%o1).  Installs copyio fault handlers in T_LOFAULT
! before touching user addresses.  Dispatch visible in this excerpt:
!   - count <= SMALL_LIMIT: negative-index byte loop
!   - else by mutual alignment: byte/2/4/8 paths, each gated on the
!     corresponding hw_copy_limit_N tunable before taking big_copyin
!   - big_copyin: 64-byte block loop mirroring bcopy's loop0/loop1/loop2
!     structure, with user-side ldda via ASI_LDSTBI_AIUS.
! NOTE(review): sampled excerpt -- ENTRY, fault-handler bodies, many labels,
! and the function's tail (past ci_residue) are elided from this view.
!
963 #define SMALL_LIMIT 7
968 copyin(const void *uaddr, void *kaddr, size_t count)
975 ! Check the length and bail if zero.
984 sethi %hi(copyio_fault), %o4
985 or %o4, %lo(copyio_fault), %o4
986 sethi %hi(copyio_fault_nowindow), %o3
987 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
988 or %o3, %lo(copyio_fault_nowindow), %o3
990 stn %o3, [THREAD_REG + T_LOFAULT]
997 ! Check to see if we're more than SMALL_LIMIT.
999 subcc %o2, SMALL_LIMIT, %o3
1000 bgu,a,pt %xcc, dci_ns
1003 ! What was previously ".small_copyin"
1006 sub %g0, %o2, %o3 ! setup for copy loop
1010 lduba [%o0 + %o3]ASI_AIUS, %o4
1012 ! %o0 and %o1 point at the end and remain pointing at the end
1013 ! of their buffers. We pull things out by adding %o3 (which is
1014 ! the negation of the length) to the buffer end which gives us
1015 ! the current location in the buffers. By incrementing %o3 we walk
1016 ! through both buffers without having to bump each buffer's
1017 ! pointer. A very fast 4 instruction loop.
1021 stb %o4, [%o1 + %o3]
1024 lduba [%o0 + %o3]ASI_AIUS, %o4
1026 ! We're done. Go home.
1032 ! Try aligned copies from here.
1036 ! See if we're single byte aligned. If we are, check the
1037 ! limit for single byte copies. If we're smaller, or equal,
1038 ! bounce to the byte for byte copy loop. Otherwise do it in
1045 ! We're single byte aligned.
1047 sethi %hi(hw_copy_limit_1), %o3
1048 ld [%o3 + %lo(hw_copy_limit_1)], %o3
1050 ! Is HW copy on? If not do everything byte for byte.
1056 ! Are we bigger than the HW limit? If not
1057 ! go to byte for byte.
1062 ! We're big enough and copy is on. Do it with HW.
1064 ba,pt %xcc, big_copyin
1073 ! We're eight byte aligned.
1075 sethi %hi(hw_copy_limit_8), %o3
1076 ld [%o3 + %lo(hw_copy_limit_8)], %o3
1078 ! Is HW assist on? If not, do it with the aligned copy.
1085 ba,pt %xcc, big_copyin
1089 ! Housekeeping for copy loops. Uses same idea as in the byte for
1090 ! byte copy loop above.
1096 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
1102 sethi %hi(hw_copy_limit_4), %o3
1103 ld [%o3 + %lo(hw_copy_limit_4)], %o3
1105 ! Is HW assist on? If not, do it with the aligned copy.
1111 ! We're negative if our size is less than or equal to hw_copy_limit_4.
1115 ba,pt %xcc, big_copyin
1119 ! Housekeeping for copy loops. Uses same idea as in the byte
1120 ! for byte copy loop above.
1126 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
1129 ! We're two byte aligned. Check for "smallness"
1130 ! done in delay at .dcih4
1133 sethi %hi(hw_copy_limit_2), %o3
1134 ld [%o3 + %lo(hw_copy_limit_2)], %o3
1136 ! Is HW assist on? If not, do it with the aligned copy.
1142 ! Are we larger than the HW limit?
1147 ! HW assist is on and we're large enough to use it.
1149 ba,pt %xcc, big_copyin
1152 ! Housekeeping for copy loops. Uses same idea as in the byte
1153 ! for byte copy loop above.
1160 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
1164 ! Why are we doing this AGAIN? There are certain conditions in
1165 ! big copyin that will cause us to forgo the HW assisted copies
1166 ! and bounce back to a non-hw assisted copy. This dispatches
1167 ! those copies. Note that we branch around this in the main line
1170 ! We make no check for limits or HW enablement here. We've
1171 ! already been told that we're a poster child so just go off
1176 bnz %icc, dcibcp ! Most likely
1185 ! Eight byte aligned copies. A steal from the original .small_copyin
1186 ! with modifications. %o2 is number of 8 byte chunks to copy. When
1187 ! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more
1192 ldxa [%o0 + %o3]ASI_AIUS, %o4
1194 stx %o4, [%o1 + %o3]
1198 ! End of copy loop. Most 8 byte aligned copies end here.
1203 ! Something is left. Do it byte for byte.
1206 lduba [%o0 + %o3]ASI_AIUS, %o4
1208 ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
1212 lduwa [%o0 + %o3]ASI_AIUS, %o4
1218 ! End of copy loop. Most 4 byte aligned copies end here.
1223 ! Something is left. Do it byte for byte.
1226 lduba [%o0 + %o3]ASI_AIUS, %o4
1228 ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
1233 lduha [%o0 + %o3]ASI_AIUS, %o4
1235 sth %o4, [%o1 + %o3]
1239 ! End of copy loop. Most 2 byte aligned copies end here.
1244 ! Deal with the last byte
1246 lduba [%o0 + %o3]ASI_AIUS, %o4
1247 stb %o4, [%o1 + %o3]
1255 ! We're going off to do a block copy.
1256 ! Switch fault handlers and grab a window. We
1257 ! don't do a membar #Sync since we've done only
1258 ! kernel data to this point.
1260 save %sp, -SA(MINFRAME), %sp
1262 ! Copy in that reach here are larger than 256 bytes. The
1263 ! hw_copy_limit_1 is set to 256. Never set this limit less
1267 ! Swap src/dst since the code below is memcpy code
1268 ! and memcpy/bcopy have different calling sequences
1273 andcc %i0, 7, %i3 ! is dst double aligned
1274 bz %xcc, copyin_blkcpy
1276 neg %i3 ! bytes till double aligned
1277 sub %i2, %i3, %i2 ! update %i2 with new count
1279 ! Align Destination on double-word boundary
1281 1: lduba [%i1]ASI_AIUS, %i4
1290 bz,pn %xcc, copyin_blalign ! now block aligned
1292 neg %i3 ! bytes till block aligned
1293 sub %i2, %i3, %i2 ! update %i2 with new count
1295 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
1296 ! double word copies.
1298 andcc %i1, 7, %g1 ! is src aligned on a 8 bytes
1299 bz %xcc, ci_dbcopy ! %g1 has source offset (last 3-bits)
1300 sll %g1, 3, %l1 ! left shift
1302 sub %l2, %l1, %l2 ! right shift = (64 - left shift)
1304 ! Now use double word copies to align destination.
1306 sub %i1, %g1, %i1 ! align the src at 8 bytes.
1307 ldxa [%i1]ASI_AIUS, %o2
1310 ldxa [%i1]ASI_AIUS, %o4
1311 ALIGN_DATA_EW(%o2, %o4, %l1, %l2, %o3)
1320 ! Both source and destination are double aligned.
1321 ! No shift and merge of data required in this case.
1323 ldxa [%i1]ASI_AIUS, %o2
1327 bgu,pt %xcc, ci_dbcopy
1331 andn %i2, 0x3f, %i3 ! %i3 count is multiple of block size
1332 sub %i2, %i3, %i2 ! Residue bytes in %i2
1334 wr %g0, ASI_LDSTBI_P, %asi
1336 andcc %i1, 0xf, %o2 ! is src quadword aligned
1337 bz,pn %xcc, ci_blkcpy ! src offset in %o2 (last 4-bits)
!
! ci_loop0: src offset == 8 -- dword aligned user source, no merging.
!
1345 ! Falls through when source offset is equal to 8 i.e.
1346 ! source is double word aligned.
1347 ! In this case no shift/merge of data is required
1349 sub %i1, %o2, %i1 ! align the src at 16 bytes.
1350 andn %i1, 0x3f, %l0 ! %l0 has block aligned source
1351 prefetch [%l0+0x0], #one_read
1352 ldda [%i1]ASI_LDSTBI_AIUS, %l2
1355 ldda [%i1]ASI_LDSTBI_AIUS, %l4
1357 prefetch [%l0+0x40], #one_read
1359 stxa %l3, [%i0+0x0]%asi
1360 stxa %l4, [%i0+0x8]%asi
1363 ldda [%i1]ASI_LDSTBI_AIUS, %l2
1365 stxa %l5, [%i0+0x10]%asi
1366 stxa %l2, [%i0+0x18]%asi
1369 ldda [%i1]ASI_LDSTBI_AIUS, %l4
1371 stxa %l3, [%i0+0x20]%asi
1372 stxa %l4, [%i0+0x28]%asi
1375 ldda [%i1]ASI_LDSTBI_AIUS, %l2
1377 stxa %l5, [%i0+0x30]%asi
1378 stxa %l2, [%i0+0x38]%asi
1381 subcc %i3, 0x40, %i3
1382 bgu,pt %xcc, ci_loop0
1385 add %i1, %o2, %i1 ! increment the source by src offset
1386 ! the src offset was stored in %o2
!
! ci_loop1: src offset < 8 -- merge consecutive dwords with ALIGN_DATA,
! shifts %o0 = offset*8 and %o1 = 64 - %o0.
!
1390 sub %i1, %o2, %i1 ! align the src at 16 bytes.
1391 sll %o2, 3, %o0 ! %o0 left shift
1393 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift)
1394 andn %i1, 0x3f, %l0 ! %l0 has block aligned source
1395 prefetch [%l0+0x0], #one_read
1396 ldda [%i1]ASI_LDSTBI_AIUS, %l2 ! partial data in %l2
1397 ! and %l3 has complete
1401 ldda [%i1]ASI_LDSTBI_AIUS, %l4 ! %l4 has partial data
1403 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) ! merge %l2, %l3 and %l4
1406 prefetch [%l0+0x40], #one_read
1408 stxa %l2, [%i0+0x0]%asi
1409 stxa %l3, [%i0+0x8]%asi
1412 ldda [%i1]ASI_LDSTBI_AIUS, %l2
1413 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) ! merge %l2 with %l5 and
1414 ! %l4 from previous read
1416 stxa %l4, [%i0+0x10]%asi
1417 stxa %l5, [%i0+0x18]%asi
1419 ! Repeat the same for next 32 bytes.
1422 ldda [%i1]ASI_LDSTBI_AIUS, %l4
1423 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)
1425 stxa %l2, [%i0+0x20]%asi
1426 stxa %l3, [%i0+0x28]%asi
1429 ldda [%i1]ASI_LDSTBI_AIUS, %l2
1430 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)
1432 stxa %l4, [%i0+0x30]%asi
1433 stxa %l5, [%i0+0x38]%asi
1436 subcc %i3, 0x40, %i3
1437 bgu,pt %xcc, ci_loop1
1440 add %i1, %o2, %i1 ! increment the source by src offset
1441 ! the src offset was stored in %o2
!
! ci_loop2: src offset > 8 -- same merging, register rotation starts one
! dword later (%l3 first).
!
1445 sub %i1, %o2, %i1 ! align the src at 16 bytes.
1447 sll %o0, 3, %o0 ! %o0 left shift
1449 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift)
1450 andn %i1, 0x3f, %l0 ! %l0 has block aligned source
1451 prefetch [%l0+0x0], #one_read
1452 ldda [%i1]ASI_LDSTBI_AIUS, %l2 ! partial data in %l3
1457 ldda [%i1]ASI_LDSTBI_AIUS, %l4 ! %l4 has complete data
1458 ! and %l5 has partial
1459 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) ! merge %l3, %l4 and %l5
1461 prefetch [%l0+0x40], #one_read
1463 stxa %l3, [%i0+0x0]%asi
1464 stxa %l4, [%i0+0x8]%asi
1467 ldda [%i1]ASI_LDSTBI_AIUS, %l2
1468 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) ! merge %l2 and %l3 with
1469 ! %l5 from previous read
1472 stxa %l5, [%i0+0x10]%asi
1473 stxa %l2, [%i0+0x18]%asi
1475 ! Repeat the same for next 32 bytes.
1478 ldda [%i1]ASI_LDSTBI_AIUS, %l4
1479 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)
1481 stxa %l3, [%i0+0x20]%asi
1482 stxa %l4, [%i0+0x28]%asi
1485 ldda [%i1]ASI_LDSTBI_AIUS, %l2
1486 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)
1488 stxa %l5, [%i0+0x30]%asi
1489 stxa %l2, [%i0+0x38]%asi
1492 subcc %i3, 0x40, %i3
1493 bgu,pt %xcc, ci_loop2
1496 add %i1, %o2, %i1 ! increment the source by src offset
1497 ! the src offset was stored in %o2
!
! ci_blkcpy: both sides block-aligned -- straight 64-byte moves.
!
1500 ! Do fast copy using ASI_LDSTBI_P
1503 andn %i1, 0x3f, %o0 ! %o0 has block aligned source
1504 prefetch [%o0+0x0], #one_read
1506 ldda [%i1]ASI_LDSTBI_AIUS, %l0
1508 ldda [%i1]ASI_LDSTBI_AIUS, %l2
1511 prefetch [%o0+0x40], #one_read
1513 stxa %l0, [%i0+0x0]%asi
1515 ldda [%i1]ASI_LDSTBI_AIUS, %l4
1517 ldda [%i1]ASI_LDSTBI_AIUS, %l6
1520 stxa %l1, [%i0+0x8]%asi
1521 stxa %l2, [%i0+0x10]%asi
1522 stxa %l3, [%i0+0x18]%asi
1523 stxa %l4, [%i0+0x20]%asi
1524 stxa %l5, [%i0+0x28]%asi
1525 stxa %l6, [%i0+0x30]%asi
1526 stxa %l7, [%i0+0x38]%asi
1529 subcc %i3, 0x40, %i3
!
! Residue: dword copies (with ALIGN_DATA_EW merge when src is misaligned),
! then a final byte loop.
!
1536 ! Copy as much rest of the data as double word copy.
1538 cmp %i2, 0x8 ! Not enough bytes to copy as double
1542 andn %i2, 0x7, %i3 ! %i3 count is multiple of 8 bytes size
1543 sub %i2, %i3, %i2 ! Residue bytes in %i2
1545 andcc %i1, 7, %g1 ! is src aligned on a 8 bytes
1549 sll %g1, 3, %l0 ! left shift
1551 sub %l1, %l0, %l1 ! right shift = (64 - left shift)
1554 sub %i1, %g1, %i1 ! align the src at 8 bytes.
1555 ldxa [%i1]ASI_AIUS, %o2
1558 ldxa [%i1]ASI_AIUS, %o4
1559 ALIGN_DATA_EW(%o2, %o4, %l0, %l1, %o3)
1569 ldxa [%i1]ASI_AIUS, %o2
1573 bgu,pt %xcc, ci_cpy_db
1578 bz,pt %xcc, copyin_exit
1581 ! Copy the residue as byte copy
1583 lduba [%i1]ASI_AIUS, %i4
1587 bgu %xcc, ci_residue