]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - sys/mips/rmi/xlr_csum_nocopy.S
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / sys / mips / rmi / xlr_csum_nocopy.S
1 #include <machine/asm.h>
2
3
4 /*
5  * a0: source address
6  * a1: length of the area to checksum
7  * a2: partial checksum
8  * a3: dst (not referenced: this no-copy variant only reads from src)
9  */
10
11 #define src a0
12 #define dst a3
13 #define sum v0
14
15         .text
16         .set    noreorder
17
/*
 * CSUM_BIGCHUNK_AND_COPY offset
 *
 * Adds the four doublewords at a0 + offset .. a0 + offset + 0x1f into the
 * running sum in v0.  Despite the "_AND_COPY" in the name, the macro only
 * loads; nothing is stored.  Clobbers t0 and t1; a0 is not advanced.
 *
 * daddwc is emitted as a raw .word; the comment next to each .word gives the
 * intended mnemonic.  NOTE(review): presumably an RMI XLR add-with-carry that
 * the assembler does not know -- confirm against the XLR documentation.
 * The encodings hard-code register numbers ($2 as source/destination, $8 or
 * $9 as the addend).  NOTE(review): that only matches the "ld t0/t1" lines
 * if the build's register names map v0/t0/t1 to $2/$8/$9 -- verify for the
 * ABI in use.
 */
18         .macro CSUM_BIGCHUNK_AND_COPY offset 
19         pref                    0,  (\offset+0x0)(a0)   /* prefetch (hint 0) the chunk about to be read */
20         ld                      t0, (\offset+0x00)(a0)
21         ld                      t1, (\offset+0x08)(a0)
22         .word                   0x70481038  /*daddwc v0, v0, t0 */
23         .word                   0x70491038 /*daddwc v0, v0, t1 */
24         ld                      t0, (\offset + 0x10)(a0)                
25         ld                      t1, (\offset + 0x18)(a0)        
26         .word                   0x70481038 /* daddwc v0, v0, t0 */
27         .word                   0x70491038 /*daddwc v0, v0, t1 */
28         .endm
29
/*
 * small_csumcpy: checksum the last few bytes, add the caller's partial
 * checksum, fold the accumulator and return to the original caller.
 *
 * In:   src (a0) = address of the remaining bytes (any alignment)
 *       t2       = number of remaining bytes (copied into a1 below)
 *       a2       = partial checksum to add in
 *       sum (v0) = running accumulator
 * Out:  v0 = accumulator folded down by the sequence at the end
 * Clobbers: a0, a1, t0, t1, t2, v1
 *
 * Only bits 2, 1 and 0 of the count are examined (word, halfword, byte).
 * The code is assembled with .set noreorder, so the instruction following
 * each branch is its delay slot and executes whether or not the branch is
 * taken.  daddwc is emitted as a raw .word; NOTE(review): presumably an XLR
 * add-with-carry -- its exact semantics are not visible in this file.
 */
30 small_csumcpy:                                          /* unknown src alignment and < 8 bytes to go  */
31         move            a1, t2
32
33         andi            t0, a1, 4
34         beqz            t0, 1f                          /* no full word left */
35         andi            t0, a1, 2                       /* delay slot: t0 = "halfword left" test */
36
37         ulw                     t1, (src)                       /* Still a full word to go  */
38         daddiu          src, 4
39         .word                   0x70491038 /*daddwc v0, v0, t1 */
40
41 1:      move            t1, zero                        /* t1 collects the halfword/byte tail */
42         beqz            t0, 1f                          /* no halfword left */
43         andi            t0, a1, 1                       /* delay slot: t0 = "byte left" test */
44
45         ulhu            t1, (src)                       /* Still a halfword to go  */
46         daddiu          src, 2
47
48 1:      beqz            t0, 1f                          /* no trailing byte */
49         sll                     t1, t1, 16              /* delay slot: always moves the halfword to bits 31..16 */
50
51         lbu                     t2, (src)
52         nop
53
54 #ifdef __MIPSEB__
55         sll             t2, t2, 8
56 #endif
57         or              t1, t2                          /* merge the last byte below the halfword */
58
59 1:      .word                   0x70491038 /*daddwc v0, v0, t1 */
60
61         .word                   0x70461038 /*daddwc v0, v0, a2 */
62         .word                   0x70401038 /*daddwc v0, v0, $0 */
63
64         /* Ideally, the status flag should be clear at this point. */
65
66         dsll32      v1, sum, 0                          /* v1 = low 32 bits of sum moved to the high half */
67         .word                   0x70431038 /*daddwc v0, v0, v1 */
68         dsrl32          sum, sum, 0                     /* keep only the high 32 bits of the result */
69         .word                   0x70401038 /*daddwc v0, v0, zero */
70
71         /* fold the checksum: 32 bits down to 16, adding the carry back in */
72         sll             v1, sum, 16
73         addu            sum, v1
74         sltu            v1, sum, v1                     /* v1 = 1 if the addu above wrapped */
75         srl             sum, sum, 16
76         addu            sum, v1
77 1:                                                      /* no branch targets this label */
78         .set            reorder                         /* let the assembler fill the jr delay slot */
79         jr                      ra
80         .set            noreorder
81
82 /* ------------------------------------------------------------------ */
83
84         .align  5
/*
 * xlr_csum_partial_nocopy: checksum a buffer without copying it.
 *
 * In:   a0 (src) = buffer address
 *       a1       = length in bytes
 *       a2       = partial checksum, added in by small_csumcpy
 *       a3 (dst) = not referenced
 * Out:  v0 (sum) = folded checksum; every path finishes in small_csumcpy,
 *       which adds a2, folds the accumulator and returns to the caller.
 * Clobbers: a0, a1, t0-t3, t7, t8, v1
 *
 * Flow: buffers shorter than 8 bytes go straight to small_csumcpy.
 * Otherwise src is aligned step by step (1, 2, then 4/8/16 bytes when at
 * least 56 bytes remain), the bulk is summed in 128/64/32-byte chunks,
 * then whole words, and the final 0..3 bytes are handed to small_csumcpy.
 *
 * Assembled with .set noreorder: the instruction after every branch is its
 * delay slot and always executes.  daddwc is emitted as a raw .word (see the
 * mnemonic in the comment beside each one).
 *
 * NOTE(review): t7 records whether src started on an odd address but is only
 * used to decide whether to consume the leading byte; no correction of the
 * final sum for odd-aligned buffers is visible here -- confirm that callers
 * either never pass odd addresses or compensate themselves.
 */
85 LEAF(xlr_csum_partial_nocopy)
86         move            sum, zero
87         move            t7, zero
88
89         sltiu           t8, a1, 0x8
90         bnez            t8, small_csumcpy               /* < 8 bytes to checksum */
91         move            t2, a1                          /* delay slot: small_csumcpy takes its count in t2 */
92
93         beqz            a1, out                         /* never taken: a1 >= 8 here; kept for its delay slot */
94         andi            t7, src, 0x1                    /* odd buffer? */
95
96 hword_align:
97         beqz            t7, word_align
98         andi            t8, src, 0x2                    /* delay slot: need 2-byte step? */
99
100         lbu                     t0, (src)               /* consume one byte to reach 2-byte alignment */
101         dsubu           a1, a1, 0x1
102         .word                   0x70481038 /*daddwc v0, v0, t0 */
103         daddu           src, src, 0x1
104         andi            t8, src, 0x2                    /* re-test with the advanced src */
105
106 word_align:
107         beqz            t8, dword_align
108         sltiu           t8, a1, 56                      /* delay slot: t8 = fewer than 56 bytes left */
109
110         lhu                     t0, (src)               /* consume a halfword to reach 4-byte alignment */
111         dsubu           a1, a1, 0x2
112         .word                   0x70481038 /*daddwc v0, v0, t0 */
113         sltiu           t8, a1, 56                      /* re-test with the reduced length */
114         daddu           src, src, 0x2
115
116 dword_align:
117         bnez            t8, do_end_words                /* short buffer: skip the bulk loops */
118         move            t8, a1                          /* delay slot: t8 = byte count for do_end_words */
119
120         andi            t8, src, 0x4
121         beqz            t8, qword_align
122         andi            t8, src, 0x8                    /* delay slot: need 8-byte step? */
123
124         lw                      t0, 0x00(src)           /* consume a word to reach 8-byte alignment */
125         dsubu           a1, a1, 0x4
126         .word                   0x70481038 /*daddwc v0, v0, t0 */
127         daddu           src, src, 0x4
128         andi            t8, src, 0x8
129
130 qword_align:
131         beqz            t8, oword_align
132         andi            t8, src, 0x10                   /* delay slot: need 16-byte step? */
133
134         ld                      t0, 0x00(src)           /* consume a doubleword to reach 16-byte alignment */
135         dsubu           a1, a1, 0x8
136         .word                   0x70481038 /*daddwc v0, v0, t0 */
137         daddu           src, src, 0x8
138         andi            t8, src, 0x10
139
140 oword_align:
141         beqz            t8, begin_movement
142         dsrl            t8, a1, 0x7                     /* delay slot: t8 = number of 128-byte chunks */
143
144         ld                      t3, 0x08(src)           /* consume 16 bytes to reach 32-byte alignment */
145         ld                      t0, 0x00(src)
146         .word                   0x704b1038 /*daddwc v0, v0, t3 */
147         .word                   0x70481038 /*daddwc v0, v0, t0 */
148         dsubu           a1, a1, 0x10
149         daddu           src, src, 0x10
150         dsrl            t8, a1, 0x7                     /* recompute chunk count from the reduced length */
151
152 begin_movement:
153         beqz            t8, 1f                          /* no full 128-byte chunk */
154         andi            t2, a1, 0x40                    /* delay slot: t2 = "64-byte chunk left" test */
155
156 move_128bytes:
157         pref            0, 0x20(a0)
158         pref            0, 0x40(a0)
159         pref            0, 0x60(a0)
160         CSUM_BIGCHUNK_AND_COPY(0x00)
161         CSUM_BIGCHUNK_AND_COPY(0x20)
162         CSUM_BIGCHUNK_AND_COPY(0x40)
163         CSUM_BIGCHUNK_AND_COPY(0x60)
164         dsubu           t8, t8, 0x01
165         bnez            t8, move_128bytes       /* more 128-byte chunks to go */
166         daddu           src, src, 0x80          /* delay slot: advance past this chunk */
167
168 1:
169         beqz            t2, 1f                          /* no 64-byte chunk */
170         andi            t2, a1, 0x20                    /* delay slot: t2 = "32-byte chunk left" test */
171
172 move_64bytes:
173         pref            0, 0x20(a0)
174         pref            0, 0x40(a0)
175         CSUM_BIGCHUNK_AND_COPY(0x00)
176         CSUM_BIGCHUNK_AND_COPY(0x20)
177         daddu   src, src, 0x40
178
179 1:
180         beqz            t2, do_end_words                /* no 32-byte chunk */
181         andi            t8, a1, 0x1c                    /* delay slot: t8 = bytes left in whole words */
182
183 move_32bytes:
184         pref            0, 0x20(a0)
185         CSUM_BIGCHUNK_AND_COPY(0x00)
186         andi            t8, a1, 0x1c
187         daddu           src, src, 0x20
188
189 do_end_words:
190         beqz            t8, maybe_end_cruft             /* tested on the byte count, before the shift */
191         dsrl            t8, t8, 0x2                     /* delay slot: bytes -> number of words */
192
193 end_words:
194         lw                      t0, (src)
195         dsubu           t8, t8, 0x1
196         .word                   0x70481038 /*daddwc v0, v0, t0 */
197         bnez            t8, end_words
198         daddu           src, src, 0x4                   /* delay slot: advance to the next word */
199
200 maybe_end_cruft:
201         andi            t2, a1, 0x3                     /* t2 = 0..3 trailing bytes */
202
        /*
         * Hand the tail to small_csumcpy, which also adds a2, folds the sum
         * and returns to our caller.  The byte-at-a-time loop that used to
         * follow this jump could never execute and has been removed.
         */
204         j               small_csumcpy
206         move            a1, t2                          /* delay slot */
213
214 out:                                                    /* only referenced by the never-taken beqz above */
215         jr                      ra
216         move            v0, sum                         /* delay slot; sum is v0, so this is a no-op */
217         END(xlr_csum_partial_nocopy)