1 /* $NetBSD: in_cksum_arm.S,v 1.2 2003/09/23 10:01:36 scw Exp $ */
4 * Copyright 2003 Wasabi Systems, Inc.
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
40 * Hand-optimised in_cksum() and in4_cksum() implementations for ARM/armv5e
45 #include <machine/asm.h>
47 __FBSDID("$FreeBSD$");
50 * int in_cksum(struct mbuf *m, int len)
56 * NOTE: Assumes 'm' is *never* NULL.
58 /* LINTSTUB: Func: int in_cksum(struct mbuf *, int) */
/*
 * in_cksum() body (only part of the routine is visible in this excerpt;
 * the entry label and most of the per-mbuf loop are not shown here).
 *
 * Save r4-r11 and the return address on the stack.  The matching ldmfd
 * at the bottom restores r4-r11 and loads the saved lr straight into pc.
 */
60 stmfd sp!, {r4-r11,lr}
/*
 * ip is used as a base pointer: load the words at offsets M_LEN and
 * M_DATA into r1 and r0, then overwrite ip itself with the word at
 * offset M_NEXT.  ip must be loaded last because it is the base register
 * for all three loads.
 * NOTE(review): presumably ip points at the current 'struct mbuf' and the
 * M_* offsets name its length/data/next fields -- confirm against the
 * header that defines them.
 */
67 ldr r1, [ip, #(M_LEN)]
68 ldr r0, [ip, #(M_DATA)]
69 ldr ip, [ip, #(M_NEXT)]
/*
 * Call L_cksumdata only when the Z flag is clear.  The flag-setting
 * instruction is not visible in this excerpt.
 */
77 blne _ASM_LABEL(L_cksumdata)
/*
 * r0 += (r8 >> 16), then r0 += (r0 >> 16).
 * NOTE(review): this looks like the fold of a 32-bit accumulated sum in
 * r8 down towards 16 bits; the surrounding mask/complement steps are not
 * visible here, so confirm before relying on the exact result range.
 */
88 add r0, r0, r8, lsr #16
89 add r0, r0, r0, lsr #16
/* Restore r4-r11 and return (saved lr is popped into pc). */
92 ldmfd sp!, {r4-r11,pc}
/*
 * Prologue/epilogue pair of a second, smaller routine (its entry label
 * and body are not visible in this excerpt).  Only r4-r7 and lr are
 * saved here, versus r4-r11 in in_cksum() above.
 */
96 stmfd sp!, {r4-r7, lr}
/* Restore r4-r7 and return by popping the saved lr into pc. */
99 ldmfd sp!, {r4-r7, pc}
101 * The main in*_cksum() workhorse...
104 * r0 Pointer to buffer
109 * r2 Accumulated 32-bit sum
114 /* LINTSTUB: Ignore */
115 ASENTRY_NP(L_cksumdata)
117 pld [r0] /* Pre-fetch the start of the buffer */
121 /* We first have to word-align the buffer. */
123 beq .Lcksumdata_wordaligned
125 cmp r1, r7 /* Enough bytes left to make it? */
126 blt .Lcksumdata_endgame
128 ldrb r4, [r0], #0x01 /* Fetch 1st byte */
129 ldrgeb r5, [r0], #0x01 /* Fetch 2nd byte */
131 ldrgtb r6, [r0], #0x01 /* Fetch 3rd byte */
133 /* Combine the three bytes depending on endianness and alignment */
135 orreq r2, r5, r4, lsl #8
136 orreq r2, r2, r6, lsl #24
137 orrne r2, r4, r5, lsl #8
138 orrne r2, r2, r6, lsl #16
140 orreq r2, r4, r5, lsl #8
141 orreq r2, r2, r6, lsl #16
142 orrne r2, r5, r4, lsl #8
143 orrne r2, r2, r6, lsl #24
145 subs r1, r1, r7 /* Update length */
146 RETeq /* All done? */
148 /* Buffer is now word aligned */
149 .Lcksumdata_wordaligned:
151 cmp r1, #0x04 /* Less than 4 bytes left? */
152 blt .Lcksumdata_endgame /* Yup */
154 /* Now quad-align, if necessary */
156 ldrne r7, [r0], #0x04
159 blt .Lcksumdata_bigloop_end /* Note: C flag clear if branch taken */
162 * Buffer is now quad aligned. Sum 64 bytes at a time.
163 * Note: First ldrd is hoisted above the loop, together with
164 * setting r6 to zero to avoid stalling for results in the
165 * loop. (r7 is live, from above).
197 ldrged r4, [r0], #0x08
198 bge .Lcksumdata_bigloop
200 adds r2, r2, r6 /* r6/r7 still need summing */
201 .Lcksumdata_bigloop_end:
205 #else /* !_ARM_ARCH_5E */
208 blt .Lcksumdata_bigloop_end
211 ldmia r0!, {r3, r4, r5, r6}
215 ldmia r0!, {r3, r4, r5, r7}
220 ldmia r0!, {r3, r4, r5, r6}
225 ldmia r0!, {r3, r4, r5, r7}
233 bge .Lcksumdata_bigloop
234 .Lcksumdata_bigloop_end:
242 ldrged r4, [r0], #0x08 /* Avoid stalling pld and result */
243 blt .Lcksumdata_less_than_32
254 adcs r2, r2, r6 /* XXX: Unavoidable result stall */
257 blt .Lcksumdata_less_than_32
258 ldmia r0!, {r3, r4, r5, r6}
262 ldmia r0!, {r3, r4, r5, r7}
273 .Lcksumdata_less_than_32:
274 /* There are less than 32 bytes left */
278 adds r4, r4, r4, lsr #1 /* Side effect: Clear carry flag */
283 * Note: We use ldm here, even on armv5e, since the combined issue/result
284 * latencies for ldm and ldrd are the same. Using ldm avoids needless #ifdefs.
286 /* At least 24 bytes remaining... */
291 /* At least 16 bytes remaining... */
296 /* At least 8 bytes remaining... */
301 /* Less than 8 bytes remaining... */
304 blt .Lcksumdata_lessthan4
311 /* Deal with < 4 bytes remaining */
312 .Lcksumdata_lessthan4:
316 /* Deal with 1 to 3 remaining bytes, possibly misaligned */
318 ldrb r3, [r0] /* Fetch first byte */
320 ldrgeb r4, [r0, #0x01] /* Fetch 2nd and 3rd as necessary */
322 ldrgtb r5, [r0, #0x02]
324 /* Combine the three bytes depending on endianness and alignment */
327 orreq r3, r4, r3, lsl #8
328 orreq r3, r3, r5, lsl #24
329 orrne r3, r3, r4, lsl #8
330 orrne r3, r3, r5, lsl #16
332 orreq r3, r3, r4, lsl #8
333 orreq r3, r3, r5, lsl #16
334 orrne r3, r4, r3, lsl #8
335 orrne r3, r3, r5, lsl #24