2 * strncmp - compare two strings
4 * Copyright (c) 2013-2022, Arm Limited.
5 * SPDX-License-Identifier: MIT
14 #include <machine/asm.h>
18 #define REP8_01 0x0101010101010101
19 #define REP8_7f 0x7f7f7f7f7f7f7f7f
21 /* Parameters and result. */
27 /* Internal variables. */
44 #define neg_offset x15
46 /* Define endian dependent shift operations.
47 On big-endian early bytes are at MSB and on little-endian LSB.
48 LS_FW means shifting towards early bytes.
49 LS_BK means shifting towards later bytes.
62 mov zeroones, #REP8_01
66 cbnz count, L(mutual_align)
68 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
69 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
70 can be done in parallel across the entire word. */
77 sub tmp1, data1, zeroones
78 orr tmp2, data1, #REP8_7f
79 eor diff, data1, data2 /* Non-zero if differences found. */
80 csinv endloop, diff, xzr, hi /* Last Dword or differences. */
81 bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
82 ccmp endloop, #0, #0, eq
84 /* End of main loop */
88 orr syndrome, diff, has_nul
89 add limit, limit, 8 /* Rewind limit to before last subs. */
91 /* Limit was reached. Check if the NUL byte or the difference
92 is before the limit. */
93 rev syndrome, syndrome
98 cmp limit, pos, lsr #3
100 /* But we need to zero-extend (char is unsigned) the value and then
101 perform a signed 32-bit subtraction. */
102 lsr data1, data1, #56
103 sub result, data1, data2, lsr #56
104 csel result, result, xzr, hi
107 /* Not reached the limit, must have found the end or a diff. */
108 tbz limit, #63, L(not_limit)
110 cbz limit, L(not_limit)
112 lsl limit, tmp1, #3 /* Bits -> bytes. */
114 lsr mask, mask, limit
115 bic data1, data1, mask
116 bic data2, data2, mask
118 /* Make sure that the NUL byte is marked in the syndrome. */
119 orr has_nul, has_nul, mask
122 /* For big-endian we cannot use the trick with the syndrome value
123 as carry-propagation can corrupt the upper bits if the trailing
124 bytes in the string contain 0x01. */
125 /* However, if there is no NUL byte in the dword, we can generate
126 the result directly. We can't just subtract the bytes as the
127 MSB might be significant. */
131 cneg result, result, lo
134 /* Re-compute the NUL-byte detection, using a byte-reversed value. */
136 sub tmp1, tmp3, zeroones
137 orr tmp2, tmp3, #REP8_7f
138 bic has_nul, tmp1, tmp2
140 orr syndrome, diff, has_nul
142 /* The most-significant-non-zero bit of the syndrome marks either the
143 first bit that is different, or the top bit of the first zero byte.
144 Shifting left now will bring the critical information into the
147 lsl data1, data1, pos
148 lsl data2, data2, pos
149 /* But we need to zero-extend (char is unsigned) the value and then
150 perform a signed 32-bit subtraction. */
151 lsr data1, data1, #56
152 sub result, data1, data2, lsr #56
157 /* Sources are mutually aligned, but are not currently at an
158 alignment boundary. Round down the addresses and then mask off
159 the bytes that precede the start point.
160 We also need to adjust the limit calculations, but without
161 overflowing if the limit is near ULONG_MAX. */
164 ldr data1, [src1], #8
165 neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
166 ldr data2, [src2], #8
168 LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */
169 /* Adjust the limit and ensure it doesn't overflow. */
170 adds limit, limit, count
171 csinv limit, limit, xzr, lo
172 orr data1, data1, tmp2
173 orr data2, data2, tmp2
177 /* Don't bother with dwords for up to 16 bytes. */
180 b.hs L(try_misaligned_words)
183 /* Perhaps we can do better than this. */
184 ldrb data1w, [src1], #1
185 ldrb data2w, [src2], #1
186 subs limit, limit, #1
187 ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */
188 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
191 sub result, data1, data2
193 /* Align the SRC1 to a dword by doing a bytewise compare and then do
195 L(try_misaligned_words):
196 cbz count, L(src1_aligned)
200 sub limit, limit, count
203 ldrb data1w, [src1], #1
204 ldrb data2w, [src2], #1
206 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
208 subs count, count, #1
209 b.hi L(page_end_loop)
211 /* The following diagram explains the comparison of misaligned strings.
212 The bytes are shown in natural order. For little-endian, it is
213 reversed in the registers. The "x" bytes are before the string.
214 The "|" separates data that is loaded at one time.
215 src1 | a a a a a a a a | b b b c c c c c | . . .
216 src2 | x x x x x a a a a a a a a b b b | c c c c c . . .
218 After shifting in each step, the data looks like this:
220 data1 a a a a a a a a b b b c c c c c b b b c c c c c
221 data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c
223 The bytes with "0" are eliminated from the syndrome via mask.
225 Align SRC2 down to 16 bytes. This way we can read 16 bytes at a
226 time from SRC2. The comparison happens in 3 steps. After each step
227 the loop can exit, or read from SRC1 or SRC2. */
229 /* Calculate offset from 8 byte alignment to string start in bits. No
230 need to mask offset since shifts are ignoring upper bits. */
234 neg neg_offset, offset
235 ldr data1, [src1], #8
236 ldp tmp1, tmp2, [src2], #16
237 LS_BK mask, mask, neg_offset
238 and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */
239 /* Skip the first compare if data in tmp1 is irrelevant. */
240 tbnz offset, 6, L(misaligned_mid_loop)
243 /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/
244 LS_FW data2, tmp1, offset
245 LS_BK tmp1, tmp2, neg_offset
246 subs limit, limit, #8
247 orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/
248 sub has_nul, data1, zeroones
249 eor diff, data1, data2 /* Non-zero if differences found. */
250 orr tmp3, data1, #REP8_7f
251 csinv endloop, diff, xzr, hi /* If limit, set to all ones. */
252 bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */
253 orr tmp3, endloop, has_nul
254 cbnz tmp3, L(full_check)
256 ldr data1, [src1], #8
257 L(misaligned_mid_loop):
258 /* STEP_B: Compare first part of data1 to second part of tmp2. */
259 LS_FW data2, tmp2, offset
261 /* For big-endian we do a byte reverse to avoid carry-propagation
262 problem described above. This way we can reuse the has_nul in the
263 next step and also use syndrome value trick at the end. */
265 #define data1_fixed tmp3
267 #define data1_fixed data1
269 sub has_nul, data1_fixed, zeroones
270 orr tmp3, data1_fixed, #REP8_7f
271 eor diff, data2, data1 /* Non-zero if differences found. */
272 bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */
276 cmp limit, neg_offset, lsr #3
277 orr syndrome, diff, has_nul
278 bic syndrome, syndrome, mask /* Ignore later bytes. */
279 csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
280 cbnz tmp3, L(syndrome_check)
282 /* STEP_C: Compare second part of data1 to first part of tmp1. */
283 ldp tmp1, tmp2, [src2], #16
285 LS_BK data2, tmp1, neg_offset
286 eor diff, data2, data1 /* Non-zero if differences found. */
287 orr syndrome, diff, has_nul
288 and syndrome, syndrome, mask /* Ignore earlier bytes. */
289 csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
290 cbnz tmp3, L(syndrome_check)
292 ldr data1, [src1], #8
299 cmp pos, limit, lsl #3