1 /* Copyright (c) 2012, Linaro Limited
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the name of the Linaro nor the
12 names of its contributors may be used to endorse or promote products
13 derived from this software without specific prior written permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* def_fn: assembler macro taking a name argument f and a keyword
   argument p2align whose default is 0. strcmp is declared through it
   with p2align=6.
   NOTE(review): the macro body is not shown here; presumably it emits
   the symbol and alignment directives for f, with p2align being the
   log2 of the entry alignment -- confirm. */
32 .macro def_fn f p2align=0
/* Byte-replicated 64-bit masks: each holds one byte value repeated in
   all eight byte lanes. REP8_01 and REP8_7f feed the word-wide NUL
   test (X - 1) & ~(X | 0x7f) used by strcmp. */
40 #define REP8_01 0x0101010101010101
41 #define REP8_7f 0x7f7f7f7f7f7f7f7f
42 #define REP8_80 0x8080808080808080
44 /* Parameters and result. */
49 /* Internal variables. */
/* strcmp: compare two NUL-terminated byte strings, a dword at a time
   where possible. For each pair of dwords a syndrome is built that is
   non-zero when the pair differs or data1 contains a NUL byte; once
   that happens, result is formed as the difference of the two
   deciding byte values.
   NOTE(review): src1, src2, data1, data2, data1w, data2w, result,
   tmp1-tmp3, diff, has_nul, syndrome, pos and zeroones are symbolic
   register names whose definitions are not shown here -- confirm the
   mapping before relying on any register-level detail. */
63 /* Start of performance-critical section -- one 64B cache line. */
64 def_fn strcmp p2align=6
/* zeroones holds 0x01 in every byte lane; it is the value subtracted
   from data1 in the NUL test below. */
66 mov zeroones, #REP8_01
71 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
72 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
73 can be done in parallel across the entire word. */
/* tmp1 = data1 - REP8_01, tmp2 = data1 | REP8_7f, and bic computes
   has_nul = tmp1 & ~tmp2.
   NOTE(review): the loads that fill data1 and data2, and the
   .Lloop_aligned label targeted by the cbz, are not shown. */
78 sub tmp1, data1, zeroones
79 orr tmp2, data1, #REP8_7f
80 eor diff, data1, data2 /* Non-zero if differences found. */
81 bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
82 orr syndrome, diff, has_nul
/* A zero syndrome means neither a difference nor a NUL was seen in
   this dword, so branch back for the next one. */
83 cbz syndrome, .Lloop_aligned
84 /* End of performance-critical section -- one 64B cache line. */
/* Reached with a non-zero syndrome. rev reverses the byte order of
   syndrome.
   NOTE(review): this looks like the little-endian variant (the
   big-endian case is discussed separately further down); the
   conditional that selects it is not shown -- confirm.
   NOTE(review): the comment that follows is never terminated before
   the next comment begins, so text appears to be missing after
   "into the" -- confirm against the upstream file. */
87 rev syndrome, syndrome
89 /* The MS-non-zero bit of the syndrome marks either the first bit
90 that is different, or the top bit of the first zero byte.
91 Shifting left now will bring the critical information into the
97 /* But we need to zero-extend (char is unsigned) the value and then
98 perform a signed 32-bit subtraction. */
/* result = data1 - (data2 >> 56).
   NOTE(review): this presumes data1 already holds its deciding byte
   in bits 7:0, as the lsr on data1 does in the later copy of this
   sequence; that step is not shown here -- confirm. */
100 sub result, data1, data2, lsr #56
103 /* For big-endian we cannot use the trick with the syndrome value
104 as carry-propagation can corrupt the upper bits if the trailing
105 bytes in the string contain 0x01. */
106 /* However, if there is no NUL byte in the dword, we can generate
107 the result directly. We can't just subtract the bytes as the
108 MSB might be significant. */
/* Negate result when the condition flags read "lo" (unsigned lower).
   NOTE(review): the compare that sets those flags, and the value
   result holds beforehand, are not shown -- confirm. */
112 cneg result, result, lo
115 /* Re-compute the NUL-byte detection, using a byte-reversed value. */
/* Same (X - 1) & ~(X | 0x7f) test as in the main loop, applied to
   tmp3 instead of data1; the syndrome is then rebuilt from the
   existing diff and the new has_nul.
   NOTE(review): tmp3 is presumably the byte-reversed data1; the
   instruction that sets it is not shown -- confirm. */
117 sub tmp1, tmp3, zeroones
118 orr tmp2, tmp3, #REP8_7f
119 bic has_nul, tmp1, tmp2
121 orr syndrome, diff, has_nul
/* NOTE(review): the comment opened on the next line is not closed
   until the end of the zero-extend comment below, so as written the
   two lsl-by-pos instructions sit inside comment text. Text appears
   to be missing after "into the", and the instruction that computes
   pos is not shown -- confirm against the upstream file. */
123 /* The MS-non-zero bit of the syndrome marks either the first bit
124 that is different, or the top bit of the first zero byte.
125 Shifting left now will bring the critical information into the
127 lsl data1, data1, pos
128 lsl data2, data2, pos
129 /* But we need to zero-extend (char is unsigned) the value and then
130 perform a signed 32-bit subtraction. */
/* Bring the top byte of each word down to bits 7:0 (upper bits become
   zero) and subtract: result = (data1 >> 56) - (data2 >> 56). */
131 lsr data1, data1, #56
132 sub result, data1, data2, lsr #56
137 /* Sources are mutually aligned, but are not currently at an
138 alignment boundary. Round down the addresses and then mask off
139 the bytes that precede the start point. */
/* Both loads are post-indexed: each fetches a dword and then advances
   its pointer by 8. tmp1 is converted from bytes to bits and negated
   so that it can serve as a shift count, of which only the low six
   bits are used.
   NOTE(review): the instructions that round src1 and src2 down and
   that first compute tmp1 are not shown -- confirm. */
142 lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
143 ldr data1, [src1], #8
144 neg tmp1, tmp1 /* Bits to alignment -64. */
145 ldr data2, [src2], #8
/* NOTE(review): both shifts below are present in this text; their
   comments mark them as per-endianness alternatives, so a
   preprocessor conditional presumably selects exactly one. The
   initial value of tmp2 is also not shown (presumably all-ones) --
   confirm. */
148 /* Big-endian. Early bytes are at MSB. */
149 lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
151 /* Little-endian. Early bytes are at LSB. */
152 lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
/* OR the same mask into both words. Assuming tmp2 is all-ones in the
   byte positions before the start point, those bytes become identical
   in data1 and data2 and non-zero, so they can neither register as a
   difference nor as a NUL. */
154 orr data1, data1, tmp2
155 orr data2, data2, tmp2
159 /* We can do better than this. */
/* Byte-at-a-time path: load one byte from each string and advance
   both pointers by 1. */
160 ldrb data1w, [src1], #1
161 ldrb data2w, [src2], #1
/* When carry is set, compare the two bytes; otherwise force the flags
   to the immediate value 0.
   NOTE(review): the instruction that sets carry beforehand and the
   branch that consumes these flags are not shown -- confirm. */
163 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
/* result = data1 - data2, i.e. the difference of the two bytes just
   loaded, assuming data1w and data2w are the 32-bit views of data1
   and data2. */
165 sub result, data1, data2