2 strchr - find a character in a string
4 Copyright (c) 2014, ARM Limited
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of the company nor the names of its contributors
15 may be used to endorse or promote products derived from this
16 software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
36 /* Arguments and results. */
// Vector register alias for the per-lane bit mask that is ANDed with the
// character-match compare results (vhas_chr1 / vhas_chr2) further down.
55 #define vrepmask_c v16
61 /* For each 32-byte hunk we calculate a 64-bit syndrome value, with
62 two bits per byte (LSB is always in bits 0 and 1, for both big
63 and little-endian systems). For each tuple, bit 0 is set iff
64 the relevant byte matched the requested character; bit 1 is set
65 iff the relevant byte matched the NUL end of string (we trigger
66 off bit0 for the special case of looking for NUL). Since the bits
67 in the syndrome reflect exactly the order in which things occur
68 in the original string a count_trailing_zeros() operation will
69 identify exactly which byte is causing the termination, and why. */
71 /* Locals and temporaries. */
73 .macro def_fn f p2align=0
// def_alias is defined outside the visible part of this file; presumably it
// publishes `index` as a second name for strchr -- confirm against the macro.
87 def_alias strchr index
88 /* Magic constant 0x40100401 to allow us to identify which lane
89 matches the requested byte. Magic constant 0x80200802 used
90 similarly for NUL termination. */
// movk replaces only bits [31:16] of wtmp2 with 0x4010 and keeps the low
// half, i.e. it supplies the upper half of the 0x40100401 constant. The
// instruction that sets the low half is not visible in this excerpt.
92 movk wtmp2, #0x4010, lsl #16
// Broadcast the search character into all 16 byte lanes.
93 dup vrepchr.16b, chrin
// Clearing the low five address bits rounds srcin down to a 32-byte boundary.
94 bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
// Broadcast the 32-bit mask constant into all four word lanes.
95 dup vrepmask_c.4s, wtmp2
// Doubling every lane turns 0x40100401 into 0x80200802, so vrepmask_0 is the
// NUL mask: the same bit pattern moved up by one bit position.
97 add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
100 /* Input string is not 32-byte aligned. Rather than forcing
101 the padding bytes to a safe value, we calculate the syndrome
102 for all the bytes, but then mask off those bits of the
103 syndrome that are related to the padding. */
// Load 32 bytes into the two data vectors; src is post-incremented by 32.
104 ld1 {vdata1.16b, vdata2.16b}, [src], #32
// cmeq sets a byte lane to all-ones where the comparison holds, else zero:
// once against 0 (NUL) and once against the search character, per vector.
106 cmeq vhas_nul1.16b, vdata1.16b, #0
107 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
108 cmeq vhas_nul2.16b, vdata2.16b, #0
109 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
// Reduce each all-ones hit to the mask bits of its lane. NUL hits use
// vrepmask_0 and character hits use vrepmask_c, so the two kinds of hit land
// on different bits and can be merged without losing which one occurred.
110 and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
111 and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
112 and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
113 and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
// Merge NUL and character hits for each 16-byte half.
114 orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
115 orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
// Two pairwise byte additions fold the 32 per-byte results down to a 64-bit
// syndrome. After the second addp the low 64 bits of vend1 hold the pairwise
// sums of the first addp result.
117 addp vend1.16b, vend1.16b, vend2.16b // 256->128
119 addp vend1.16b, vend1.16b, vend2.16b // 128->64
// tmp1 = tmp3 AND NOT tmp1. The instructions that place the syndrome in tmp3
// and build the padding mask in tmp1 are not visible in this excerpt.
123 bic tmp1, tmp3, tmp1 // Mask padding bits.
// Same load and compares for the next 32-byte hunk. This is presumably the
// body of the main loop; its label and back-branch are not visible here.
127 ld1 {vdata1.16b, vdata2.16b}, [src], #32
128 cmeq vhas_nul1.16b, vdata1.16b, #0
129 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
130 cmeq vhas_nul2.16b, vdata2.16b, #0
131 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
132 /* Use a fast check for the termination condition. */
// No masking here: the raw compare results are simply ORed together, which
// is enough to tell whether any byte in the hunk was a NUL or a match.
133 orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
134 orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
135 orr vend1.16b, vend1.16b, vend2.16b
// Add the two 64-bit halves so one 64-bit value summarises the hunk. The
// move to a general register and the branch that test it are not visible
// in this excerpt.
136 addp vend1.2d, vend1.2d, vend1.2d
140 /* Termination condition found. Now need to establish exactly why we terminated. */
// Build the exact syndrome for the hunk that ended the scan: reduce each
// compare result to its lane mask bits (vrepmask_0 for NUL hits, vrepmask_c
// for character hits), merge them, then fold 256 bits of flags down to 64.
142 and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
143 and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
144 and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
145 and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
146 orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
147 orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
148 addp vend1.16b, vend1.16b, vend2.16b // 256->128
149 addp vend1.16b, vend1.16b, vend2.16b // 128->64
// The move of the syndrome into tmp1, the bit-reverse and the src adjustment
// that the next two comments refer to are not visible in this excerpt.
153 /* Count the trailing zeros, by bit reversing... */
155 /* Re-bias source. */
157 clz tmp1, tmp1 /* And counting the leading zeros. */
158 /* Tmp1 is even if the target character was found first. Otherwise
159 we've found the end of string and we weren't looking for NUL. */
// result = src + (tmp1 >> 1). The syndrome has two bits per byte, so halving
// the bit index gives the byte offset within the hunk.
161 add result, src, tmp1, lsr #1
// Keep result when the flags say EQ, otherwise return zero (xzr). The
// flag-setting instruction is not visible here; given the comment above it
// presumably tests the low bit of tmp1 -- confirm.
162 csel result, result, xzr, eq
// Symbol size is the distance from the strchr label to this point.
165 .size strchr, . - strchr