2 * memchr - find a character in a memory zone
4 * Copyright (c) 2014, ARM Limited
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of the company nor the names of its contributors
15 * may be used to endorse or promote products derived from this
16 * software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 /* Arguments and results. */
62 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
63 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
64 * requested character and bit 1 is not used (faster than using a 32bit
65 * syndrome). Since the bits in the syndrome reflect exactly the order in which
66 * things occur in the original string, counting trailing zeros allows to
67 * identify exactly which byte has matched.
70 .macro def_fn f p2align=0
79 /* Do not dereference srcin if no bytes to compare. */
80 cbz cntin, .Lzero_length
82 * Magic constant 0x40100401 allows us to identify which lane matches
86 movk wtmp2, #0x4010, lsl #16
87 dup vrepchr.16b, chrin
88 /* Work with aligned 32-byte chunks */
90 dup vrepmask.4s, wtmp2
92 and cntrem, cntin, #31
96 * Input string is not 32-byte aligned. We calculate the syndrome
97 * value for the aligned 32 bytes block containing the first bytes
98 * and mask the irrelevant part.
101 ld1 {vdata1.16b, vdata2.16b}, [src], #32
103 adds cntin, cntin, tmp
104 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
105 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
106 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
107 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
108 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
109 addp vend.16b, vend.16b, vend.16b /* 128->64 */
111 /* Clear the soff*2 lower bits */
115 /* The first block can also be the last */
117 /* Have we found something already? */
121 ld1 {vdata1.16b, vdata2.16b}, [src], #32
122 subs cntin, cntin, #32
123 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
124 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
125 /* If we're out of data we finish regardless of the result */
127 /* Use a fast check for the termination condition */
128 orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
129 addp vend.2d, vend.2d, vend.2d
131 /* We're not out of data, loop if we haven't found the character */
135 /* Termination condition found, let's calculate the syndrome value */
136 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
137 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
138 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
139 addp vend.16b, vend.16b, vend.16b /* 128->64 */
141 /* Only do the clear for the last possible block */
145 /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
146 add tmp, cntrem, soff
154 /* Count the trailing zeros using bit reversing */
156 /* Compensate the last post-increment */
158 /* Check that we have found a character */
160 /* And count the leading zeros */
162 /* Compute the potential result */
163 add result, src, synd, lsr #1
164 /* Select result or NULL */
165 csel result, xzr, result, eq
172 .size memchr, . - memchr