2 /* strchrnul - find a character or nul in a string
4 Copyright (c) 2014, ARM Limited
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of the company nor the names of its contributors
15 may be used to endorse or promote products derived from this
16 software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
36 /* Arguments and results. */
59 /* For each 32-byte hunk we calculate a 64-bit syndrome value, with
60 two bits per byte (LSB is always in bits 0 and 1, for both big
61 and little-endian systems). For each two-bit tuple, bit 0 is set iff
62 the relevant byte matched the requested character or nul. Since the
63 bits in the syndrome reflect exactly the order in which things occur
64 in the original string, a count_trailing_zeros() operation will
65 identify exactly which byte is causing the termination. */
67 /* Locals and temporaries. */
69 .macro def_fn f p2align=0
78 /* Magic constant 0x40100401 to allow us to identify which lane
79 matches the termination condition. */
/* NOTE(review): the numeric prefixes on these lines skip values
   (e.g. 79 -> 81, 84 -> 88, 105 -> 109), so some labels and
   instructions of this routine appear to be absent from this view.
   The comments below describe only the instructions that are visible. */
81 movk wtmp2, #0x4010, lsl #16 // Write 0x4010 into bits [31:16] of wtmp2; other bits are kept.
82 dup vrepchr.16b, chrin // Broadcast the low byte of chrin to all 16 byte lanes.
83 bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
84 dup vrepmask.4s, wtmp2 // Broadcast wtmp2 to all four 32-bit lanes.
/* Assuming wtmp2 holds the full constant named above (the instruction
   that sets its low half is not visible here), each group of four
   consecutive byte lanes of vrepmask holds 0x01, 0x04, 0x10, 0x40:
   one distinct even-numbered bit per lane.  */
88 /* Input string is not 32-byte aligned. Rather than forcing
89 the padding bytes to a safe value, we calculate the syndrome
90 for all the bytes, but then mask off those bits of the
91 syndrome that are related to the padding. */
92 ld1 {vdata1.16b, vdata2.16b}, [src], #32 // Load one 32-byte hunk; post-increment src by 32.
94 cmeq vhas_nul1.16b, vdata1.16b, #0 // Lane = 0xff where the byte is nul, else 0x00.
95 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b // Lane = 0xff where the byte equals the broadcast character.
96 cmeq vhas_nul2.16b, vdata2.16b, #0
97 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
98 orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b // Either kind of match terminates the search.
99 orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
100 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b // Keep only each lane's own mask bit.
101 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
103 addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
105 addp vend1.16b, vend1.16b, vend1.16b // 128->64
/* NOTE(review): tmp1 and tmp3 are not written by any instruction visible
   in this view; the bic below computes tmp1 = tmp3 & ~tmp1.  */
109 bic tmp1, tmp3, tmp1 // Mask padding bits.
/* Per-hunk work for aligned data: same load and comparisons as above.  */
113 ld1 {vdata1.16b, vdata2.16b}, [src], #32 // Load the next 32-byte hunk; post-increment src by 32.
114 cmeq vhas_nul1.16b, vdata1.16b, #0
115 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
116 cmeq vhas_nul2.16b, vdata2.16b, #0
117 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
118 /* Use a fast check for the termination condition. */
119 orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
120 orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
121 orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b // Non-zero iff some byte of the hunk matched.
122 addp vend1.2d, vend1.2d, vend1.2d // Sum the two 64-bit halves into each doubleword lane.
126 /* Termination condition found. Now need to establish exactly why it terminated. */
128 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b // Reduce each matching lane to its own mask bit.
129 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
130 addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
131 addp vend1.16b, vend1.16b, vend1.16b // 128->64
135 /* Count the trailing zeros, by bit reversing... */
/* NOTE(review): no bit-reverse instruction and no instruction adjusting src
   is visible between these comments and the clz below — confirm against
   the complete file.  */
137 /* Re-bias source. */
139 clz tmp1, tmp1 /* ... and counting the leading zeros. */
140 /* tmp1 is twice the offset into the fragment. */
141 add result, src, tmp1, lsr #1 // result = src + (tmp1 >> 1)
144 .size strchrnul, . - strchrnul // Symbol size = bytes from the strchrnul label to here.