1 /* strnlen - calculate the length of a string with limit.
3 Copyright (c) 2013, Linaro Limited
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of the Linaro nor the
14 names of its contributors may be used to endorse or promote products
15 derived from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
34 /* Arguments and results. */
39 /* Locals and temporaries. */
54 .macro def_fn f p2align=0 /* Macro def_fn: argument f, optional argument p2align (default 0). The macro body is not visible in this excerpt. */
62 #define REP8_01 0x0101010101010101 /* 0x01 in each of the 8 bytes; loaded into zeroones. */
63 #define REP8_7f 0x7f7f7f7f7f7f7f7f /* 0x7f in each of the 8 bytes; ORed into the data for NUL detection. */
64 #define REP8_80 0x8080808080808080 /* 0x80 in each of the 8 bytes; not referenced by the code visible here. */
69 /* Pre-pad to ensure the critical loop begins on an icache line boundary. */
73 /* Put this code here to avoid wasting more space with pre-padding. */
79 cbz limit, .Lhit_limit /* limit == 0: nothing may be examined, take the limit-reached exit. */
80 mov zeroones, #REP8_01 /* zeroones = 0x01 in every byte, used by the NUL-detection subtraction. */
84 /* Calculate the number of full and partial Qwords (16-byte units) - 1. */
85 sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
86 lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
88 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
89 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
90 can be done in parallel across the entire word. */
91 /* The inner loop deals with two Dwords at a time. This has a
92 slightly higher start-up cost, but we should win quite quickly,
93 especially on cores with a high number of issue slots per
94 cycle, as we get much better parallelism out of the operations. */
96 /* Start of critical section -- keep to one 64Byte cache line. */
98 ldp data1, data2, [src], #16 /* Load two Dwords, then post-increment src by 16. */
100 sub tmp1, data1, zeroones /* tmp1 = data1 - 0x0101..01: the (X - 1) term. */
101 orr tmp2, data1, #REP8_7f /* tmp2 = data1 | 0x7f7f..7f: the (X | 0x7f) term. */
102 sub tmp3, data2, zeroones /* Same two terms for data2. */
103 orr tmp4, data2, #REP8_7f
104 bic has_nul1, tmp1, tmp2 /* has_nul1 = tmp1 & ~tmp2: non-zero iff data1 contains a zero byte. */
105 bic has_nul2, tmp3, tmp4 /* has_nul2 = tmp3 & ~tmp4: likewise for data2. */
106 subs limit_wd, limit_wd, #1 /* Count this Qword; N is set when the result is negative. */
107 orr tmp1, has_nul1, has_nul2 /* Non-zero iff either Dword contains a zero byte. */
108 ccmp tmp1, #0, #0, pl /* If pl (count not yet negative) compare tmp1 with 0; otherwise NZCV = 0000. NOTE(review): the branch that consumes these flags is not visible in this excerpt. */
110 /* End of critical section -- keep to one 64Byte cache line. */
112 orr tmp1, has_nul1, has_nul2 /* Combine both syndromes again to test whether a NUL was seen. */
113 cbz tmp1, .Lhit_limit /* No null in final Qword. */
115 /* We know there's a null in the final Qword. The easiest thing
116 to do now is work out the length of the string and return the lower of that length and limit. */
120 cbz has_nul1, .Lnul_in_data2 /* data1 has no zero byte, so the NUL must be in data2. */
125 mov has_nul2, has_nul1 /* NUL is in data1: copy its syndrome so the code below can use has_nul2. */
128 /* For big-endian, carry propagation (if the final byte in the
129 string is 0x01) means we cannot use has_nul directly. The
130 easiest way to get the correct byte is to byte-swap the data
131 and calculate the syndrome a second time. */
133 sub tmp1, data2, zeroones /* Recompute the syndrome from data2. NOTE(review): per the comment above this is for big-endian only; the guard and the byte-swap of data2 are not visible in this excerpt. */
134 orr tmp2, data2, #REP8_7f
135 bic has_nul2, tmp1, tmp2
138 rev has_nul2, has_nul2 /* Reverse the byte order of the syndrome. */
140 add len, len, pos, lsr #3 /* Bits to bytes: len += pos / 8. pos is set by an instruction not visible here. */
142 csel len, len, limit, ls /* Return the lower value: keep len if ls, else use limit. The flags come from an instruction not visible here. */
146 /* Deal with a partial first word.
147 We're doing two things in parallel here;
148 1) Calculate the number of words (but avoiding overflow if
149 limit is near ULONG_MAX) - to do this we need to work out
150 limit + tmp1 - 1 as a 65-bit value before shifting it;
151 2) Load and mask the initial data words - we force the bytes
152 before the ones we are interested in to 0xff - this ensures
153 early bytes will not hit any zero detection. */
154 sub limit_wd, limit, #1 /* limit_wd = limit - 1. */
158 and tmp3, limit_wd, #15 /* tmp3 = low four bits of (limit - 1), kept separately so the carry is not lost. */
159 lsr limit_wd, limit_wd, #4 /* limit_wd = (limit - 1) / 16, the Qword part. */
162 ldp data1, data2, [src], #16 /* Load the first two Dwords, then post-increment src by 16. */
163 lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
167 /* Big-endian. Early bytes are at MSB. NOTE(review): this shift and the little-endian one below are alternatives; the directives that select one are not visible in this excerpt. */
168 lsl tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */
170 /* Little-endian. Early bytes are at LSB. */
171 lsr tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */
173 add limit_wd, limit_wd, tmp3, lsr #4 /* Add (tmp3 >> 4) to the Qword count, i.e. the carry out of the low four bits. */
175 orr data1, data1, tmp2 /* Set the bits selected by the mask tmp2 in data1 (the bytes to be ignored, per the comment above). */
176 orr data2a, data2, tmp2 /* Same mask applied to data2, result kept in data2a. */
178 csinv data1, data1, xzr, le /* If le keep the masked data1, otherwise data1 = ~0 (all ones). The flags come from an instruction not visible here. */
179 csel data2, data2, data2a, le /* If le keep data2 unmasked, otherwise use the masked copy data2a. */
181 .size strnlen, . - .Lstart /* Include pre-padding in size. */