2 * Copyright (C) 2016 Cavium Inc.
5 * Developed by Semihalf.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <machine/asm.h>
30 __FBSDID("$FreeBSD$");
36 * void bzero(void *p, size_t size)
45 * x5 is number of cache lines to zero - calculated later and
46 * will become non-zero if buffer is long enough to zero by
47 * cache lines (and if it is allowed.)
48 * We need to zero it before proceeding with buffers of size
49 * smaller than 16 bytes - otherwise the x5 will not be
50 * calculated and will retain random value.
51 * "normal" is used for buffers <= 16 bytes and to align buffer
52 * to cache line for buffers bigger than cache line; non-0 x5
53 * after "normal" has completed indicates that it has been used
54 * to align buffer to cache line and now zero by cache lines will
55 * be performed, and x5 is amount of cache lines to loop through.
59 /* No use of cache assisted zero for buffers with size <= 16 */
64 * Load size of line that will be cleaned by dc zva call.
65 * 0 means that the instruction is not allowed
67 ldr x7, =dczva_line_size
72 * Buffer must be larger than cache line for using cache zeroing
73 * (and cache line aligned but this is checked after jump)
79 * Calculate number of bytes to cache aligned address (x4) and
80 * number of full cache lines (x5). x6 is final address to zero.
90 /* Calculate number of "lines" in buffer */
97 * If number of cache lines is 0, we will not be able to zero
98 * by cache lines, so go normal way.
101 /* x6 is final address to zero */
105 * We are here because x5 is non-0 so normal will be used to
106 * align buffer before cache zeroing. x4 holds number of bytes
107 * needed for alignment.
111 /* When jumping here: x0 holds pointer, x1 holds size */
114 * Get buffer offset into 16 byte aligned address; 0 means pointer
119 /* Calculate one-byte loop runs to 8 byte aligned address. */
123 /* x2 is number of bytes missing for alignment, x1 is buffer size */
129 * Byte by byte zeroing will clear at least enough bytes to align
130 * pointer and at most "size".
133 strb wzr, [x0], #0x01
137 /* Now pointer is aligned to 8 bytes */
141 * Check if zeroing another 8 bytes is needed to align to 16 byte
144 tbz x0, #0x03, aligned_to_16
148 /* While jumping here: x0 is 16 byte aligned address, x1 is size */
150 /* If size is less than 16 bytes, use lead_out to zero what remains */
156 stp xzr, xzr, [x0], #0x10
161 * Lead out requires addresses to be aligned to 8 bytes. It is used to
162 * zero buffers with sizes < 16 and what can not be zeroed by
168 tbz x1, #0x03, lead_out_dword
171 tbz x1, #0x02, lead_out_word
174 tbz x1, #0x01, lead_out_byte
175 strh wzr, [x0], #0x02
177 tbz x1, #0x00, lead_out_end
178 strb wzr, [x0], #0x01
182 * If x5 is non-zero, this means that normal has been used as
183 * a lead in to align buffer address to cache size
188 * Here x5 holds number of lines to zero; x6 is final address of
189 * buffer. x0 is cache line aligned pointer. x7 is cache line size
198 /* Need to zero remaining bytes? */