sys/arm64/arm64/bzero.S

   1 /*-
   2  * Copyright (C) 2016 Cavium Inc.
   3  * All rights reserved.
   4  *
   5  * Developed by Semihalf.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <machine/asm.h>
  30 __FBSDID("$FreeBSD$");
  31
  32
  33 #include "assym.inc"
  34
  35         /*
  36          * void bzero(void *p, size_t size)
  37          *
  38          *  x0 - p
  39          *  x1 - size
  40          */
  41 ENTRY(bzero)
  42         cbz     x1, ending
  43
  44         /*
  45          * x5 is number of cache lines to zero - calculated later and
  46          * will become non-zero if  buffer is long enough to zero by
  47          * cache lines (and if it is allowed.)
  48          * We need to zero it before proceeding with buffers of size
  49          * smaller than 16 bytes - otherwise the x5 will not be
  50          * calculated and will retain random value.
  51          * "normal" is used for buffers <= 16 bytes and to align buffer
  52          * to cache line for buffers bigger than cache line; non-0 x5
  53          * after "normal" has completed indicates that it has been used
  54          * to align buffer to cache line and now zero by cache lines will
  55          * be performed, and x5 is amount of cache lines to loop through.
  56          */
  57         mov     x5, xzr
  58
  59         /* No use of cache assisted zero for buffers with size <= 16 */
  60         cmp     x1, #0x10
  61         b.le    normal
  62
  63         /*
  64          * Load size of line that will be cleaned by dc zva call.
  65          * 0 means that the instruction is not allowed
  66          */
  67         ldr     x7, =dczva_line_size
  68         ldr     x7, [x7]
  69         cbz     x7, normal
  70
  71         /*
  72          * Buffer must be larger than cache line for using cache zeroing
  73          * (and cache line aligned but this is checked after jump)
  74          */
  75         cmp     x1, x7
  76         b.lt    normal
  77
  78         /*
  79          * Calculate number of bytes to cache aligned address (x4) nad
  80          * number of full cache lines (x5). x6 is final address to zero.
  81          */
  82         sub     x2, x7, #0x01
  83         mov     x3, -1
  84         eor     x3, x3, x2
  85         add     x4, x0, x2
  86         and     x4, x4, x3
  87         subs    x4, x4, x0
  88         b.eq    normal
  89
  90         /* Calculate number of "lines" in buffer */
  91         sub     x5, x1, x4
  92         rbit    x2, x7
  93         clz     x2, x2
  94         lsr     x5, x5, x2
  95
  96         /*
  97          * If number of cache lines is 0, we will not be able to zero
  98          * by cache lines, so go normal way.
  99          */
 100         cbz     x5, normal
 101         /* x6 is final address to zero */
 102         add     x6, x0, x1
 103
 104         /*
 105          * We are here because x5 is non-0 so normal will be used to
 106          * align buffer before cache zeroing. x4 holds number of bytes
 107          * needed for alignment.
 108          */
 109         mov     x1, x4
 110
 111         /* When jumping here: x0 holds pointer, x1 holds size */
 112 normal:
 113         /*
 114          * Get buffer offset into 16 byte aligned address; 0 means pointer
 115          * is aligned.
 116          */
 117         ands    x2, x0, #0x0f
 118         b.eq    aligned_to_16
 119         /* Calculate one-byte loop runs to 8 byte aligned address. */
 120         ands    x2, x2, #0x07
 121         mov     x3, #0x08
 122         sub     x2, x3, x2
 123         /* x2 is number of bytes missing for alignment, x1 is buffer size */
 124         cmp     x1, x2
 125         csel    x2, x1, x2, le
 126         sub     x1, x1, x2
 127
 128         /*
 129          * Byte by byte copy will copy at least enough bytes to align
 130          * pointer and at most "size".
 131          */
 132 align:
 133         strb    wzr, [x0], #0x01
 134         subs    x2, x2, #0x01
 135         b.ne    align
 136
 137         /* Now pointer is aligned to 8 bytes */
 138         cmp     x1, #0x10
 139         b.lt    lead_out
 140         /*
 141          * Check if copy of another 8 bytes is needed to align to 16 byte
 142          * address and do it
 143          */
 144         tbz     x0, #0x03, aligned_to_16
 145         str     xzr, [x0], #0x08
 146         sub     x1, x1, #0x08
 147
 148         /* While jumping here: x0 is 16 byte alligned address, x1 is size */
 149 aligned_to_16:
 150         /* If size is less than 16 bytes, use lead_out to copy what remains */
 151         cmp     x1, #0x10
 152         b.lt    lead_out
 153
 154         lsr     x2, x1, #0x04
 155 zero_by_16:
 156         stp     xzr, xzr, [x0], #0x10
 157         subs    x2, x2, #0x01
 158         b.ne    zero_by_16
 159
 160         /*
 161          * Lead out requires addresses to be aligned to 8 bytes. It is used to
 162          * zero buffers with sizes < 16 and what can not be zeroed by
 163          * zero_by_16 loop.
 164          */
 165         ands    x1, x1, #0x0f
 166         b.eq    lead_out_end
 167 lead_out:
 168         tbz     x1, #0x03, lead_out_dword
 169         str     xzr, [x0], #0x08
 170 lead_out_dword:
 171         tbz     x1, #0x02, lead_out_word
 172         str     wzr, [x0], #0x04
 173 lead_out_word:
 174         tbz     x1, #0x01, lead_out_byte
 175         strh    wzr, [x0], #0x02
 176 lead_out_byte:
 177         tbz     x1, #0x00, lead_out_end
 178         strb    wzr, [x0], #0x01
 179
 180 lead_out_end:
 181         /*
 182          * If x5 is non-zero, this means that normal has been used as
 183          * a lead in to align buffer address to cache size
 184          */
 185         cbz     x5, ending
 186
 187         /*
 188          * Here x5 holds number of lines to zero; x6 is final address of
 189          * buffer. x0 is cache line aligned pointer. x7 is cache line size
 190          * in bytes
 191          */
 192 cache_line_zero:
 193         dc      zva, x0
 194         add     x0, x0, x7
 195         subs    x5, x5, #0x01
 196         b.ne    cache_line_zero
 197
 198         /* Need to zero remaining bytes? */
 199         subs    x1, x6, x0
 200         b.ne    normal
 201
 202 ending:
 203         ret
 204
 205 END(bzero)
 206