1 //===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the __udivsi3 (32-bit unsigned integer divide)
10 // function for the ARM 32-bit architecture.
12 //===----------------------------------------------------------------------===//
14 #include "../assembly.h"
// Presumably makes the EABI name __aeabi_uidiv resolve to __udivsi3; the
// macro is not defined in this file (confirm in ../assembly.h).
22 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
24 @ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
25 @ Calculate and return the quotient of the (unsigned) division.
// Body of __udivsi3, the 32-bit unsigned integer divide. Two build-time
// variants exist: one selected by __ARM_ARCH_EXT_IDIV__ and a software
// fallback that jumps into an unrolled sequence of block(shift) steps.
27 DEFINE_COMPILERRT_FUNCTION(__udivsi3)
28 #if __ARM_ARCH_EXT_IDIV__
// Z set by the preceding test diverts to the divby0 handler.
30 beq LOCAL_LABEL(divby0)
42 #else // ! __ARM_ARCH_EXT_IDIV__
// Carry clear after the preceding comparison diverts to the divby0 handler.
44 bcc LOCAL_LABEL(divby0)
45 #if defined(USE_THUMB_1)
// Thumb-1 only: on "not equal", continue at num_neq_denom (per the label
// name, the numerator != denominator case).
46 bne LOCAL_LABEL(num_neq_denom)
48 LOCAL_LABEL(num_neq_denom):
54 #if defined(USE_THUMB_1)
// Thumb-1 only: on "higher or same", continue at num_ge_denom (per the label
// name, the numerator >= denominator case).
55 bhs LOCAL_LABEL(num_ge_denom)
58 LOCAL_LABEL(num_ge_denom):
65 // Implement division using binary long division algorithm.
67 // r0 is the numerator, r1 the denominator.
69 // The code before JMP computes the correct shift I, so that
70 // r0 and (r1 << I) have the highest bit set in the same position.
71 // At the time of JMP, ip := .Ldiv0block - 12 * I.
72 // This depends on the fixed instruction size of block.
73 // For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
75 // block(shift) implements the test-and-update-quotient core.
76 // It assumes (r1 << shift) can be computed without overflow and
77 // that r0 < 2 * (r1 << shift). The quotient is stored in r3.
// CLZ variant: the entry address is derived arithmetically from the shift
// count held in r3.
79 # if defined(__ARM_FEATURE_CLZ)
82 // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
84 # if defined(USE_THUMB_2)
// The "+ 1" presumably sets the Thumb state bit of the jump target -- TODO
// confirm. Subtracting r3 * 2 here is the Thumb-2-only part of the scaling.
85 adr ip, LOCAL_LABEL(div0block) + 1
86 sub ip, ip, r3, lsl #1
88 adr ip, LOCAL_LABEL(div0block)
// Subtract r3 * 4 and then r3 * 8, i.e. r3 * 12 in total. Combined with the
// Thumb-2-only r3 * 2 step this gives r3 * 14, matching the 12-byte (ARM) and
// 14-byte (Thumb) block sizes stated in the comment above.
90 sub ip, ip, r3, lsl #2
91 sub ip, ip, r3, lsl #3
94 # else // No CLZ Feature
95 # if defined(USE_THUMB_2)
96 # error THUMB mode requires CLZ or UDIV
// BLOCK_SIZE scales the block counts in the steps below: 10 for Thumb-1
// (31 blocks = 310 bytes, see the padding note further down), 12 otherwise.
98 # if defined(USE_THUMB_1)
99 # define BLOCK_SIZE 10
101 # define BLOCK_SIZE 12
// Start from the address of div0block. The Thumb-1 variant keeps the jump
// target in r0, the other variant in ip.
105 # if defined(USE_THUMB_1)
107 adr r0, LOCAL_LABEL(div0block)
110 adr ip, LOCAL_LABEL(div0block)
// Steps of 16, 8, 4, 2 and 1 blocks follow. Each step moves the jump target
// back by that many blocks. In the Thumb-1 variant a blo branches over the
// subtract; in the other variant the subtract itself is conditional (subhs).
114 # if defined(USE_THUMB_1)
115 blo LOCAL_LABEL(skip_16)
117 subs r0, r0, #(16 * BLOCK_SIZE)
118 LOCAL_LABEL(skip_16):
121 subhs ip, ip, #(16 * BLOCK_SIZE)
126 # if defined(USE_THUMB_1)
127 blo LOCAL_LABEL(skip_8)
129 subs r0, r0, #(8 * BLOCK_SIZE)
133 subhs ip, ip, #(8 * BLOCK_SIZE)
138 # if defined(USE_THUMB_1)
139 blo LOCAL_LABEL(skip_4)
141 subs r0, r0, #(4 * BLOCK_SIZE)
// Two-operand form; ip is both source and destination.
145 subhs ip, #(4 * BLOCK_SIZE)
150 # if defined(USE_THUMB_1)
151 blo LOCAL_LABEL(skip_2)
153 subs r0, r0, #(2 * BLOCK_SIZE)
157 subhs ip, ip, #(2 * BLOCK_SIZE)
160 // Last block, no need to update r2 or r3.
161 # if defined(USE_THUMB_1)
164 blo LOCAL_LABEL(skip_1)
165 subs r0, r0, #(1 * BLOCK_SIZE)
// Unlike the earlier steps, this one is conditional on "lower or same" (ls).
174 subls ip, ip, #(1 * BLOCK_SIZE)
180 # endif // __ARM_FEATURE_CLZ
184 // due to the range limit of branch in Thumb1, we have to place the
// When __ARM_EABI__ is defined, control is handed to __aeabi_idiv0 with bl.
188 # if defined(__ARM_EABI__)
190 bl __aeabi_idiv0 // due to relocation limit, can't use b.
// Thumb-1 block(shift): r2 = r1 << shift (flags updated), then blo branches
// to block_skip_<shift>; at that label adcs shifts the carry flag into the
// low bit of the quotient in r3.
197 #if defined(USE_THUMB_1)
198 #define block(shift) \
199 lsls r2, r1, IMM shift; \
201 blo LOCAL_LABEL(block_skip_##shift); \
203 LOCAL_LABEL(block_skip_##shift) :; \
204 adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.
206 // TODO: if current location counter is not word aligned, we don't
207 // need the .p2align and nop
208 // Label div0block must be word-aligned. First align block 31
210 nop // Padding to align div0block as 31 blocks = 310 bytes
// Non-Thumb-1 block(shift): compare r0 with (r1 << shift); when r0 is higher
// or same, add (1 << shift) to the quotient in r3 and subtract (r1 << shift)
// from r0.
213 #define block(shift) \
214 cmp r0, r1, lsl IMM shift; \
216 WIDE(addhs) r3, r3, IMM (1 << shift); \
217 WIDE(subhs) r0, r0, r1, lsl IMM shift
// Reference label for the computed jump: the entry addresses built above are
// offsets back from here.
251 LOCAL_LABEL(div0block):
256 #endif // __ARM_ARCH_EXT_IDIV__
258 END_COMPILERRT_FUNCTION(__udivsi3)
// Presumably marks the object as not needing an executable stack; the macro
// is not defined in this file (confirm in ../assembly.h).
260 NO_EXEC_STACK_DIRECTIVE