1 /*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
3 * The LLVM Compiler Infrastructure
5 * This file is dual licensed under the MIT and the University of Illinois Open
6 * Source Licenses. See LICENSE.TXT for details.
8 *===----------------------------------------------------------------------===//
10 * This file implements the __udivsi3 (32-bit unsigned integer divide)
11 * function for the ARM 32-bit architecture.
13 *===----------------------------------------------------------------------===*/
15 #include "../assembly.h"
20 #if __ARM_ARCH_ISA_THUMB == 2
25 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
27 @ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
28 @ Calculate and return the quotient of the (unsigned) division.
30 #if __ARM_ARCH_ISA_THUMB == 2
31 DEFINE_COMPILERRT_THUMB_FUNCTION(__udivsi3)
33 DEFINE_COMPILERRT_FUNCTION(__udivsi3)
35 #if __ARM_ARCH_EXT_IDIV__
37 beq LOCAL_LABEL(divby0)
42 bcc LOCAL_LABEL(divby0)
43 #if __ARM_ARCH_ISA_THUMB == 1
44 bne LOCAL_LABEL(num_neq_denom)
46 LOCAL_LABEL(num_neq_denom):
52 #if __ARM_ARCH_ISA_THUMB == 1
53 bhs LOCAL_LABEL(num_ge_denom)
56 LOCAL_LABEL(num_ge_denom):
64 * Implement division using binary long division algorithm.
66 * r0 is the numerator, r1 the denominator.
68 * The code before JMP computes the correct shift I, so that
69 * r0 and (r1 << I) have the highest bit set in the same position.
70 * At the time of JMP, ip := .Ldiv0block - 12 * I.
71 * This depends on the fixed instruction size of block.
72 * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
74 * block(shift) implements the test-and-update-quotient core.
75 * It assumes (r1 << shift) can be computed without overflow and
76 * that r0 < 2 * (r1 << shift). The quotient is stored in r3.
79 # if defined(__ARM_FEATURE_CLZ)
82 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
84 # if __ARM_ARCH_ISA_THUMB == 2
85 adr ip, LOCAL_LABEL(div0block) + 1
86 sub ip, ip, r3, lsl #1
88 adr ip, LOCAL_LABEL(div0block)
90 sub ip, ip, r3, lsl #2
91 sub ip, ip, r3, lsl #3
94 # else /* No CLZ Feature */
95 # if __ARM_ARCH_ISA_THUMB == 2
96 # error THUMB mode requires CLZ or UDIV
98 # if __ARM_ARCH_ISA_THUMB == 1
99 # define BLOCK_SIZE 10
101 # define BLOCK_SIZE 12
105 # if __ARM_ARCH_ISA_THUMB == 1
107 adr r0, LOCAL_LABEL(div0block)
110 adr ip, LOCAL_LABEL(div0block)
114 # if __ARM_ARCH_ISA_THUMB == 1
115 blo LOCAL_LABEL(skip_16)
117 subs r0, r0, #(16 * BLOCK_SIZE)
118 LOCAL_LABEL(skip_16):
121 subhs ip, ip, #(16 * BLOCK_SIZE)
126 # if __ARM_ARCH_ISA_THUMB == 1
127 blo LOCAL_LABEL(skip_8)
129 subs r0, r0, #(8 * BLOCK_SIZE)
133 subhs ip, ip, #(8 * BLOCK_SIZE)
138 # if __ARM_ARCH_ISA_THUMB == 1
139 blo LOCAL_LABEL(skip_4)
141 subs r0, r0, #(4 * BLOCK_SIZE)
145 subhs ip, #(4 * BLOCK_SIZE)
150 # if __ARM_ARCH_ISA_THUMB == 1
151 blo LOCAL_LABEL(skip_2)
153 subs r0, r0, #(2 * BLOCK_SIZE)
157 subhs ip, ip, #(2 * BLOCK_SIZE)
160 /* Last block, no need to update r2 or r3. */
161 # if __ARM_ARCH_ISA_THUMB == 1
164 blo LOCAL_LABEL(skip_1)
165 subs r0, r0, #(1 * BLOCK_SIZE)
174 subls ip, ip, #(1 * BLOCK_SIZE)
180 # endif /* __ARM_FEATURE_CLZ */
184 /* due to the range limit of branch in Thumb1, we have to place the
188 # if defined(__ARM_EABI__)
189 bl __aeabi_idiv0 // due to relocation limit, can't use b.
194 #if __ARM_ARCH_ISA_THUMB == 1
/* Thumb1 form of one long-division step for the given shift:
 * r2 receives r1 << shift; the blo jumps to block_skip_<shift> when r0 is
 * lower than r2 (carry clear), and the closing adcs shifts the carry flag
 * into the low bit of the quotient accumulated in r3.
 * NOTE(review): the jump table elsewhere in this file relies on every
 * expansion having the same byte size -- do not add or remove instructions
 * without updating BLOCK_SIZE. */
195 #define block(shift) \
196 lsls r2, r1, IMM shift; \
198 blo LOCAL_LABEL(block_skip_##shift); \
200 LOCAL_LABEL(block_skip_##shift) :; \
201 adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
203 /* TODO: if current location counter is not word aligned, we don't
204 need the .p2align and nop */
205 /* Label div0block must be word-aligned. First align block 31 */
207 nop /* Padding to align div0block as 31 blocks = 310 bytes */
/* One long-division step for the given shift: compare r0 with r1 << shift;
 * when r0 is higher-or-same (hs), add (1 << shift) to the quotient in r3
 * and subtract r1 << shift from r0.
 * NOTE(review): appears to be the variant used when __ARM_ARCH_ISA_THUMB
 * is not 1 (the matching #else is outside this view) -- confirm.
 * WIDE() presumably forces a fixed-width encoding so that each expansion
 * keeps a constant byte size for the computed jump -- confirm in assembly.h. */
210 #define block(shift) \
211 cmp r0, r1, lsl IMM shift; \
213 WIDE(addhs) r3, r3, IMM (1 << shift); \
214 WIDE(subhs) r0, r0, r1, lsl IMM shift
248 LOCAL_LABEL(div0block):
253 #endif /* __ARM_ARCH_EXT_IDIV__ */
255 #if __ARM_ARCH_EXT_IDIV__
265 END_COMPILERRT_FUNCTION(__udivsi3)
267 NO_EXEC_STACK_DIRECTIVE