/*
 * This m4 code has been taken from The SPARC Architecture Manual Version 8.
 */
/*
 * Division/Remainder
 *
 * Input is:
 *   dividend -- the thing being divided
 *   divisor -- how many ways to divide it
 * Important parameters:
 *   N -- how many bits per iteration we try to get
 *        as our current guess:
 *   WORDSIZE -- how many bits altogether we're talking about:
 *               obviously:
 * A derived constant:
 *   TOPBITS -- how many bits are in the top "decade" of a number:
 *
 * Important variables are:
 *   Q -- the partial quotient under development -- initially 0
 *   R -- the remainder so far -- initially == the dividend
 *   ITER -- number of iterations of the main division loop which will
 *           be required. Equal to CEIL( lg2(quotient)/4 )
 *           Note that this is log_base_(2ˆ4) of the quotient.
 *   V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
 * Cost:
 *   current estimate for non-large dividend is
 *        CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
 *   a large dividend is one greater than 2ˆ(31-4 ) and takes a
 *   different path, as the upper bits of the quotient must be developed
 *   one bit at a time.
 *   This uses the m4 and cpp macro preprocessors.
 */
/*
 * This is the recursive definition of how we develop quotient digits.
 * It takes three important parameters:
 *   $1 -- the current depth, 1<=$1<=4
 *   $2 -- the current accumulation of quotient bits
 *   4 -- max depth
 * We add a new bit to $2 and either recurse or insert the bits in the quotient.
 * Dynamic input:
 *   %o3 -- current remainder
 *   %o2 -- current quotient
 *   %o5 -- current comparand
 *   cc -- set on current value of %o3
 * Dynamic output:
 *   %o3', %o2', %o5', cc'
 */
#include "../assembly.h"
.text
	.align 32
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
	b	divide
	mov	0,%g3			! result always nonnegative
.text
	.align 32
DEFINE_COMPILERRT_FUNCTION(__divsi3)
	orcc	%o1,%o0,%g0	! are either %o0 or %o1 negative
	bge	divide			! if not, skip this junk
	xor	%o1,%o0,%g3	! record sign of result in sign of %g3
	tst	%o1
	bge	2f
	tst	%o0
	! %o1 < 0
	bge	divide
	neg	%o1
	2:
	! %o0 < 0
	neg	%o0
	! FALL THROUGH
divide:
	! Compute size of quotient, scale comparand.
	orcc	%o1,%g0,%o5		! movcc %o1,%o5
	te	2			! if %o1 = 0
	mov	%o0,%o3
	mov	0,%o2
	sethi	%hi(1<<(32-4 -1)),%g1
	cmp	%o3,%g1
	blu	not_really_big
	mov	0,%o4
	!
	! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
	! as our usual 4-at-a-shot divide step will cause overflow and havoc.
	! The total number of bits in the result here is 4*%o4+%g2, where
	! %g2 <= 4.
	! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
! the top decade: so don't even bother to compare to %o3.
1:
	cmp	%o5,%g1
	bgeu	3f
	mov	1,%g2
	sll	%o5,4,%o5
	b	1b
	inc	%o4
! Now compute %g2
2:	addcc	%o5,%o5,%o5
	bcc	not_too_big
	add	%g2,1,%g2
		! We're here if the %o1 overflowed when Shifting.
		! This means that %o3 has the high-order bit set.
		! Restore %o5 and subtract from %o3.
		sll	%g1,4 ,%g1	! high order bit
		srl	%o5,1,%o5		! rest of %o5
		add	%o5,%g1,%o5
		b	do_single_div
		dec	%g2
not_too_big:
3:	cmp	%o5,%o3
	blu	2b
	nop
	be	do_single_div
	nop
! %o5 > %o3: went too far: back up 1 step
!     srl %o5,1,%o5
!      dec %g2
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
do_single_div:
	deccc	%g2
	bl	end_regular_divide
	nop
	sub	%o3,%o5,%o3
	mov	1,%o2
	b,a	end_single_divloop
	! EMPTY
single_divloop:
	sll	%o2,1,%o2
	bl	1f
	srl	%o5,1,%o5
	! %o3 >= 0
		sub	%o3,%o5,%o3
		b	2f
		inc	%o2
	1:	! %o3 < 0
		add	%o3,%o5,%o3
		dec	%o2
	2:
	end_single_divloop:
		deccc	%g2
		bge	single_divloop
		tst	%o3
		b,a	end_regular_divide
		! EMPTY
not_really_big:
1:
	sll	%o5,4,%o5
	cmp	%o5,%o3
	bleu	1b
	inccc	%o4
	be	got_result
	dec	%o4
do_regular_divide:
	! Do the main division iteration
	tst	%o3
	! Fall through into divide loop
divloop:
	sll	%o2,4,%o2
		!depth 1, accumulated bits 0
	bl	L.1.16
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
	 	!depth 2, accumulated bits 1
	bl	L.2.17
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
	 	!depth 3, accumulated bits 3
	bl	L.3.19
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
	 	!depth 4, accumulated bits 7
	bl	L.4.23
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
		b	9f
		add	%o2, (7*2+1), %o2
L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
		b	9f
		add	%o2, (7*2-1), %o2
L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	 	!depth 4, accumulated bits 5
	bl	L.4.21
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
		b	9f
		add	%o2, (5*2+1), %o2
L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
		b	9f
		add	%o2, (5*2-1), %o2
L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	 	!depth 3, accumulated bits 1
	bl	L.3.17
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
	 	!depth 4, accumulated bits 3
	bl	L.4.19
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
		b	9f
		add	%o2, (3*2+1), %o2
L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
		b	9f
		add	%o2, (3*2-1), %o2
L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	 	!depth 4, accumulated bits 1
	bl	L.4.17
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
		b	9f
		add	%o2, (1*2+1), %o2
L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
		b	9f
		add	%o2, (1*2-1), %o2
L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	 	!depth 2, accumulated bits -1
	bl	L.2.15
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
	 	!depth 3, accumulated bits -1
	bl	L.3.15
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
	 	!depth 4, accumulated bits -1
	bl	L.4.15
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
		b	9f
		add	%o2, (-1*2+1), %o2
L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
		b	9f
		add	%o2, (-1*2-1), %o2
L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	 	!depth 4, accumulated bits -3
	bl	L.4.13
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
		b	9f
		add	%o2, (-3*2+1), %o2
L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
		b	9f
		add	%o2, (-3*2-1), %o2
L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	 	!depth 3, accumulated bits -3
	bl	L.3.13
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
	 	!depth 4, accumulated bits -5
	bl	L.4.11
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
		b	9f
		add	%o2, (-5*2+1), %o2
L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
		b	9f
		add	%o2, (-5*2-1), %o2
L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	 	!depth 4, accumulated bits -7
	bl	L.4.9
	srl	%o5,1,%o5
	! remainder is nonnegative
	subcc	%o3,%o5,%o3
		b	9f
		add	%o2, (-7*2+1), %o2
L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
		b	9f
		add	%o2, (-7*2-1), %o2
	9:
end_regular_divide:
	deccc	%o4
	bge	divloop
	tst	%o3
	bl,a	got_result
	! non-restoring fixup if remainder < 0, otherwise annulled
	dec	%o2
got_result:
	tst	%g3
	bl,a	1f
	! negate for answer < 0, otherwise annulled
	neg	%o2,%o2			! %o2 <- -%o2
1:
	retl				! leaf-routine return
	mov	%o2,%o0			! quotient <- %o2