contrib/compiler-rt/lib/sparc64/modsi3.S

   1 /*
   2  * This m4 code has been taken from The SPARC Architecture Manual Version 8.
   3  */
   4 /*
   5  * Division/Remainder
   6  *
   7  * Input is:
   8  *   dividend -- the thing being divided
   9  *   divisor -- how many ways to divide it
  10  * Important parameters:
  11  *   N -- how many bits per iteration we try to get
  12  *        as our current guess:
  13  *   WORDSIZE -- how many bits altogether we're talking about:
  14  *               obviously:
  15  * A derived constant:
  16  *   TOPBITS -- how many bits are in the top "decade" of a number:
  17  *
  18  * Important variables are:
  19  *   Q -- the partial quotient under development -- initially 0
  20  *   R -- the remainder so far -- initially == the dividend
  21  *   ITER -- number of iterations of the main division loop which will
  22  *           be required. Equal to CEIL( lg2(quotient)/4 )
  23  *           Note that this is log_base_(2ˆ4) of the quotient.
  24  *   V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
  25  * Cost:
  26  *   current estimate for non-large dividend is
  27  *        CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
  28  *   a large dividend is one greater than 2ˆ(31-4 ) and takes a
  29  *   different path, as the upper bits of the quotient must be developed
  30  *   one bit at a time.
  31  *   This uses the m4 and cpp macro preprocessors.
  32  */
  33 /*
  34  * This is the recursive definition of how we develop quotient digits.
  35  * It takes three important parameters:
  36  *   $1 -- the current depth, 1<=$1<=4
  37  *   $2 -- the current accumulation of quotient bits
  38  *   4 -- max depth
  39  * We add a new bit to $2 and either recurse or insert the bits in the quotient.
  40  * Dynamic input:
  41  *   %o3 -- current remainder
  42  *   %o2 -- current quotient
  43  *   %o5 -- current comparand
  44  *   cc -- set on current value of %o3
  45  * Dynamic output:
  46  *   %o3', %o2', %o5', cc'
  47  */
  48 #include "../assembly.h"
  49 .text
  50         .align 32
  51 DEFINE_COMPILERRT_FUNCTION(__umodsi3)
  52         b       divide
  53         mov     0,%g3                   ! result always nonnegative
  54 .text
  55         .align 32
  56 DEFINE_COMPILERRT_FUNCTION(__modsi3)
  57         orcc    %o1,%o0,%g0     ! are either %o0 or %o1 negative
  58         bge     divide                  ! if not, skip this junk
  59         mov     %o0,%g3         ! record sign of result in sign of %g3
  60         tst     %o1
  61         bge     2f
  62         tst     %o0
  63         ! %o1 < 0
  64         bge     divide
  65         neg     %o1
  66         2:
  67         ! %o0 < 0
  68         neg     %o0
  69         ! FALL THROUGH
  70 divide:
  71         ! Compute size of quotient, scale comparand.
  72         orcc    %o1,%g0,%o5             ! movcc %o1,%o5
  73         te      2                       ! if %o1 = 0
  74         mov     %o0,%o3
  75         mov     0,%o2
  76         sethi   %hi(1<<(32-4 -1)),%g1
  77         cmp     %o3,%g1
  78         blu     not_really_big
  79         mov     0,%o4
  80         !
  81         ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
  82         ! as our usual 4-at-a-shot divide step will cause overflow and havoc.
  83         ! The total number of bits in the result here is 4*%o4+%g2, where
  84         ! %g2 <= 4.
  85         ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
  86 ! the top decade: so don't even bother to compare to %o3.
  87 1:
  88         cmp     %o5,%g1
  89         bgeu    3f
  90         mov     1,%g2
  91         sll     %o5,4,%o5
  92         b       1b
  93         inc     %o4
  94 ! Now compute %g2
  95 2:      addcc   %o5,%o5,%o5
  96         bcc     not_too_big
  97         add     %g2,1,%g2
  98                 ! We're here if the %o1 overflowed when Shifting.
  99                 ! This means that %o3 has the high-order bit set.
 100                 ! Restore %o5 and subtract from %o3.
 101                 sll     %g1,4 ,%g1      ! high order bit
 102                 srl     %o5,1,%o5               ! rest of %o5
 103                 add     %o5,%g1,%o5
 104                 b       do_single_div
 105                 dec     %g2
 106 not_too_big:
 107 3:      cmp     %o5,%o3
 108         blu     2b
 109         nop
 110         be      do_single_div
 111         nop
 112 ! %o5 > %o3: went too far: back up 1 step
 113 !     srl %o5,1,%o5
 114 !      dec %g2
 115 ! do single-bit divide steps
 116 !
 117 ! We have to be careful here. We know that %o3 >= %o5, so we can do the
 118 ! first divide step without thinking. BUT, the others are conditional,
 119 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
 120 ! order bit set in the first step, just falling into the regular
 121 ! division loop will mess up the first time around.
 122 ! So we unroll slightly...
 123 do_single_div:
 124         deccc   %g2
 125         bl      end_regular_divide
 126         nop
 127         sub     %o3,%o5,%o3
 128         mov     1,%o2
 129         b,a     end_single_divloop
 130         ! EMPTY
 131 single_divloop:
 132         sll     %o2,1,%o2
 133         bl      1f
 134         srl     %o5,1,%o5
 135         ! %o3 >= 0
 136                 sub     %o3,%o5,%o3
 137                 b       2f
 138                 inc     %o2
 139         1:      ! %o3 < 0
 140                 add     %o3,%o5,%o3
 141                 dec     %o2
 142         2:
 143         end_single_divloop:
 144                 deccc   %g2
 145                 bge     single_divloop
 146                 tst     %o3
 147                 b,a     end_regular_divide
 148                 ! EMPTY
 149 not_really_big:
 150 1:
 151         sll     %o5,4,%o5
 152         cmp     %o5,%o3
 153         bleu    1b
 154         inccc   %o4
 155         be      got_result
 156         dec     %o4
 157 do_regular_divide:
 158         ! Do the main division iteration
 159         tst     %o3
 160         ! Fall through into divide loop
 161 divloop:
 162         sll     %o2,4,%o2
 163                 !depth 1, accumulated bits 0
 164         bl      L.1.16
 165         srl     %o5,1,%o5
 166         ! remainder is nonnegative
 167         subcc   %o3,%o5,%o3
 168                 !depth 2, accumulated bits 1
 169         bl      L.2.17
 170         srl     %o5,1,%o5
 171         ! remainder is nonnegative
 172         subcc   %o3,%o5,%o3
 173                 !depth 3, accumulated bits 3
 174         bl      L.3.19
 175         srl     %o5,1,%o5
 176         ! remainder is nonnegative
 177         subcc   %o3,%o5,%o3
 178                 !depth 4, accumulated bits 7
 179         bl      L.4.23
 180         srl     %o5,1,%o5
 181         ! remainder is nonnegative
 182         subcc   %o3,%o5,%o3
 183                 b       9f
 184                 add     %o2, (7*2+1), %o2
 185 L.4.23:
 186         ! remainder is negative
 187         addcc   %o3,%o5,%o3
 188                 b       9f
 189                 add     %o2, (7*2-1), %o2
 190 L.3.19:
 191         ! remainder is negative
 192         addcc   %o3,%o5,%o3
 193                 !depth 4, accumulated bits 5
 194         bl      L.4.21
 195         srl     %o5,1,%o5
 196         ! remainder is nonnegative
 197         subcc   %o3,%o5,%o3
 198                 b       9f
 199                 add     %o2, (5*2+1), %o2
 200 L.4.21:
 201         ! remainder is negative
 202         addcc   %o3,%o5,%o3
 203                 b       9f
 204                 add     %o2, (5*2-1), %o2
 205 L.2.17:
 206         ! remainder is negative
 207         addcc   %o3,%o5,%o3
 208                 !depth 3, accumulated bits 1
 209         bl      L.3.17
 210         srl     %o5,1,%o5
 211         ! remainder is nonnegative
 212         subcc   %o3,%o5,%o3
 213                 !depth 4, accumulated bits 3
 214         bl      L.4.19
 215         srl     %o5,1,%o5
 216         ! remainder is nonnegative
 217         subcc   %o3,%o5,%o3
 218                 b       9f
 219                 add     %o2, (3*2+1), %o2
 220 L.4.19:
 221         ! remainder is negative
 222         addcc   %o3,%o5,%o3
 223                 b       9f
 224                 add     %o2, (3*2-1), %o2
 225 L.3.17:
 226         ! remainder is negative
 227         addcc   %o3,%o5,%o3
 228                 !depth 4, accumulated bits 1
 229         bl      L.4.17
 230         srl     %o5,1,%o5
 231         ! remainder is nonnegative
 232         subcc   %o3,%o5,%o3
 233                 b       9f
 234                 add     %o2, (1*2+1), %o2
 235 L.4.17:
 236         ! remainder is negative
 237         addcc   %o3,%o5,%o3
 238                 b       9f
 239                 add     %o2, (1*2-1), %o2
 240 L.1.16:
 241         ! remainder is negative
 242         addcc   %o3,%o5,%o3
 243                 !depth 2, accumulated bits -1
 244         bl      L.2.15
 245         srl     %o5,1,%o5
 246         ! remainder is nonnegative
 247         subcc   %o3,%o5,%o3
 248                 !depth 3, accumulated bits -1
 249         bl      L.3.15
 250         srl     %o5,1,%o5
 251         ! remainder is nonnegative
 252         subcc   %o3,%o5,%o3
 253                 !depth 4, accumulated bits -1
 254         bl      L.4.15
 255         srl     %o5,1,%o5
 256         ! remainder is nonnegative
 257         subcc   %o3,%o5,%o3
 258                 b       9f
 259                 add     %o2, (-1*2+1), %o2
 260 L.4.15:
 261         ! remainder is negative
 262         addcc   %o3,%o5,%o3
 263                 b       9f
 264                 add     %o2, (-1*2-1), %o2
 265 L.3.15:
 266         ! remainder is negative
 267         addcc   %o3,%o5,%o3
 268                 !depth 4, accumulated bits -3
 269         bl      L.4.13
 270         srl     %o5,1,%o5
 271         ! remainder is nonnegative
 272         subcc   %o3,%o5,%o3
 273                 b       9f
 274                 add     %o2, (-3*2+1), %o2
 275 L.4.13:
 276         ! remainder is negative
 277         addcc   %o3,%o5,%o3
 278                 b       9f
 279                 add     %o2, (-3*2-1), %o2
 280 L.2.15:
 281         ! remainder is negative
 282         addcc   %o3,%o5,%o3
 283                 !depth 3, accumulated bits -3
 284         bl      L.3.13
 285         srl     %o5,1,%o5
 286         ! remainder is nonnegative
 287         subcc   %o3,%o5,%o3
 288                 !depth 4, accumulated bits -5
 289         bl      L.4.11
 290         srl     %o5,1,%o5
 291         ! remainder is nonnegative
 292         subcc   %o3,%o5,%o3
 293                 b       9f
 294                 add     %o2, (-5*2+1), %o2
 295 L.4.11:
 296         ! remainder is negative
 297         addcc   %o3,%o5,%o3
 298                 b       9f
 299                 add     %o2, (-5*2-1), %o2
 300 L.3.13:
 301         ! remainder is negative
 302         addcc   %o3,%o5,%o3
 303                 !depth 4, accumulated bits -7
 304         bl      L.4.9
 305         srl     %o5,1,%o5
 306         ! remainder is nonnegative
 307         subcc   %o3,%o5,%o3
 308                 b       9f
 309                 add     %o2, (-7*2+1), %o2
 310 L.4.9:
 311         ! remainder is negative
 312         addcc   %o3,%o5,%o3
 313                 b       9f
 314                 add     %o2, (-7*2-1), %o2
 315         9:
 316 end_regular_divide:
 317         deccc   %o4
 318         bge     divloop
 319         tst     %o3
 320         bl,a    got_result
 321         ! non-restoring fixup if remainder < 0, otherwise annulled
 322         add     %o3,%o1,%o3
 323 got_result:
 324         tst     %g3
 325         bl,a    1f
 326         ! negate for answer < 0, otherwise annulled
 327         neg     %o3,%o3                 ! %o3 <- -%o3
 328 1:
 329         retl                            ! leaf-routine return
 330         mov     %o3,%o0                 ! remainder <- %o3