contrib/compiler-rt/lib/builtins/hexagon/dfdiv.S

   1 //===----------------------Hexagon builtin routine ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is dual licensed under the MIT and the University of Illinois Open
   6 // Source Licenses. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 /* Double Precision Divide */
     /*
      * Names for the registers, predicates and constants used by
      * __hexagon_divdf3 below.  Several of them are undefined and redefined
      * part way through the routine as the registers change role.
      */
  11
     /* First operand: register pair r1:0 and its high/low words.  The routine
      * also leaves its result in this pair. */
  12 #define A r1:0
  13 #define AH r1
  14 #define AL r0
  15
     /* Second operand (the divisor): register pair r3:2 and its words. */
  16 #define B r3:2
  17 #define BH r3
  18 #define BL r2
  19
     /* Q: 64-bit integer quotient accumulator (also used briefly as scratch). */
  20 #define Q r5:4
  21 #define QH r5
  22 #define QL r4
  23
     /* PROD: 64-bit scratch pair, mostly holding multiply results. */
  24 #define PROD r7:6
  25 #define PRODHI r7
  26 #define PRODLO r6
  27
     /* Single-precision values used while refining the reciprocal estimate. */
  28 #define SFONE r8
  29 #define SFDEN r9
  30 #define SFERROR r10
  31 #define SFRECIP r11
  32
     /* Exponents of B (high word) and A (low word), addressable as one pair. */
  33 #define EXPBA r13:12
  34 #define EXPB r13
  35 #define EXPA r12
  36
     /* Second partial product subtracted from the remainder in DIV_ITER1B. */
  37 #define REMSUB2 r15:14
  38
  39
  40
     /* r28 first holds AH ^ BH (bit 31 = sign of the result); it is later
      * reused under the name ONE. */
  41 #define SIGN r28
  42
     /* Predicate register roles. */
  43 #define Q_POSITIVE p3
  44 #define NORMAL p2
  45 #define NO_OVF_UNF p1
  46 #define P_TMP p0
  47
     /* RECIPEST_SHIFT: bit position at which the reciprocal mantissa is packed
      * into RECIPEST.  QADJ: constant subtracted from the exponent difference
      * before it is added to the exponent field of the converted quotient. */
  48 #define RECIPEST_SHIFT 3
  49 #define QADJ 61
  50
     /* Class masks passed to dfclass.  Going by the names: 0x01 selects zero,
      * 0x02 normal, 0x08 infinity and 0x0F any non-NaN number; 0x04, the only
      * bit of 0x0F not named here, is presumably subnormal. */
  51 #define DFCLASS_NORMAL 0x02
  52 #define DFCLASS_NUMBER 0x0F
  53 #define DFCLASS_INFINITE 0x08
  54 #define DFCLASS_ZERO 0x01
  55 #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
  56 #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
  57
     /* Field widths of the double (DF) and single (SF) formats, and the double
      * exponent bias. */
  58 #define DF_MANTBITS 52
  59 #define DF_EXPBITS 11
  60 #define SF_MANTBITS 23
  61 #define SF_EXPBITS 8
  62 #define DF_BIAS 0x3ff
  63
     /* Bit offset of the 2-bit USR field that .Ldiv_ovf reads to choose the
      * overflow result (the rounding mode, going by the comments there). */
  64 #define SR_ROUND_OFF 22
  65
     /* Each *_ALIAS(TAG) exports an extra global symbol with the given prefix
      * and sets it equal to __hexagon_TAG.  END(TAG) emits the .size directive
      * for TAG. */
  66 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
  67 #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
  68 #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
  69 #define END(TAG) .size TAG,.-TAG
  70
  71         .text
  72         .global __hexagon_divdf3
  73         .type __hexagon_divdf3,@function
  74         Q6_ALIAS(divdf3)
  75         FAST_ALIAS(divdf3)
  76         FAST2_ALIAS(divdf3)
  77         .p2align 5
     /*
      * __hexagon_divdf3: double-precision divide, A / B.
      *   In:   r1:0 = dividend, r3:2 = divisor (double bit patterns)
      *   Out:  r1:0 = quotient; returns with jumpr r31
      *   Uses: r2-r15, r28 and p0-p3 as scratch.  USR is written explicitly
      *         on the underflow, overflow and divide-by-zero paths.
      * Also exported as __qdsp_divdf3, __hexagon_fast_divdf3 and
      * __hexagon_fast2_divdf3 through the alias macros above.
      *
      * Reading aid: all instructions in a { } packet read their source
      * registers before any of them writes, and when several compares in one
      * packet target the same predicate the results are ANDed (Hexagon packet
      * semantics; see the Programmer's Reference Manual).
      */
  78 __hexagon_divdf3:
  79         {
                     // NORMAL = A is normal AND B is normal (two writes, one packet).
  80                 NORMAL = dfclass(A,#DFCLASS_NORMAL)
  81                 NORMAL = dfclass(B,#DFCLASS_NORMAL)
                     // EXPB = BH, EXPA = AH; the exponent fields are extracted below.
  82                 EXPBA = combine(BH,AH)
                     // Bit 31 of SIGN is the sign of the quotient.
  83                 SIGN = xor(AH,BH)
  84         }
     // From here on r1:0 is treated as the running remainder and r3:2 as the
     // denominator, so the operand names are swapped for REM/DENOM.
  85 #undef A
  86 #undef AH
  87 #undef AL
  88 #undef B
  89 #undef BH
  90 #undef BL
  91 #define REM r1:0
  92 #define REMHI r1
  93 #define REMLO r0
  94 #define DENOM r3:2
  95 #define DENOMHI r3
  96 #define DENOMLO r2
  97         {
                     // Anything other than normal/normal is handled out of line.
                     // SFONE is loaded in this same packet; the abnormal path
                     // uses it when it rebuilds SFDEN.
  98                 if (!NORMAL) jump .Ldiv_abnormal
                     // PRODLO = top SF_MANTBITS bits of the divisor mantissa.
  99                 PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
                     // Bit pattern of single-precision 1.0 with the mantissa LSB set.
 100                 SFONE = ##0x3f800001
 101         }
 102         {
                     // SFDEN = single-precision value in [1,2) built from the
                     // leading divisor mantissa bits (LSB forced on by SFONE).
 103                 SFDEN = or(SFONE,PRODLO)
                     // Reduce EXPB/EXPA from raw high words to 11-bit exponent fields.
 104                 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
 105                 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
                     // True when the sign bit of SIGN is clear.
 106                 Q_POSITIVE = cmp.gt(SIGN,#-1)
 107         }
     // SIGN has been consumed into Q_POSITIVE; r28 is reused as the constant ONE.
 108 #undef SIGN
 109 #define ONE r28
 110 .Ldenorm_continue:
     /*
      * Common path; also entered from .Ldiv_abnormal once subnormal operands
      * have been normalized.  Expects SFONE, SFDEN, EXPA, EXPB and Q_POSITIVE
      * to be set and the operand mantissas to be in r1:0 and r3:2.
      *
      * Refines a single-precision reciprocal of SFDEN with two
      * multiply-accumulate correction steps, packs it into the integer
      * RECIPEST, and records in NO_OVF_UNF whether the exponent difference is
      * inside the range that needs no overflow/underflow handling.
      */
 111         {
                     // Initial reciprocal approximation.  The predicate result
                     // in P_TMP is overwritten later before anything reads it.
 112                 SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
                     // SFONE with its LSB cleared, i.e. exactly 1.0.
 113                 SFERROR = and(SFONE,#-2)
 114                 ONE = #1
                     // Exponent difference of the two operands.
 115                 EXPA = sub(EXPA,EXPB)
 116         }
 117 #undef EXPB
 118 #define RECIPEST r13
 119         {
                     // SFERROR = 1.0 - SFRECIP * SFDEN
 120                 SFERROR -= sfmpy(SFRECIP,SFDEN):lib
                     // Replace sign+exponent (top 12 bits) of the remainder with
                     // 0x001: leading one at bit 20 of the high word.
 121                 REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
                     // Leading-one bit for the packed reciprocal (just above the
                     // mantissa field inserted below).
 122                 RECIPEST = ##0x00800000 << RECIPEST_SHIFT
 123         }
 124         {
                     // First refinement: SFRECIP += SFRECIP * SFERROR
 125                 SFRECIP += sfmpy(SFRECIP,SFERROR):lib
                     // Same sign/exponent replacement for the denominator.
 126                 DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
                     // Reset SFERROR to 1.0 for the second step.
 127                 SFERROR = and(SFONE,#-2)
 128         }
 129         {
 130                 SFERROR -= sfmpy(SFRECIP,SFDEN):lib
                     // QH/QL temporarily hold the exponent-difference bounds.
 131                 QH = #-DF_BIAS+1
 132                 QL = #DF_BIAS-1
 133         }
 134         {
                     // Second refinement.
 135                 SFRECIP += sfmpy(SFRECIP,SFERROR):lib
                     // NO_OVF_UNF = (EXPA > -DF_BIAS+1) AND NOT (EXPA > DF_BIAS-1)
 136                 NO_OVF_UNF = cmp.gt(EXPA,QH)
 137                 NO_OVF_UNF = !cmp.gt(EXPA,QL)
 138         }
 139         {
                     // Low SF_MANTBITS bits of SFRECIP (its mantissa field) go to
                     // bits RECIPEST_SHIFT and up of RECIPEST.
 140                 RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
                     // Clear the quotient accumulator (it was scratch above).
 141                 Q = #0
 142                 EXPA = add(EXPA,#-QADJ)
 143         }
 144 #undef SFERROR
 145 #undef SFRECIP
 146 #define TMP r10
 147 #define TMP1 r11
 148         {
                     // Lower the estimate by 3 units of the inserted mantissa LSB.
                     // NOTE(review): presumably so the quotient estimates never
                     // overshoot; the rationale is not stated in this file.
 149                 RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
 150         }
 151
     /*
      * DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA): one quotient-refinement step.
      *
      *   packet 1: PROD = RECIPEST * REMHI (unsigned 32x32 -> 64) using the
      *             remainder as it was before this step, while REM is shifted
      *             left by REMSHIFT.  PRODHI is the quotient estimate.
      *   packet 2: PRODLO is cleared so PROD == PRODHI << 32; PRODHI * DENOMLO
      *             is subtracted from REM; REMSUB2 = PRODHI * DENOMHI.
      *   packet 3: Q += QSHIFTINSN(PROD, QSHIFT); REM -= REMSUB2 << 32; then
      *             EXTRA, if any, runs in the same packet.
      *
      * Net effect per step: REM = (REM << REMSHIFT) - PRODHI * DENOM, and
      * PRODHI is added into Q at a position chosen by QSHIFTINSN/QSHIFT.
      * NOTE(review): the "# ## x" spelling presumably exists to get an
      * immediate "#x" through the preprocessor.
      *
      * With PRODLO zero the four expansions below add PRODHI into Q shifted
      * left by 46, 31, 16 and 1 bits, matching the 15-bit remainder shift per
      * step.  The last expansion also zeroes PROD, which the correction code
      * that follows relies on.
      */
 152 #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
 153         { \
 154                 PROD = mpyu(RECIPEST,REMHI); \
 155                 REM = asl(REM,# ## ( REMSHIFT )); \
 156         }; \
 157         { \
 158                 PRODLO = # ## 0; \
 159                 REM -= mpyu(PRODHI,DENOMLO); \
 160                 REMSUB2 = mpyu(PRODHI,DENOMHI); \
 161         }; \
 162         { \
 163                 Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
 164                 REM -= asl(REMSUB2, # ## 32); \
 165                 EXTRA \
 166         }
 167
 168
 169         DIV_ITER1B(ASL,14,15,)
 170         DIV_ITER1B(ASR,1,15,)
 171         DIV_ITER1B(ASR,16,15,)
 172         DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
 173
     /*
      * Final correction and normal return.  On entry Q holds the accumulated
      * quotient, REM the remainder left by the last DIV_ITER1B step, and PROD
      * is zero.  Bit 0 of Q is used as a "remainder was non-zero" marker, so
      * one quotient unit is worth 2 here.
      */
 174 #undef REMSUB2
 175 #define TMPPAIR r15:14
 176 #define TMPPAIRHI r15
 177 #define TMPPAIRLO r14
 178 #undef RECIPEST
 179 #define EXPB r13
 180         {
 181                 // compare or sub with carry
 182                 TMPPAIR = sub(REM,DENOM)
 183                 P_TMP = cmp.gtu(DENOM,REM)
 184                 // set up amt to add to q
                     // PROD becomes 2 when REM >= DENOM (unsigned), else stays 0.
 185                 if (!P_TMP.new) PRODLO  = #2
 186         }
 187         {
 188                 Q = add(Q,PROD)
                     // When REM >= DENOM, keep REM - DENOM as the remainder.  This
                     // reads TMPPAIR before the same packet clears it.
 189                 if (!P_TMP) REM = TMPPAIR
 190                 TMPPAIR = #0
 191         }
 192         {
                     // A non-zero remainder sets bit 0 of Q, so the conversion
                     // below sees the quotient as inexact.
 193                 P_TMP = cmp.eq(REM,TMPPAIR)
 194                 if (!P_TMP.new) QL = or(QL,ONE)
 195         }
 196         {
 197                 PROD = neg(Q)
 198         }
 199         {
                     // Apply the result sign to the integer quotient.
 200                 if (!Q_POSITIVE) Q = PROD
 201         }
     // r1:0 / r3:2 go back to their operand/result names.
 202 #undef REM
 203 #undef REMHI
 204 #undef REMLO
 205 #undef DENOM
 206 #undef DENOMLO
 207 #undef DENOMHI
 208 #define A r1:0
 209 #define AH r1
 210 #define AL r0
 211 #define B r3:2
 212 #define BH r3
 213 #define BL r2
 214         {
                     // Convert the signed 64-bit quotient to double.  This is in
                     // the same packet as the jump, and .Ldiv_ovf_unf uses the
                     // converted value in A.
 215                 A = convert_d2df(Q)
 216                 if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
 217         }
 218         {
                     // Add the adjusted exponent difference into the exponent
                     // field of the result, then return.
 219                 AH += asl(EXPA,#DF_MANTBITS-32)
 220                 jumpr r31
 221         }
 222
 223 .Ldiv_ovf_unf:
     /*
      * Reached when the exponent difference was outside the range checked by
      * NO_OVF_UNF.  A holds convert_d2df(Q) and EXPA the adjusted exponent
      * difference.  Works out the exponent the result would have with
      * unlimited range, then either branches to .Ldiv_ovf or .Lpossible_unf,
      * or falls through to the underflow code below.
      */
 224         {
                     // Same exponent fix-up as the normal return path.
 225                 AH += asl(EXPA,#DF_MANTBITS-32)
                     // Reads AH as it was before the add in this packet: the
                     // exponent field of the converted quotient.
 226                 EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
 227         }
 228         {
 229                 PROD = abs(Q)
                     // EXPA = exponent field the result would need.
 230                 EXPA = add(EXPA,EXPB)
 231         }
 232         {
 233                 P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)          // overflow
 234                 if (P_TMP.new) jump:nt .Ldiv_ovf
 235         }
 236         {
                     // A positive exponent means A (already adjusted above) is the
                     // result, subject to the underflow-flag check at the target.
 237                 P_TMP = cmp.gt(EXPA,#0)
 238                 if (P_TMP.new) jump:nt .Lpossible_unf           // round up to normal possible...
 239         }
 240         /* Underflow */
 241         /* We know what the infinite range exponent should be (EXPA) */
 242         /* Q is 2's complement, PROD is abs(Q) */
 243         /* Normalize Q, shift right, add a high bit, convert, change exponent */
 244
 245 #define FUDGE1 7        // how much to shift right
 246 #define FUDGE2 4        // how many guard/round to keep at lsbs
 247
 248         {
                     // EXPB = left shift that normalizes PROD (leading one at bit 62).
 249                 EXPB = add(clb(PROD),#-1)                       // doesn't need to be added in since
                     // EXPA = right shift to apply afterwards (before clamping).
 250                 EXPA = sub(#FUDGE1,EXPA)                        // we extract post-converted exponent
 251                 TMP = USR
 252                 TMP1 = #63
 253         }
 254         {
                     // New EXPB = right-shift amount, clamped to 63.  The asl below
                     // still uses the old EXPB (the normalizing shift).
 255                 EXPB = min(EXPA,TMP1)
                     // USR value with bits 0x30 set (the same constant is described
                     // as "Unf+Inexact" in .Lpossible_unf).
 256                 TMP1 = or(TMP,#0x030)
 257                 PROD = asl(PROD,EXPB)
                     // EXPA = 0 so that EXPBA can serve as the (width, offset)
                     // pair of the register-form extractu below.
 258                 EXPA = #0
 259         }
 260         {
 261                 TMPPAIR = extractu(PROD,EXPBA)                          // bits that will get shifted out
 262                 PROD = lsr(PROD,EXPB)                                   // shift out bits
 263                 B = #1
 264         }
 265         {
                     // If any shifted-out bit was set, OR a 1 into the LSB so the
                     // lost bits still make the value inexact.
 266                 P_TMP = cmp.gtu(B,TMPPAIR)
 267                 if (!P_TMP.new) PRODLO = or(BL,PRODLO)
                     // The "add a high bit" step from the comment above.
 268                 PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
 269         }
 270         {
 271                 Q = neg(PROD)
                     // If any of the low FUDGE2 bits is set, switch to the USR
                     // value that has the 0x30 bits set.
 272                 P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
 273                 if (!P_TMP.new) TMP = TMP1
 274         }
 275         {
                     // Writes the TMP chosen above; TMP is reloaded in this same
                     // packet with the exponent correction applied after the
                     // conversion.
 276                 USR = TMP
 277                 if (Q_POSITIVE) Q = PROD
 278                 TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
 279         }
 280         {
 281                 A = convert_d2df(Q)
 282         }
 283         {
                     // Fold the exponent correction into the result and return.
 284                 AH += asl(TMP,#DF_MANTBITS-32)
 285                 jumpr r31
 286         }
 287
 288
 289 .Lpossible_unf:
     /*
      * Entered from .Ldiv_ovf_unf with the finished result in A and abs(Q) in
      * PROD.  The value in A is returned unchanged; this code only decides
      * whether the 0x30 bits also have to be set in USR.
      */
 290         /* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */
 291         /* The answer is correct, but we need to raise Underflow */
 292         {
                     // B = A with the sign bit dropped.
 293                 B = extractu(A,#63,#0)
 294                 TMPPAIR = combine(##0x00100000,#0)              // min normal
                     // Mask of the low 15 bits, tested against PRODHI below.
 295                 TMP = #0x7FFF
 296         }
 297         {
                     // Both conditions must hold (two writes to P_TMP, one packet).
 298                 P_TMP = dfcmp.eq(TMPPAIR,B)             // Is everything zero in the rounded value...
 299                 P_TMP = bitsset(PRODHI,TMP)             // but a bunch of bits set in the unrounded abs(quotient)?
 300         }
 301
     // For architecture version 60 the USR read and the conditional return are
     // written as two separate instructions instead of one packet.
     // NOTE(review): presumably a V60 packet restriction; not stated here.
 302 #if (__HEXAGON_ARCH__ == 60)
 303                 TMP = USR               // If not, just return
 304                 if (!P_TMP) jumpr r31   // Else, we want to set Unf+Inexact
 305                                         // Note that inexact is already set...
 306 #else
 307         {
 308                 if (!P_TMP) jumpr r31                   // If not, just return
 309                 TMP = USR                               // Else, we want to set Unf+Inexact
 310         }                                               // Note that inexact is already set...
 311 #endif
 312         {
 313                 TMP = or(TMP,#0x30)
 314         }
 315         {
 316                 USR = TMP
 317         }
 318         {
                     // NOTE(review): p0 is not read afterwards; presumably this
                     // compare is issued for its exception side effect, as the
                     // comment on the similar compare in .Ldiv_ovf suggests.
 319                 p0 = dfcmp.eq(A,A)
 320                 jumpr r31
 321         }
 322
 323 .Ldiv_ovf:
 324         /*
 325          * Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
 326          */
     /*
      * The 2-bit USR field at SR_ROUND_OFF and the result sign decide between
      * the two candidates: 0x7fefffff_ffffffff (largest finite magnitude) and
      * 0x7ff00000_00000000 (infinity).  The sign is merged in at the end.
      */
 327         {
 328                 TMP = USR
                     // Default candidate: largest finite magnitude.
 329                 B = combine(##0x7fefffff,#-1)
                     // AH = 0 for a positive result, all ones for a negative one;
                     // only its top bit survives the final insert.
 330                 AH = mux(Q_POSITIVE,#0,#-1)
 331         }
 332         {
                     // Alternative candidate: infinity.
 333                 PROD = combine(##0x7ff00000,#0)
 334                 QH = extractu(TMP,#2,#SR_ROUND_OFF)
                     // Flag bits to raise; the header comment says Overflow.
                     // NOTE(review): 0x28 presumably also includes inexact.
 335                 TMP = or(TMP,#0x28)
 336         }
 337         {
 338                 USR = TMP
                     // QH = field value with bit 0 flipped for negative results.
 339                 QH ^= lsr(AH,#31)
                     // QL = field value as read from USR (QH before the xor above).
 340                 QL = QH
 341         }
 342         {
 343                 p0 = !cmp.eq(QL,#1)             // if not round-to-zero
 344                 p0 = !cmp.eq(QH,#2)             // and not rounding the other way
 345                 if (p0.new) B = PROD            // go to inf
 346                 p0 = dfcmp.eq(B,B)              // get exceptions
 347         }
 348         {
                     // Copy the low 63 bits of the chosen value into A, keeping
                     // the sign bit set up in AH above.
 349                 A = insert(B,#63,#0)
 350                 jumpr r31
 351         }
 352
 353 #undef ONE
 354 #define SIGN r28
 355 #undef NORMAL
 356 #undef NO_OVF_UNF
 357 #define P_INF p1
 358 #define P_ZERO p2
 359 .Ldiv_abnormal:
     /*
      * Entered from the second packet of __hexagon_divdf3 when A and B are not
      * both normal.  r28 still holds SIGN (AH ^ BH) and SFONE is already
      * loaded.  Dispatches NaN, invalid (inf/inf, 0/0), zero-result and
      * infinite-result cases to their own labels; what is left is
      * (sub)normal / (sub)normal, which is normalized here and sent back to
      * .Ldenorm_continue.
      */
 360         {
                     // P_TMP = A and B are both in the "number" class.
 361                 P_TMP = dfclass(A,#DFCLASS_NUMBER)
 362                 P_TMP = dfclass(B,#DFCLASS_NUMBER)
 363                 Q_POSITIVE = cmp.gt(SIGN,#-1)
 364         }
 365         {
                     // P_INF = both operands infinite.
 366                 P_INF = dfclass(A,#DFCLASS_INFINITE)
 367                 P_INF = dfclass(B,#DFCLASS_INFINITE)
 368         }
 369         {
                     // P_ZERO = both operands zero.
 370                 P_ZERO = dfclass(A,#DFCLASS_ZERO)
 371                 P_ZERO = dfclass(B,#DFCLASS_ZERO)
 372         }
 373         {
 374                 if (!P_TMP) jump .Ldiv_nan
 375                 if (P_INF) jump .Ldiv_invalid
 376         }
 377         {
 378                 if (P_ZERO) jump .Ldiv_invalid
 379         }
 380         {
                     // P_ZERO is reused: A is non-zero AND B is not infinite.
                     // When false, the quotient is a signed zero.
 381                 P_ZERO = dfclass(A,#DFCLASS_NONZERO)            // nonzero
 382                 P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)        // non-infinite
 383         }
 384         {
                     // P_INF is reused: A is not infinite AND B is non-zero.
                     // When false, the quotient is a signed infinity.
 385                 P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
 386                 P_INF = dfclass(B,#DFCLASS_NONZERO)     // nonzero
 387         }
 388         {
 389                 if (!P_ZERO) jump .Ldiv_zero_result
 390                 if (!P_INF) jump .Ldiv_inf_result
 391         }
 392         /* Now we've narrowed it down to (de)normal / (de)normal */
 393         /* Set up A/EXPA B/EXPB and go back */
 394 #undef P_ZERO
 395 #undef P_INF
 396 #define P_TMP2 p1
 397         {
 398                 P_TMP = dfclass(A,#DFCLASS_NORMAL)
 399                 P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
                     // Hidden-bit position within the high word.
 400                 TMP = ##0x00100000
 401         }
 402         {
                     // Keep the raw high words for the exponent extraction below.
 403                 EXPBA = combine(BH,AH)
                     // The low 12 bits of TMP are zero, so these inserts clear
                     // bits 20..31 of each high word.
 404                 AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)          // clear out hidden bit, sign bit
 405                 BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)          // clear out hidden bit, sign bit
 406         }
 407         {
 408                 if (P_TMP) AH = or(AH,TMP)                              // if normal, add back in hidden bit
 409                 if (P_TMP2) BH = or(BH,TMP)                             // if normal, add back in hidden bit
 410         }
 411         {
                     // Left shifts that bring each leading one up to the hidden-bit
                     // position (zero for an operand that was normal).
 412                 QH = add(clb(A),#-DF_EXPBITS)
 413                 QL = add(clb(B),#-DF_EXPBITS)
 414                 TMP = #1
 415         }
 416         {
 417                 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
 418                 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
 419         }
 420         {
 421                 A = asl(A,QH)
 422                 B = asl(B,QL)
                     // A subnormal operand gets the effective exponent 1 - shift.
 423                 if (!P_TMP) EXPA = sub(TMP,QH)
 424                 if (!P_TMP2) EXPB = sub(TMP,QL)
 425         }       // recreate values needed by resume code
 426         {
                     // Same divisor-mantissa extraction as on the fast path.
 427                 PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
 428         }
 429         {
 430                 SFDEN = or(SFONE,PRODLO)
 431                 jump .Ldenorm_continue
 432         }
 433
 434 .Ldiv_zero_result:
     // Return a zero carrying the sign of the quotient (A is zero or B is
     // infinite, per the dispatch in .Ldiv_abnormal).
 435         {
                     // Bit 31 of AH becomes the result sign.
 436                 AH = xor(AH,BH)
 437                 B = #0
 438         }
 439         {
                     // Clear the low 63 bits of A, keeping only the sign bit.
 440                 A = insert(B,#63,#0)
 441                 jumpr r31
 442         }
 443 .Ldiv_inf_result:
     // Return an infinity carrying the sign of the quotient (A is infinite or
     // B is zero, per the dispatch in .Ldiv_abnormal).  The DBZ bit is set in
     // USR only for a non-infinite A divided by a zero B.
 444         {
                     // p2 = B is zero AND A is not infinite.
 445                 p2 = dfclass(B,#DFCLASS_ZERO)
 446                 p2 = dfclass(A,#DFCLASS_NONINFINITE)
 447         }
 448         {
 449                 TMP = USR
 450                 if (!p2) jump 1f
                     // Part of the same packet as the jump, so the sign is
                     // computed on both paths.
 451                 AH = xor(AH,BH)
 452         }
 453         {
 454                 TMP = or(TMP,#0x04)             // DBZ
 455         }
 456         {
 457                 USR = TMP
 458         }
 459 1:
 460         {
                     // Bit pattern of +infinity.  The compare in this packet reads
                     // B as it was before this write.
 461                 B = combine(##0x7ff00000,#0)
 462                 p0 = dfcmp.uo(B,B)              // take possible exception
 463         }
 464         {
                     // Copy the low 63 bits of infinity under the sign bit in A.
 465                 A = insert(B,#63,#0)
 466                 jumpr r31
 467         }
 468 .Ldiv_nan:
     // At least one operand failed the DFCLASS_NUMBER test.  Returns the
     // all-ones bit pattern in A.
     // NOTE(review): class mask 0x10 is presumably "NaN"; it has no named
     // constant in this file.
 469         {
 470                 p0 = dfclass(A,#0x10)
 471                 p1 = dfclass(B,#0x10)
                     // Replace whichever operand is not in class 0x10 with the
                     // other one (both moves read the pre-packet values).
 472                 if (!p0.new) A = B
 473                 if (!p1.new) B = A
 474         }
 475         {
                     // The converted values are discarded; QH/QL are scratch here.
 476                 QH = convert_df2sf(A)   // get possible invalid exceptions
 477                 QL = convert_df2sf(B)
 478         }
 479         {
                     // All 64 bits set: sign set, exponent and mantissa all ones.
 480                 A = #-1
 481                 jumpr r31
 482         }
 483
 484 .Ldiv_invalid:
     // Reached for inf/inf and 0/0.  The result and the invalid exception are
     // both produced by converting a single-precision value to double.
 485         {
                     // Single-precision pattern with all exponent bits set and
                     // only the lowest mantissa bit set.
 486                 TMP = ##0x7f800001
 487         }
 488         {
 489                 A = convert_sf2df(TMP)          // get invalid, get DF qNaN
 490                 jumpr r31
 491         }
     // Emit the .size directive for the whole routine.
 492 END(__hexagon_divdf3)