contrib/compiler-rt/lib/builtins/hexagon/dfaddsub.S

   1 //===----------------------Hexagon builtin routine ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is dual licensed under the MIT and the University of Illinois Open
   6 // Source Licenses. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 /* Double Precision Add and Subtract */
  11
  12 #define A r1:0                  /* first operand as a 64-bit pair; the result is also left here before each return */
  13 #define AH r1                   /* high word of A (sign, exponent, upper mantissa bits) */
  14 #define AL r0                   /* low word of A */
  15 #define B r3:2                  /* second operand as a 64-bit pair */
  16 #define BH r3                   /* high word of B */
  17 #define BL r2                   /* low word of B */
  18
  19 #define EXPA r4                 /* exponent field extracted from AH */
  20 #define EXPB r5                 /* exponent field extracted from BH; later reused for the exponent adjustment */
  21 #define EXPB_A r5:4             /* EXPB:EXPA as one pair so both can be swapped by a single combine */
  22
  23 #define ZTMP r7:6               /* scratch pair that carries small constants in its two halves */
  24 #define ZTMPH r7
  25 #define ZTMPL r6
  26
  27 #define ATMP r13:12             /* working 64-bit value for A's significand and then the running sum */
  28 #define ATMPH r13
  29 #define ATMPL r12
  30
  31 #define BTMP r9:8               /* working 64-bit value for B's significand */
  32 #define BTMPH r9
  33 #define BTMPL r8
  34
  35 #define ATMP2 r11:10            /* second scratch pair (negated value, shifted-out bits, difference) */
  36 #define ATMP2H r11
  37 #define ATMP2L r10
  38
  39 #define EXPDIFF r15             /* exponent difference; this is the high word of EXTRACTAMT */
  40 #define EXTRACTOFF r14          /* low word of EXTRACTAMT */
  41 #define EXTRACTAMT r15:14       /* pair passed as the second operand of the register-form extractu */
  42
  43 #define TMP r28                 /* general 32-bit scratch */
  44
  45 #define MANTBITS 52             /* width of the mantissa field moved by insert() */
  46 #define HI_MANTBITS 20          /* bit position of the exponent field inside the high word */
  47 #define EXPBITS 11              /* width of the exponent field */
  48 #define BIAS 1024               /* used only as #-BIAS-60 and BIAS+BIAS-2; NOTE(review): not the IEEE bias of 1023 -- confirm before changing */
  49 #define MANTISSA_TO_INT_BIAS 52 /* not referenced by the code shown in this file */
  50 #define SR_BIT_INEXACT 5        /* not referenced by the code shown in this file */
  51
  52 #ifndef SR_ROUND_OFF            /* may be overridden from the command line */
  53 #define SR_ROUND_OFF 22         /* bit offset of the 2-bit rounding-mode field read from USR */
  54 #endif
  55
  56 #define NORMAL p3               /* entry-path predicate; #undef'd once the fast path is chosen */
  57 #define BIGB p2                 /* entry-path predicate: B has the larger exponent field */
  58
  59 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG /* export __qdsp_<TAG> as another name for __hexagon_<TAG> */
  60 #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG /* export __hexagon_fast_<TAG> likewise */
  61 #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG /* export __hexagon_fast2_<TAG> likewise */
  62 #define END(TAG) .size TAG,.-TAG /* set TAG's symbol size to the distance from TAG to the current location */
  63
  64         .text                                           // everything below is emitted into the code section
  65         .global __hexagon_adddf3                        // export both entry points
  66         .global __hexagon_subdf3
  67         .type __hexagon_adddf3, @function               // mark both symbols as functions
  68         .type __hexagon_subdf3, @function
  69
  70 Q6_ALIAS(adddf3)                                        // __qdsp_adddf3          -> __hexagon_adddf3
  71 FAST_ALIAS(adddf3)                                      // __hexagon_fast_adddf3  -> __hexagon_adddf3
  72 FAST2_ALIAS(adddf3)                                     // __hexagon_fast2_adddf3 -> __hexagon_adddf3
  73 Q6_ALIAS(subdf3)                                        // __qdsp_subdf3          -> __hexagon_subdf3
  74 FAST_ALIAS(subdf3)                                      // __hexagon_fast_subdf3  -> __hexagon_subdf3
  75 FAST2_ALIAS(subdf3)                                     // __hexagon_fast2_subdf3 -> __hexagon_subdf3
  76
  77         .p2align 5
  78 __hexagon_adddf3:                                       // double add: A (r1:0) + B (r3:2); the result is left in A
  79         {
  80                 EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)       // 11-bit exponent field of A
  81                 EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)       // 11-bit exponent field of B
  82                 ATMP = combine(##0x20000000,#0)                 // only bit 61 set; the mantissa is inserted just below it
  83         }
  84         {
  85                 NORMAL = dfclass(A,#2)                          // both writes hit the same predicate in one packet;
  86                 NORMAL = dfclass(B,#2)                          // the results are combined (ANDed per the Hexagon PRM -- confirm)
  87                 BTMP = ATMP                                     // same bit-61 template for B
  88                 BIGB = cmp.gtu(EXPB,EXPA)                       // Is B substantially greater than A?
  89         }
  90         {
  91                 if (!NORMAL) jump .Ladd_abnormal                // If abnormal, go to special code
  92                 if (BIGB) A = B                         // if B >> A, swap A and B
  93                 if (BIGB) B = A                         // If B >> A, swap A and B
  94                 if (BIGB) EXPB_A = combine(EXPA,EXPB)   // swap exponents
  95         }
  96         {
  97                 ATMP = insert(A,#MANTBITS,#EXPBITS-2)   // Q1.62
  98                 BTMP = insert(B,#MANTBITS,#EXPBITS-2)   // Q1.62
  99                 EXPDIFF = sub(EXPA,EXPB)                // exponent gap, used below as B's alignment shift
 100                 ZTMP = combine(#62,#1)                  // ZTMPH = 62 (shift cap), ZTMPL = 1 (sticky bit)
 101         }
 102 #undef BIGB
 103 #undef NORMAL
 104 #define B_POS p3
 105 #define A_POS p2
 106 #define NO_STICKIES p1
 107 .Ladd_continue:                                         // also entered from the tail of .Ladd_abnormal
 108         {
 109                 EXPDIFF = min(EXPDIFF,ZTMPH)            // If exponent difference >= ~60,
 110                                                         // will collapse to sticky bit
 111                 ATMP2 = neg(ATMP)                       // negated A significand, selected below when A is negative
 112                 A_POS = cmp.gt(AH,#-1)                  // true when A's sign bit is clear
 113                 EXTRACTOFF = #0                         // low word of EXTRACTAMT := 0
 114         }
 115         {
 116                 if (!A_POS) ATMP = ATMP2                // A negative: continue with the two's-complement value
 117                 ATMP2 = extractu(BTMP,EXTRACTAMT)       // bits of BTMP selected by r15:14 = {EXPDIFF, 0}; presumably the bits the shift drops -- confirm operand layout
 118                 BTMP = ASR(BTMP,EXPDIFF)                // align B's significand with A's
 119 #undef EXTRACTAMT
 120 #undef EXPDIFF
 121 #undef EXTRACTOFF
 122 #define ZERO r15:14
 123                 ZERO = #0                               // r15:14 is now reused as a 64-bit zero
 124         }
 125         {
 126                 NO_STICKIES = cmp.eq(ATMP2,ZERO)        // were all extracted bits zero?
 127                 if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)   // if not, OR ZTMPL (= 1) into B's low word as a sticky bit
 128                 EXPB = add(EXPA,#-BIAS-60)              // exponent adjustment, added into AH after the conversion
 129                 B_POS = cmp.gt(BH,#-1)                  // true when B's sign bit is clear
 130         }
 131         {
 132                 ATMP = add(ATMP,BTMP)                   // ADD!!!
 133                 ATMP2 = sub(ATMP,BTMP)                  // Negate and ADD --> SUB!!!
 134                 ZTMP = combine(#54,##2045)              // ZTMPH = 54, ZTMPL = 2045: exponent bounds tested next
 135         }
 136         {
 137                 p0 = cmp.gtu(EXPA,ZTMPH)                // must be pretty high in case of large cancellation
 138                 p0 = !cmp.gtu(EXPA,ZTMPL)               // and EXPA must not exceed 2045
 139                 if (!p0.new) jump:nt .Ladd_ovf_unf      // EXPA outside that window: take the checked path
 140                 if (!B_POS) ATMP = ATMP2                // if B neg, pick difference
 141         }
 142         {
 143                 A = convert_d2df(ATMP)                  // Convert to Double Precision, taking care of flags, etc.  So nice!
 144                 p0 = cmp.eq(ATMPH,#0)                   // p0 requires the high word to be zero...
 145                 p0 = cmp.eq(ATMPL,#0)                   // ...and the low word too, i.e. the sum is exactly zero
 146                 if (p0.new) jump:nt .Ladd_zero          // or maybe conversion handles zero case correctly?
 147         }
 148         {
 149                 AH += asl(EXPB,#HI_MANTBITS)            // add the adjustment at the exponent field's position
 150                 jumpr r31                               // return with the result in A
 151         }
 152         .falign
 153 __hexagon_subdf3:                                       // double subtract: A - B, computed as A + (-B)
 154         {
 155                 BH = togglebit(BH,#31)                  // flip bit 31 of B's high word, i.e. B's sign bit
 156                 jump __hexagon_adddf3                   // tail-jump to the canonical add entry (not the legacy __qdsp_ alias)
 157         }
 158 END(__hexagon_subdf3)
 159
 160         .falign
 161 .Ladd_zero:
 162         // True zero, full cancellation
 163         // +0 unless round towards negative infinity
 164         {
 165                 TMP = USR                               // read USR, which holds the rounding-mode field
 166                 A = #0                                  // default result: +0.0
 167                 BH = #1
 168         }
 169         {
 170                 TMP = extractu(TMP,#2,#SR_ROUND_OFF)    // 2-bit rounding mode; same configurable offset as the other paths
 171                 BH = asl(BH,#31)                        // BH = 0x80000000, the sign-bit mask
 172         }
 173         {
 174                 p0 = cmp.eq(TMP,#2)                     // mode 2 is the round-toward-negative-infinity case
 175                 if (p0.new) AH = xor(AH,BH)             // in that mode the result becomes -0.0
 176                 jumpr r31                               // return with the zero in A
 177         }
 178         .falign
 179 .Ladd_ovf_unf:
 180         // Overflow or Denormal is possible
 181         // Good news: Underflow flag is not possible!
 182         /*
 183          * ATMP has 2's complement value
 184          *
 185          * EXPA has A's exponent, EXPB has EXPA-BIAS-60
 186          *
 187          * Convert, extract exponent, add adjustment.
 188          * If > 2046, overflow
 189          * If <= 0, denormal
 190          *
 191          * Note that we've not done our zero check yet, so do that too
 192          *
 193          */
 194         {
 195                 A = convert_d2df(ATMP)                  // convert the 64-bit sum to double
 196                 p0 = cmp.eq(ATMPH,#0)                   // p0 requires both words of the sum
 197                 p0 = cmp.eq(ATMPL,#0)                   // to be zero
 198                 if (p0.new) jump:nt .Ladd_zero          // exact cancellation is handled separately
 199         }
 200         {
 201                 TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)        // exponent field produced by the conversion
 202                 AH += asl(EXPB,#HI_MANTBITS)            // tentatively apply the adjustment (valid when the result is normal)
 203         }
 204         {
 205                 EXPB = add(EXPB,TMP)                    // adjusted exponent as a signed integer
 206                 B = combine(##0x00100000,#0)            // only bit 52 set: explicit leading one for the denormal path
 207         }
 208         {
 209                 p0 = cmp.gt(EXPB,##BIAS+BIAS-2)         // adjusted exponent above 2046?
 210                 if (p0.new) jump:nt .Ladd_ovf           // yes: overflow handling
 211         }
 212         {
 213                 p0 = cmp.gt(EXPB,#0)                    // adjusted exponent still positive?
 214                 if (p0.new) jumpr:t r31                 // yes: A already holds the finished result
 215                 TMP = sub(#1,EXPB)                      // otherwise this is the right-shift count for denormalizing
 216         }
 217         {
 218                 B = insert(A,#MANTBITS,#0)              // low 52 bits from A under the bit-52 leading one
 219                 A = ATMP                                // A takes the signed sum; only its bit 63 survives below
 220         }
 221         {
 222                 B = lsr(B,TMP)                          // logical shift right by TMP; shifted-out bits are dropped
 223         }
 224         {
 225                 A = insert(B,#63,#0)                    // low 63 bits from B, bit 63 (sign) kept from the sum
 226                 jumpr r31                               // return the denormal result in A
 227         }
 228         .falign
 229 .Ladd_ovf:
 230         // We get either max finite value or infinity.  Either way, overflow+inexact
 231         {
 232                 A = ATMP                                // 2's complement value
 233                 TMP = USR                               // read USR for the rounding mode and to update flags
 234                 ATMP = combine(##0x7fefffff,#-1)        // positive max finite
 235         }
 236         {
 237                 EXPB = extractu(TMP,#2,#SR_ROUND_OFF)   // rounding bits
 238                 TMP = or(TMP,#0x28)                     // inexact + overflow
 239                 BTMP = combine(##0x7ff00000,#0)         // positive infinity
 240         }
 241         {
 242                 USR = TMP                               // write the updated flag bits back
 243                 EXPB ^= lsr(AH,#31)                     // Does sign match rounding?
 244                 TMP = EXPB                              // unmodified rounding mode
 245         }
 246         {
 247                 p0 = !cmp.eq(TMP,#1)                    // If not round-to-zero and
 248                 p0 = !cmp.eq(EXPB,#2)                   // Not rounding the other way,
 249                 if (p0.new) ATMP = BTMP                 // we should get infinity
 250         }
 251         {
 252                 A = insert(ATMP,#63,#0)                 // insert inf/maxfinite, leave sign
 253         }
 254         {
 255                 p0 = dfcmp.eq(A,A)                      // NOTE(review): p0 is not read afterwards in this file; purpose unclear -- confirm
 256                 jumpr r31                               // return the max-finite or infinity result in A
 257         }
 258
 259 .Ladd_abnormal:                                         // reached when A and B are not both in dfclass #2 (see the NORMAL test at entry)
 260         {
 261                 ATMP = extractu(A,#63,#0)               // strip off sign
 262                 BTMP = extractu(B,#63,#0)               // strip off sign
 263         }
 264         {
 265                 p3 = cmp.gtu(ATMP,BTMP)                 // is |A| the strictly larger bit pattern?
 266                 if (!p3.new) A = B                      // sort values
 267                 if (!p3.new) B = A                      // sort values
 268         }
 269         {
 270                 // Any NaN --> NaN, possibly raise invalid if sNaN
 271                 p0 = dfclass(A,#0x0f)           // A not NaN?
 272                 if (!p0.new) jump:nt .Linvalid_nan_add
 273                 if (!p3) ATMP = BTMP            // keep the stripped copies in the same order as A and B
 274                 if (!p3) BTMP = ATMP
 275         }
 276         {
 277                 // Infinity + non-infinity number is infinity
 278                 // Infinity + infinity --> inf or nan
 279                 p1 = dfclass(A,#0x08)           // A is infinity
 280                 if (p1.new) jump:nt .Linf_add
 281         }
 282         {
 283                 p2 = dfclass(B,#0x01)           // B is zero
 284                 if (p2.new) jump:nt .LB_zero    // so return A or special 0+0
 285                 ATMP = #0                       // .LB_zero compares A against this zero
 286         }
 287         // We are left with adding one or more subnormals
 288         {
 289                 p0 = dfclass(A,#4)              // is A (the larger magnitude) in class #4?
 290                 if (p0.new) jump:nt .Ladd_two_subnormal
 291                 ATMP = combine(##0x20000000,#0) // same bit-61 template the entry path builds
 292         }
 293         {
 294                 EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)       // re-extract A's exponent field after the sort
 295                 EXPB = #1                       // B is given an exponent of 1 here
 296                 // BTMP already ABS(B)
 297                 BTMP = asl(BTMP,#EXPBITS-2)     // shift |B| up to the same bit position the entry path inserts at
 298         }
 299 #undef ZERO
 300 #define EXTRACTOFF r14
 301 #define EXPDIFF r15
 302         {
 303                 ATMP = insert(A,#MANTBITS,#EXPBITS-2)   // same insert the entry path performs for A
 304                 EXPDIFF = sub(EXPA,EXPB)                // exponent gap, as on the entry path
 305                 ZTMP = combine(#62,#1)                  // ZTMPH = 62, ZTMPL = 1, as on the entry path
 306                 jump .Ladd_continue                     // rejoin the common add sequence
 307         }
 308
 309 .Ladd_two_subnormal:                                    // reached from .Ladd_abnormal when the larger operand is in dfclass #4
 310         {
 311                 ATMP = extractu(A,#63,#0)               // |A| as a 63-bit integer
 312                 BTMP = extractu(B,#63,#0)               // |B| as a 63-bit integer
 313         }
 314         {
 315                 ATMP = neg(ATMP)                        // -|A|, kept only when A is negative
 316                 BTMP = neg(BTMP)                        // -|B|, kept only when B is negative
 317                 p0 = cmp.gt(AH,#-1)                     // A's sign bit clear?
 318                 p1 = cmp.gt(BH,#-1)                     // B's sign bit clear?
 319         }
 320         {
 321                 if (p0) ATMP = A                        // non-negative A: use its bits unchanged
 322                 if (p1) BTMP = B                        // non-negative B: use its bits unchanged
 323         }
 324         {
 325                 ATMP = add(ATMP,BTMP)                   // signed 64-bit integer sum
 326         }
 327         {
 328                 BTMP = neg(ATMP)                        // negated sum, used when the sum is negative
 329                 p0 = cmp.gt(ATMPH,#-1)                  // sum non-negative?
 330                 B = #0
 331         }
 332         {
 333                 if (!p0) A = BTMP                       // negative sum: A = magnitude
 334                 if (p0) A = ATMP                        // non-negative sum: A = sum
 335                 BH = ##0x80000000                       // B becomes 0x80000000:00000000 (sign bit only)
 336         }
 337         {
 338                 if (!p0) AH = or(AH,BH)                 // negative sum: set the sign bit
 339                 p0 = dfcmp.eq(A,B)                      // floating-point compare of A with the signed zero in B
 340                 if (p0.new) jump:nt .Lzero_plus_zero    // equal: let the zero code choose the result
 341         }
 342         {
 343                 jumpr r31                               // return the sum in A
 344         }
 345
 346 .Linvalid_nan_add:                                      // reached when the dfclass(A,#0x0f) test in .Ladd_abnormal fails
 347         {
 348                 TMP = convert_df2sf(A)                  // will generate invalid if sNaN
 349                 p0 = dfclass(B,#0x0f)                   // if B is not NaN
 350                 if (p0.new) B = A                       // make it whatever A is
 351         }
 352         {
 353                 BL = convert_df2sf(B)                   // will generate invalid if sNaN
 354                 A = #-1                                 // result has every bit set (a NaN bit pattern)
 355                 jumpr r31                               // return it in A
 356         }
 357         .falign
 358 .LB_zero:                                               // B is zero; ATMP was set to 0 in the packet that jumped here
 359         {
 360                 p0 = dfcmp.eq(ATMP,A)                   // is A also zero?
 361                 if (!p0.new) jumpr:t r31                // If not, just return A
 362         }
 363         // 0 + 0 is special
 364         // if equal integral values, they have the same sign, which is fine for all rounding
 365         // modes.
 366         // If unequal in sign, we get +0 for all rounding modes except round down
 367 .Lzero_plus_zero:                                       // also entered from .Ladd_two_subnormal
 368         {
 369                 p0 = cmp.eq(A,B)                        // 64-bit integer compare: identical bit patterns?
 370                 if (p0.new) jumpr:t r31                 // yes: return A unchanged
 371         }
 372         {
 373                 TMP = USR                               // read USR for the rounding mode
 374         }
 375         {
 376                 TMP = extractu(TMP,#2,#SR_ROUND_OFF)    // 2-bit rounding-mode field
 377                 A = #0                                  // default result: +0.0
 378         }
 379         {
 380                 p0 = cmp.eq(TMP,#2)                     // mode 2 is the round-down case named above
 381                 if (p0.new) AH = ##0x80000000           // in that mode the result is -0.0
 382                 jumpr r31                               // return the zero in A
 383         }
 384 .Linf_add:                                              // reached from .Ladd_abnormal when A is infinite
 385         // adding infinities is only OK if they are equal
 386         {
 387                 p0 = !cmp.eq(AH,BH)                     // Do they have different signs
 388                 p0 = dfclass(B,#8)                      // And is B also infinite?
 389                 if (!p0.new) jumpr:t r31                // If not, just a normal inf
 390         }
 391         {
 392                 BL = ##0x7f800001                       // sNAN
 393         }
 394         {
 395                 A = convert_sf2df(BL)                   // trigger invalid, set NaN
 396                 jumpr r31                               // return the NaN in A
 397         }
 398 END(__hexagon_adddf3)