1 //===----------------------Hexagon builtin routine ------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
// Q6_ALIAS(TAG): publish __qdsp_<TAG> as a global alias for __hexagon_<TAG>.
10 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
// END(TAG): set the ELF symbol size of TAG to the bytes emitted since its label.
11 #define END(TAG) .size TAG,.-TAG
// NOTE(review): despite this header comment, the routine below implements
// fused multiply-add (__hexagon_fmadf4/5), not plain multiply.
13 /* Double Precision Multiply */
// Number of mantissa bits held in the high 32-bit word of an IEEE double (52 - 32).
65 #define HI_MANTBITS 20
// Bit offset of the 2-bit rounding-mode field extracted below — presumably
// from the user status register (USR); confirm against the full file.
76 #define SR_ROUND_OFF 22
80 * First, classify for normal values, and abort if abnormal
82 * Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
84 * Since we know that the 2 MSBs of the H registers are zero, we should never carry
85 * the partial products that involve the H registers
87 * Try to buy X slots, at the expense of latency if needed
89 * We will have PP_HH with the upper bits of the product, PP_LL with the lower
90 * PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
91 * PP_HH can have a minimum of 0x0100_0000_0000_0000
93 * 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
95 * We need to align CTMP.
96 * If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
97 * If CTMP << PP align CTMP and add 128 bits. Then compute sticky
98 * If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
100 * Convert partial product and CTMP to 2's complement prior to addition
102 * After we add, we need to normalize into upper 64 bits, then compute sticky.
// NOTE(review): this chunk is a sampled fragment of the original file —
// instruction-packet braces `{ ... }` and many intervening lines are
// missing, so adjacent lines below do NOT necessarily form complete
// packets. Comments describe only what the visible instructions show.
//
// __hexagon_fmadf4 / __hexagon_fmadf5: double-precision fused
// multiply-add, computing A*B + C per the outline in the comment block
// above (128-bit partial product PP_HH:PP_LL, aligned addend CTMP,
// normalize, then convert back to double).
108 .global __hexagon_fmadf4
109 .type __hexagon_fmadf4,@function
110 .global __hexagon_fmadf5
111 .type __hexagon_fmadf5,@function
// Classify A and B: dfclass #2 tests "normal"; any abnormal operand
// diverts to .Lfma_abnormal_ab below.
120 P_TMP = dfclass(A,#2)
121 P_TMP = dfclass(B,#2)
// Unpack the mantissas into ATMP/BTMP; PP_ODD_H supplies the implicit
// leading 1 bit OR'd into the high words below.
126 ATMP = insert(A,#MANTBITS,#EXPBITS-3)
127 BTMP = insert(B,#MANTBITS,#EXPBITS-3)
128 PP_ODD_H = ##0x10000000
129 allocframe(#STACKSPACE)
// Begin the 64x64 multiply: low x low partial product first.
132 PP_LL = mpyu(ATMPL,BTMPL)
133 if (!P_TMP) jump .Lfma_abnormal_ab
134 ATMPH = or(ATMPH,PP_ODD_H)
135 BTMPH = or(BTMPH,PP_ODD_H)
// C must also be normal; otherwise fix it up at .Lfma_abnormal_c and
// re-enter at .Lfma_abnormal_c_restart.
138 P_TMP = dfclass(C,#2)
139 if (!P_TMP.new) jump:nt .Lfma_abnormal_c
140 CTMP = combine(PP_ODD_H,#0)
141 PP_ODD = combine(#0,PP_LL_H)
143 .Lfma_abnormal_c_restart:
// Accumulate the two cross partial products (lo x hi) into PP_ODD.
145 PP_ODD += mpyu(BTMPL,ATMPH)
146 CTMP = insert(C,#MANTBITS,#EXPBITS-3)
151 PP_ODD += mpyu(ATMPL,BTMPH)
// P_TMP: true iff C is non-negative (sign bit of CH clear).
153 P_TMP = cmp.gt(CH,#-1)
// Extract the biased exponent fields of A and B.
157 EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
158 EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
159 PP_HH = combine(#0,PP_ODD_H)
160 if (!P_TMP) CTMP = EXPBA
// High x high partial product completes the 128-bit product PP_HH:PP_LL.
163 PP_HH += mpyu(ATMPH,BTMPH)
164 PP_LL = combine(PP_ODD_L,PP_LL_L)
174 #define RIGHTLEFTSHIFT r13:12
175 #define RIGHTSHIFT r13
176 #define LEFTSHIFT r12
// Product exponent is the sum of the operand exponents (double bias
// removed further down).
178 EXPA = add(EXPA,EXPB)
183 EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
185 /* PP_HH:PP_LL now has product */
186 /* CTMP is negated */
187 /* EXPA,B,C are extracted */
189 * We need to negate PP
190 * Since we will be adding with carry later, if we need to negate,
191 * just invert all bits now, which we can do conditionally and in parallel
193 #define PP_HH_TMP r15:14
194 #define PP_LL_TMP r7:6
// Remove the doubled bias and apply the fixed-point ADJUST constant.
196 EXPA = add(EXPA,#-BIAS+(ADJUST))
197 PROD_NEG = !cmp.gt(TMP,#-1)
// Conditionally form the two's complement of the 128-bit product via
// subtract-with-carry; the result is committed only when P_TMP is set.
202 PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
203 P_TMP = !cmp.gt(TMP,#-1)
204 SWAP = cmp.gt(EXPC,EXPA) // If C >> PP
205 if (SWAP.new) EXPCA = combine(EXPA,EXPC)
208 PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
209 if (P_TMP) PP_LL = PP_LL_TMP
// EXPC becomes the alignment distance between the product and C.
215 EXPC = sub(EXPA,EXPC)
218 if (P_TMP) PP_HH = PP_HH_TMP
// P_TMP: alignment shift exceeds 63, so a 64-bit pre-shift is needed.
219 P_TMP = cmp.gt(EXPC,#63)
// When C dominates (SWAP), exchange C and the product so CTMP always
// holds the operand that gets shifted right.
// NOTE(review): these conditional moves presumably execute in one
// packet (simultaneous swap) in the original — braces not visible here.
220 if (SWAP) PP_LL = CTMP2
221 if (SWAP) CTMP2 = PP_LL
231 if (SWAP) PP_HH = CTMP // Swap C and PP
232 if (SWAP) CTMP = PP_HH
233 if (P_TMP) EXPC = add(EXPC,#-64)
237 // If diff > 63, pre-shift-right by 64...
238 if (P_TMP) CTMP2 = CTMP
240 RIGHTSHIFT = min(EXPC,TMP)
246 #define STICKIES r5:4
250 if (P_TMP) CTMP = combine(TMP,TMP) // sign extension of pre-shift-right-64
// Bits shifted out of the low word are captured as sticky bits.
251 STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
252 CTMP2 = lsr(CTMP2,RIGHTSHIFT)
253 LEFTSHIFT = sub(#64,RIGHTSHIFT)
// 128-bit arithmetic right shift of CTMP:CTMP2 by RIGHTSHIFT.
258 CTMP2 |= lsl(CTMP,LEFTSHIFT)
259 CTMP = asr(CTMP,RIGHTSHIFT)
262 P_CARRY = cmp.gtu(STICKIES,ZERO) // If we have sticky bits from C shift
263 if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
// 128-bit add of the aligned addend and the product, folding the
// sticky indication in through the carry chain.
271 PP_LL = add(CTMP2,PP_LL,P_CARRY):carry // use the carry to add the sticky
274 PP_HH = add(CTMP,PP_HH,P_CARRY):carry
278 * PP_HH:PP_LL now holds the sum
279 * We may need to normalize left, up to ??? bits.
281 * I think that if we have massive cancellation, the range we normalize by
// clb counts leading sign bits; #-2 accounts for the guard positions.
285 LEFTSHIFT = add(clb(PP_HH),#-2)
286 if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits?
288 /* We had all sign bits, shift left by 62. */
// Massive-cancellation path: shift the whole 128-bit sum left 62 and
// compensate the exponent, then re-measure.
290 CTMP = extractu(PP_LL,#62,#2)
291 PP_LL = asl(PP_LL,#62)
292 EXPA = add(EXPA,#-62) // And adjust exponent of result
295 PP_HH = insert(CTMP,#62,#0) // Then shift 63
298 LEFTSHIFT = add(clb(PP_HH),#-2)
// Normalize: significand to the top of CTMP; low bits that fall off
// accumulate into STICKIES.
303 CTMP = asl(PP_HH,LEFTSHIFT)
304 STICKIES |= asl(PP_LL,LEFTSHIFT)
305 RIGHTSHIFT = sub(#64,LEFTSHIFT)
306 EXPA = sub(EXPA,LEFTSHIFT)
309 CTMP |= lsr(PP_LL,RIGHTSHIFT)
310 EXACT = cmp.gtu(ONE,STICKIES) // true iff no bits were lost
// Inexact: set the bottom (sticky) bit so the convert rounds correctly.
314 if (!EXACT) CTMPL = or(CTMPL,S_ONE)
315 // If EXPA is overflow/underflow, jump to ovf_unf
316 P_TMP = !cmp.gt(EXPA,TMP)
317 P_TMP = cmp.gt(EXPA,#1)
318 if (!P_TMP.new) jump:nt .Lfma_ovf_unf
321 // XXX: FIXME: should PP_HH for check of zero be CTMP?
322 P_TMP = cmp.gtu(ONE,CTMP) // is result true zero?
// Common exit: convert the fixed-point significand back to double and
// patch the computed exponent into the high word.
323 A = convert_d2df(CTMP)
324 EXPA = add(EXPA,#-BIAS-60)
328 AH += asl(EXPA,#HI_MANTBITS)
330 if (!P_TMP) dealloc_return // not zero, return
333 /* We had full cancellation. Return +/- zero (-0 when round-down) */
// Read the 2-bit rounding-mode field to choose the zero's sign.
339 TMP = extractu(TMP,#2,#SR_ROUND_OFF)
345 if (p0.new) AH = ##0x80000000
349 #undef RIGHTLEFTSHIFT
// .Lfma_ovf_unf path (label not visible in this fragment): re-check
// for a zero sum, then convert and range-check the exponent.
358 p0 = cmp.gtu(ONE,CTMP)
359 if (p0.new) jump:nt .Ladd_yields_zero
362 A = convert_d2df(CTMP)
363 EXPA = add(EXPA,#-BIAS-60)
369 AH += asl(EXPA,#HI_MANTBITS)
370 NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
373 NEW_EXPA = add(EXPA,NEW_EXPB)
396 p0 = cmp.gt(EXPA,##BIAS+BIAS)
397 if (p0.new) jump:nt .Lfma_ovf
401 if (p0.new) jump:nt .Lpossible_unf
// Denormal result: normalize ATMP, then shift right into denormal
// position, collecting lost bits as sticky.
404 // TMP has original EXPA.
405 // ATMP is corresponding value
406 // Normalize ATMP and shift right to correct location
407 EXPB = add(clb(ATMP),#-2) // Amount to left shift to normalize
408 EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize
409 p3 = cmp.gt(CTMPH,#-1) // remember result sign before taking abs
412 /* We know that the infinite range exponent should be EXPA */
413 /* CTMP is 2's complement, ATMP is abs(CTMP) */
415 EXPA = add(EXPA,EXPB) // how much to shift back right
416 ATMP = asl(ATMP,EXPB) // shift left
426 B = extractu(ATMP,EXPBA)
427 ATMP = asr(ATMP,EXPB)
// Fold shifted-out bits into the sticky bit before rounding.
431 if (!p0.new) ATMPL = or(ATMPL,S_ONE)
432 ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
436 p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
437 if (!p1.new) AH = or(AH,AL)
443 TMP = #-BIAS-(MANTBITS+FUDGE2)
446 A = convert_d2df(CTMP)
449 AH += asl(TMP,#HI_MANTBITS)
460 if (!p0.new) dealloc_return:t
464 p0 = bitsset(ATMPH,TMP)
469 if (p0) BH = or(BH,BL)
// Overflow: select max-normal (0x7fefffff_ffffffff) or infinity
// (0x7ff00000_00000000) depending on the rounding mode read below.
481 CTMP = combine(##0x7fefffff,#-1)
485 ATMP = combine(##0x7ff00000,#0)
486 BH = extractu(TMP,#2,#SR_ROUND_OFF)
499 p0 = dfcmp.eq(ATMP,ATMP)
500 if (p0.new) CTMP = ATMP
// Insert the 63-bit magnitude, preserving the sign bit already in A.
503 A = insert(CTMP,#63,#0)
// .Lfma_abnormal_ab (label not visible): A or B is zero, subnormal,
// inf, or NaN. Sort by magnitude so A holds the larger value, then
// classify in priority order: NaN, inf, zero, subnormal.
522 ATMP = extractu(A,#63,#0)
523 BTMP = extractu(B,#63,#0)
527 p3 = cmp.gtu(ATMP,BTMP)
528 if (!p3.new) A = B // sort values
532 p0 = dfclass(A,#0x0f) // A NaN?
533 if (!p0.new) jump:nt .Lnan
538 p1 = dfclass(A,#0x08) // A is infinity
539 p1 = dfclass(B,#0x0e) // B is nonzero
542 p0 = dfclass(A,#0x08) // a is inf
543 p0 = dfclass(B,#0x01) // b is zero
546 if (p1) jump .Lab_inf
547 p2 = dfclass(B,#0x01)
550 if (p0) jump .Linvalid // inf * 0: invalid operation
551 if (p2) jump .Lab_true_zero
554 // We are left with a normal or subnormal times a subnormal, A > B
555 // If A and B are both very small, we will go to a single sticky bit; replace
556 // A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results
557 // if A and B might multiply to something bigger, decrease A exp and increase B exp
561 if (p0.new) jump:nt .Lfma_ab_tiny
// Rescale: raise B's exponent by its normalization amount and lower
// A's by the same amount, keeping the product A*B unchanged.
564 TMP = add(clb(BTMP),#-EXPBITS)
570 B = insert(BTMP,#63,#0)
571 AH -= asl(TMP,#HI_MANTBITS)
// .Lfma_ab_tiny: both operands tiny — replace both magnitudes with a
// fixed tiny value (per the comment above, result is equivalent).
576 ATMP = combine(##0x00100000,#0)
578 A = insert(ATMP,#63,#0)
579 B = insert(ATMP,#63,#0)
// .Lab_inf: A*B is +/- inf. A NaN in C still takes priority; an
// opposite-signed infinite C makes the operation invalid.
586 p0 = dfclass(C,#0x10)
593 p1 = dfclass(C,#0x08)
594 if (p1.new) jump:nt .Lfma_inf_plus_inf
596 /* A*B is +/- inf, C is finite. Return A */
602 { // adding infinities of different signs is invalid
604 if (!p0.new) jump:nt .Linvalid
// .Lnan: NaN propagation — the df->sf conversions presumably serve to
// quiet signaling NaNs / raise invalid; confirm against the full file.
612 p0 = dfclass(B,#0x10)
613 p1 = dfclass(C,#0x10)
618 BH = convert_df2sf(B)
619 BL = convert_df2sf(C)
622 BH = convert_df2sf(A)
// .Linvalid: materialize the default NaN by converting a single-
// precision signaling NaN (also raises the invalid exception).
629 TMP = ##0x7f800001 // sp snan
632 A = convert_sf2df(TMP)
637 // B is zero, A is finite number
639 p0 = dfclass(C,#0x10)
640 if (p0.new) jump:nt .Lnan
644 p0 = dfcmp.eq(B,C) // is C also zero?
645 AH = lsr(AH,#31) // get sign
648 BH ^= asl(AH,#31) // form correctly signed zero in B
649 if (!p0) A = C // If C is not zero, return C
652 /* B has correctly signed zero, C is also zero */
655 p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0
656 if (p0.new) jumpr:t r31
// Signs differ (+0 + -0): result sign depends on the rounding mode.
663 TMP = extractu(TMP,#2,#SR_ROUND_OFF)
668 if (p0.new) AH = ##0x80000000
677 /* We know that AB is normal * normal */
678 /* C is not normal: zero, subnormal, inf, or NaN. */
680 p0 = dfclass(C,#0x10) // is C NaN?
681 if (p0.new) jump:nt .Lnan
682 if (p0.new) A = C // move NaN to A
686 p0 = dfclass(C,#0x08) // is C inf?
687 if (p0.new) A = C // return C
688 if (p0.new) jumpr:nt r31
691 // If we have a zero, and we know AB is normal*normal, we can just call normal multiply
693 p0 = dfclass(C,#0x01) // is C zero?
694 if (p0.new) jump:nt __hexagon_muldf3
697 // Left with: subnormal
698 // Adjust C and jump back to restart
700 allocframe(#STACKSPACE) // oops, deallocated above, re-allocate frame
// Patch a usable exponent into subnormal C, then rejoin the main path.
702 CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
703 jump .Lfma_abnormal_c_restart