1 //===----------------------Hexagon builtin routine ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
10 #define END(TAG) .size TAG,.-TAG
12 // Double Precision Multiply
64 #define HI_MANTBITS 20
75 #define SR_ROUND_OFF 22
78 // First, classify for normal values, and abort if abnormal
80 // Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
82 // Since we know that the 2 MSBs of the H registers are zero, we should never carry
83 // the partial products that involve the H registers
85 // Try to buy X slots, at the expense of latency if needed
87 // We will have PP_HH with the upper bits of the product, PP_LL with the lower
88 // PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
89 // PP_HH can have a minimum of 0x0100_0000_0000_0000
91 // 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
93 // We need to align CTMP.
94 // If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
95 // If CTMP << PP align CTMP and add 128 bits. Then compute sticky
96 // If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
98 // Convert partial product and CTMP to 2's complement prior to addition
100 // After we add, we need to normalize into upper 64 bits, then compute sticky.
103 .global __hexagon_fmadf4
104 .type __hexagon_fmadf4,@function
105 .global __hexagon_fmadf5
106 .type __hexagon_fmadf5,@function
// double __hexagon_fmadf4(double a, double b, double c): IEEE-754 fused
// multiply-add, a*b + c with a single rounding.  fmadf5 is exported as an
// additional entry name for the same code.
//
// NOTE(review): this extraction appears to have lost the Hexagon packet
// braces `{ ... }` and the register-alias #defines (A, B, C, ATMP, BTMP,
// CTMP, P_TMP, TMP, EXPA/B/C, ...) that the instructions below rely on --
// TODO confirm against the upstream compiler-rt dfma.S before editing.
// Instruction pairs that read and write each other's registers (e.g. the
// SWAP pairs below) are only correct because they execute in the same
// packet, where all reads see the pre-packet register values.
//
// Classify A and B as "normal" (dfclass #2); both writes to P_TMP
// presumably AND together within one packet, giving
// P_TMP = (A normal) && (B normal) -- TODO confirm packetization.
115 P_TMP = dfclass(A,#2)
116 P_TMP = dfclass(B,#2)
// Unpack the mantissas into ATMP/BTMP; the implicit leading mantissa bit
// (0x10000000 in the high word) is OR'd in below.  allocframe reserves
// stack for spills used by the long dependency chains.
121 ATMP = insert(A,#MANTBITS,#EXPBITS-3)
122 BTMP = insert(B,#MANTBITS,#EXPBITS-3)
123 PP_ODD_H = ##0x10000000
124 allocframe(#STACKSPACE)
// Start the 64x64 partial-product accumulation (low x low first) and
// bail out to the slow path if either A or B was not normal.
127 PP_LL = mpyu(ATMPL,BTMPL)
128 if (!P_TMP) jump .Lfma_abnormal_ab
129 ATMPH = or(ATMPH,PP_ODD_H)
130 BTMPH = or(BTMPH,PP_ODD_H)
// Classify C; abnormal C (zero/subnormal/inf/NaN) is handled at
// .Lfma_abnormal_c near the end of the file.
133 P_TMP = dfclass(C,#2)
134 if (!P_TMP.new) jump:nt .Lfma_abnormal_c
135 CTMP = combine(PP_ODD_H,#0)
136 PP_ODD = combine(#0,PP_LL_H)
// Re-entry point once an abnormal (subnormal) C has been fixed up by the
// code at the bottom of this file.
138 .Lfma_abnormal_c_restart:
// Accumulate the two cross partial products into PP_ODD and unpack C's
// mantissa the same way as A and B above.
140 PP_ODD += mpyu(BTMPL,ATMPH)
141 CTMP = insert(C,#MANTBITS,#EXPBITS-3)
146 PP_ODD += mpyu(ATMPL,BTMPH)
// P_TMP = (CH >= 0), i.e. C is non-negative (sign test on the high word).
148 P_TMP = cmp.gt(CH,#-1)
// Extract the biased exponents of A and B from their high words.
152 EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
153 EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
154 PP_HH = combine(#0,PP_ODD_H)
155 if (!P_TMP) CTMP = EXPBA
// Finish the 128-bit product: PP_HH gets high x high plus the carried-up
// odd terms; PP_LL recombines the low halves.
158 PP_HH += mpyu(ATMPH,BTMPH)
159 PP_LL = combine(PP_ODD_L,PP_LL_L)
// Scratch aliases for the alignment-shift amounts (register pair r13:12).
169 #define RIGHTLEFTSHIFT r13:12
170 #define RIGHTSHIFT r13
171 #define LEFTSHIFT r12
// Product exponent = EXPA + EXPB (bias removed below); extract C's exponent.
173 EXPA = add(EXPA,EXPB)
178 EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
180 // PP_HH:PP_LL now has product
182 // EXPA,B,C are extracted
183 // We need to negate PP
184 // Since we will be adding with carry later, if we need to negate,
185 // just invert all bits now, which we can do conditionally and in parallel
186 #define PP_HH_TMP r15:14
187 #define PP_LL_TMP r7:6
// Remove the double bias and apply the fixed-point ADJUST offset.
// PROD_NEG = (TMP < 0); TMP presumably holds the combined product sign
// word at this point -- TODO confirm which register TMP aliases here.
189 EXPA = add(EXPA,#-BIAS+(ADJUST))
190 PROD_NEG = !cmp.gt(TMP,#-1)
// Conditionally negate the 128-bit product via sub-with-carry, and decide
// whether C's exponent dominates the product's (SWAP).  The combine at
// line 198 captures the exponents in swapped order within the packet.
195 PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
196 P_TMP = !cmp.gt(TMP,#-1)
197 SWAP = cmp.gt(EXPC,EXPA) // If C >> PP
198 if (SWAP.new) EXPCA = combine(EXPA,EXPC)
201 PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
202 if (P_TMP) PP_LL = PP_LL_TMP
// EXPC becomes the alignment distance between the two operands.
208 EXPC = sub(EXPA,EXPC)
211 if (P_TMP) PP_HH = PP_HH_TMP
212 P_TMP = cmp.gt(EXPC,#63)
// Same-packet swap idiom: both reads see the old values, so these pairs
// exchange PP_LL<->CTMP2 and PP_HH<->CTMP when SWAP is set.
213 if (SWAP) PP_LL = CTMP2
214 if (SWAP) CTMP2 = PP_LL
224 if (SWAP) PP_HH = CTMP // Swap C and PP
225 if (SWAP) CTMP = PP_HH
226 if (P_TMP) EXPC = add(EXPC,#-64)
230 // If diff > 63, pre-shift-right by 64...
231 if (P_TMP) CTMP2 = CTMP
// Cap the funnel-shift amount; TMP here presumably holds the maximum
// useful shift -- TODO confirm from the elided packet.
233 RIGHTSHIFT = min(EXPC,TMP)
239 #define STICKIES r5:4
// Align the smaller operand right by RIGHTSHIFT, collecting the bits
// shifted out into STICKIES so rounding stays correct.
243 if (P_TMP) CTMP = combine(TMP,TMP) // sign extension of pre-shift-right-64
244 STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
245 CTMP2 = lsr(CTMP2,RIGHTSHIFT)
246 LEFTSHIFT = sub(#64,RIGHTSHIFT)
251 CTMP2 |= lsl(CTMP,LEFTSHIFT)
252 CTMP = asr(CTMP,RIGHTSHIFT)
// Fold the sticky bits into the low bit via the carry chain of the
// 128-bit add: carry-in acts as the OR of the shifted-out bits.
255 P_CARRY = cmp.gtu(STICKIES,ZERO) // If we have sticky bits from C shift
256 if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
264 PP_LL = add(CTMP2,PP_LL,P_CARRY):carry // use the carry to add the sticky
267 PP_HH = add(CTMP,PP_HH,P_CARRY):carry
270 // PP_HH:PP_LL now holds the sum
271 // We may need to normalize left, up to ??? bits.
273 // I think that if we have massive cancellation, the range we normalize by
// (comment truncated in this extraction -- TODO restore from upstream)
// clb() counts leading sign bits; LEFTSHIFT == TMP means PP_HH is all
// sign bits (massive cancellation) and we fall through to the 62-bit
// pre-shift below; otherwise take the numeric local label `1:` path
// (label not visible in this extraction).
276 LEFTSHIFT = add(clb(PP_HH),#-2)
277 if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits?
279 // We had all sign bits, shift left by 62.
281 CTMP = extractu(PP_LL,#62,#2)
282 PP_LL = asl(PP_LL,#62)
283 EXPA = add(EXPA,#-62) // And adjust exponent of result
286 PP_HH = insert(CTMP,#62,#0) // Then shift 63
// Recompute the normalization distance after the coarse shift.
289 LEFTSHIFT = add(clb(PP_HH),#-2)
// Final normalization: funnel-shift PP_HH:PP_LL left into CTMP, keep the
// spilled-out low bits as stickies, and adjust the exponent to match.
294 CTMP = asl(PP_HH,LEFTSHIFT)
295 STICKIES |= asl(PP_LL,LEFTSHIFT)
296 RIGHTSHIFT = sub(#64,LEFTSHIFT)
297 EXPA = sub(EXPA,LEFTSHIFT)
300 CTMP |= lsr(PP_LL,RIGHTSHIFT)
// EXACT = (STICKIES == 0); if inexact, OR a sticky LSB into the result
// so convert_d2df rounds correctly.
301 EXACT = cmp.gtu(ONE,STICKIES)
305 if (!EXACT) CTMPL = or(CTMPL,S_ONE)
306 // If EXPA is overflow/underflow, jump to ovf_unf
307 P_TMP = !cmp.gt(EXPA,TMP)
308 P_TMP = cmp.gt(EXPA,#1)
309 if (!P_TMP.new) jump:nt .Lfma_ovf_unf
// Common exit: convert the fixed-point result to double and splice the
// computed exponent back into the high word.
312 // XXX: FIXME: should PP_HH for check of zero be CTMP?
313 P_TMP = cmp.gtu(ONE,CTMP) // is result true zero?
314 A = convert_d2df(CTMP)
315 EXPA = add(EXPA,#-BIAS-60)
319 AH += asl(EXPA,#HI_MANTBITS)
321 if (!P_TMP) dealloc_return // not zero, return
324 // We had full cancellation. Return +/- zero (-0 when round-down)
// TMP holds USR here (per SR_ROUND_OFF extraction); round-toward-minus
// mode yields -0.0 -- surrounding packet lines elided in this extraction.
330 TMP = extractu(TMP,#2,#SR_ROUND_OFF)
336 if (p0.new) AH = ##0x80000000
340 #undef RIGHTLEFTSHIFT
// Overflow/underflow fixup path (.Lfma_ovf_unf region; label lines not
// visible in this extraction).  A zero result here is an exact add that
// cancelled, handled at .Ladd_yields_zero.
349 p0 = cmp.gtu(ONE,CTMP)
350 if (p0.new) jump:nt .Ladd_yields_zero
353 A = convert_d2df(CTMP)
354 EXPA = add(EXPA,#-BIAS-60)
360 AH += asl(EXPA,#HI_MANTBITS)
// Re-derive the exponent convert_d2df actually produced and compare the
// infinite-precision exponent against the representable range.
361 NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
364 NEW_EXPA = add(EXPA,NEW_EXPB)
387 p0 = cmp.gt(EXPA,##BIAS+BIAS)
388 if (p0.new) jump:nt .Lfma_ovf
392 if (p0.new) jump:nt .Lpossible_unf
// Denormal-result path: renormalize ATMP then shift right into subnormal
// position, keeping sticky bits for correct rounding.
395 // TMP has original EXPA.
396 // ATMP is corresponding value
397 // Normalize ATMP and shift right to correct location
398 EXPB = add(clb(ATMP),#-2) // Amount to left shift to normalize
399 EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize
400 p3 = cmp.gt(CTMPH,#-1)
403 // We know that the infinite range exponent should be EXPA
404 // CTMP is 2's complement, ATMP is abs(CTMP)
406 EXPA = add(EXPA,EXPB) // how much to shift back right
407 ATMP = asl(ATMP,EXPB) // shift left
// B captures the bits shifted out (sticky source) before the right shift.
417 B = extractu(ATMP,EXPBA)
418 ATMP = asr(ATMP,EXPB)
422 if (!p0.new) ATMPL = or(ATMPL,S_ONE)
// FUDGE2 positions a guard bit so the scaled convert below rounds once.
423 ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
427 p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
428 if (!p1.new) AH = or(AH,AL)
434 TMP = #-BIAS-(MANTBITS+FUDGE2)
437 A = convert_d2df(CTMP)
440 AH += asl(TMP,#HI_MANTBITS)
451 if (!p0.new) dealloc_return:t
// Possible-underflow check: set the underflow/inexact flags when the
// discarded bits are non-zero (bitsset against the sticky mask in TMP).
455 p0 = bitsset(ATMPH,TMP)
460 if (p0) BH = or(BH,BL)
// Overflow path (.Lfma_ovf region): prepare max-finite and infinity
// patterns; rounding mode (from USR via SR_ROUND_OFF) selects between them.
472 CTMP = combine(##0x7fefffff,#-1)
476 ATMP = combine(##0x7ff00000,#0)
477 BH = extractu(TMP,#2,#SR_ROUND_OFF)
// dfcmp.eq(x,x) is the idiom to raise/check FP state while producing a
// predicate; then splice sign + chosen magnitude into the return value.
490 p0 = dfcmp.eq(ATMP,ATMP)
491 if (p0.new) CTMP = ATMP
494 A = insert(CTMP,#63,#0)
// ---- .Lfma_abnormal_ab region (label not visible in this extraction):
// A or B is zero/subnormal/inf/NaN.  Sort so |A| >= |B| (magnitude
// compare on the sign-stripped values), then dispatch by class.
513 ATMP = extractu(A,#63,#0)
514 BTMP = extractu(B,#63,#0)
518 p3 = cmp.gtu(ATMP,BTMP)
519 if (!p3.new) A = B // sort values
523 p0 = dfclass(A,#0x0f) // A NaN?
524 if (!p0.new) jump:nt .Lnan
// Predicate-AND packets: p1 = (A inf && B nonzero), p0 = (A inf && B zero).
529 p1 = dfclass(A,#0x08) // A is infinity
530 p1 = dfclass(B,#0x0e) // B is nonzero
533 p0 = dfclass(A,#0x08) // a is inf
534 p0 = dfclass(B,#0x01) // b is zero
537 if (p1) jump .Lab_inf
538 p2 = dfclass(B,#0x01)
541 if (p0) jump .Linvalid
542 if (p2) jump .Lab_true_zero
545 // We are left with a normal or subnormal times a subnormal, A > B
546 // If A and B are both very small, we will go to a single sticky bit; replace
547 // A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results
548 // if A and B might multiply to something bigger, decrease A exp and increase B exp
552 if (p0.new) jump:nt .Lfma_ab_tiny
// Rescale: normalize subnormal B up by TMP and compensate by lowering
// A's exponent the same amount, then restart the normal path.
555 TMP = add(clb(BTMP),#-EXPBITS)
561 B = insert(BTMP,#63,#0)
562 AH -= asl(TMP,#HI_MANTBITS)
// .Lfma_ab_tiny: both magnitudes collapse to a fixed tiny value whose
// product only contributes a sticky bit.
567 ATMP = combine(##0x00100000,#0)
569 A = insert(ATMP,#63,#0)
570 B = insert(ATMP,#63,#0)
// A*B is infinite: NaN C propagates; inf C needs the sign check below.
577 p0 = dfclass(C,#0x10)
584 p1 = dfclass(C,#0x08)
585 if (p1.new) jump:nt .Lfma_inf_plus_inf
587 // A*B is +/- inf, C is finite. Return A
593 { // adding infinities of different signs is invalid
595 if (!p0.new) jump:nt .Linvalid
// .Lnan region: propagate/signal NaNs.  The single-precision converts
// are presumably used to raise the invalid exception on signaling NaN
// inputs -- TODO confirm against upstream comments.
603 p0 = dfclass(B,#0x10)
604 p1 = dfclass(C,#0x10)
609 BH = convert_df2sf(B)
610 BL = convert_df2sf(C)
613 BH = convert_df2sf(A)
// .Linvalid: construct a signaling single NaN and convert to double,
// which raises invalid and yields the default quiet NaN.
620 TMP = ##0x7f800001 // sp snan
623 A = convert_sf2df(TMP)
628 // B is zero, A is finite number
630 p0 = dfclass(C,#0x10)
631 if (p0.new) jump:nt .Lnan
// 0*finite + C: result is C unless C is also zero, in which case the
// IEEE signed-zero rules below apply.
635 p0 = dfcmp.eq(B,C) // is C also zero?
636 AH = lsr(AH,#31) // get sign
639 BH ^= asl(AH,#31) // form correctly signed zero in B
640 if (!p0) A = C // If C is not zero, return C
643 // B has correctly signed zero, C is also zero
// Same-signed zeros keep their sign; opposite-signed zeros yield +0
// except in round-toward-minus mode, which yields -0 (USR check below).
646 p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0
647 if (p0.new) jumpr:t r31
654 TMP = extractu(TMP,#2,#SR_ROUND_OFF)
659 if (p0.new) AH = ##0x80000000
668 // We know that AB is normal * normal
669 // C is not normal: zero, subnormal, inf, or NaN.
// .Lfma_abnormal_c: NaN C -> NaN result; inf C -> return C directly.
671 p0 = dfclass(C,#0x10) // is C NaN?
672 if (p0.new) jump:nt .Lnan
673 if (p0.new) A = C // move NaN to A
677 p0 = dfclass(C,#0x08) // is C inf?
678 if (p0.new) A = C // return C
679 if (p0.new) jumpr:nt r31
682 // If we have a zero, and we know AB is normal*normal, we can just call normal multiply
684 p0 = dfclass(C,#0x01) // is C zero?
685 if (p0.new) jump:nt __hexagon_muldf3
688 // Left with: subnormal
689 // Adjust C and jump back to restart
691 allocframe(#STACKSPACE) // oops, deallocated above, re-allocate frame
693 CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
694 jump .Lfma_abnormal_c_restart