contrib/compiler-rt/lib/builtins/divtf3.c

   1 //===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements quad-precision soft-float division
  10 // with the IEEE-754 default rounding (to nearest, ties to even).
  11 //
  12 // For simplicity, this implementation currently flushes denormals to zero.
  13 // It should be a fairly straightforward exercise to implement gradual
  14 // underflow with correct rounding.
  15 //
  16 //===----------------------------------------------------------------------===//
  17
  18 #define QUAD_PRECISION
  19 #include "fp_lib.h"
  20
  21 #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
  22 COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {
  23
  24   const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
  25   const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
  26   const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
  27
  28   rep_t aSignificand = toRep(a) & significandMask;
  29   rep_t bSignificand = toRep(b) & significandMask;
  30   int scale = 0;
  31
  32   // Detect if a or b is zero, denormal, infinity, or NaN.
  33   if (aExponent - 1U >= maxExponent - 1U ||
  34       bExponent - 1U >= maxExponent - 1U) {
  35
  36     const rep_t aAbs = toRep(a) & absMask;
  37     const rep_t bAbs = toRep(b) & absMask;
  38
  39     // NaN / anything = qNaN
  40     if (aAbs > infRep)
  41       return fromRep(toRep(a) | quietBit);
  42     // anything / NaN = qNaN
  43     if (bAbs > infRep)
  44       return fromRep(toRep(b) | quietBit);
  45
  46     if (aAbs == infRep) {
  47       // infinity / infinity = NaN
  48       if (bAbs == infRep)
  49         return fromRep(qnanRep);
  50       // infinity / anything else = +/- infinity
  51       else
  52         return fromRep(aAbs | quotientSign);
  53     }
  54
  55     // anything else / infinity = +/- 0
  56     if (bAbs == infRep)
  57       return fromRep(quotientSign);
  58
  59     if (!aAbs) {
  60       // zero / zero = NaN
  61       if (!bAbs)
  62         return fromRep(qnanRep);
  63       // zero / anything else = +/- zero
  64       else
  65         return fromRep(quotientSign);
  66     }
  67     // anything else / zero = +/- infinity
  68     if (!bAbs)
  69       return fromRep(infRep | quotientSign);
  70
  71     // One or both of a or b is denormal.  The other (if applicable) is a
  72     // normal number.  Renormalize one or both of a and b, and set scale to
  73     // include the necessary exponent adjustment.
  74     if (aAbs < implicitBit)
  75       scale += normalize(&aSignificand);
  76     if (bAbs < implicitBit)
  77       scale -= normalize(&bSignificand);
  78   }
  79
  80   // Set the implicit significand bit.  If we fell through from the
  81   // denormal path it was already set by normalize( ), but setting it twice
  82   // won't hurt anything.
  83   aSignificand |= implicitBit;
  84   bSignificand |= implicitBit;
  85   int quotientExponent = aExponent - bExponent + scale;
  86
  87   // Align the significand of b as a Q63 fixed-point number in the range
  88   // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
  89   // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2.  This
  90   // is accurate to about 3.5 binary digits.
  91   const uint64_t q63b = bSignificand >> 49;
  92   uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b;
  93   // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
  94
  95   // Now refine the reciprocal estimate using a Newton-Raphson iteration:
  96   //
  97   //     x1 = x0 * (2 - x0 * b)
  98   //
  99   // This doubles the number of correct binary digits in the approximation
 100   // with each iteration.
 101   uint64_t correction64;
 102   correction64 = -((rep_t)recip64 * q63b >> 64);
 103   recip64 = (rep_t)recip64 * correction64 >> 63;
 104   correction64 = -((rep_t)recip64 * q63b >> 64);
 105   recip64 = (rep_t)recip64 * correction64 >> 63;
 106   correction64 = -((rep_t)recip64 * q63b >> 64);
 107   recip64 = (rep_t)recip64 * correction64 >> 63;
 108   correction64 = -((rep_t)recip64 * q63b >> 64);
 109   recip64 = (rep_t)recip64 * correction64 >> 63;
 110   correction64 = -((rep_t)recip64 * q63b >> 64);
 111   recip64 = (rep_t)recip64 * correction64 >> 63;
 112
 113   // The reciprocal may have overflowed to zero if the upper half of b is
 114   // exactly 1.0.  This would sabatoge the full-width final stage of the
 115   // computation that follows, so we adjust the reciprocal down by one bit.
 116   recip64--;
 117
 118   // We need to perform one more iteration to get us to 112 binary digits;
 119   // The last iteration needs to happen with extra precision.
 120   const uint64_t q127blo = bSignificand << 15;
 121   rep_t correction, reciprocal;
 122
 123   // NOTE: This operation is equivalent to __multi3, which is not implemented
 124   //       in some architechure
 125   rep_t r64q63, r64q127, r64cH, r64cL, dummy;
 126   wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63);
 127   wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127);
 128
 129   correction = -(r64q63 + (r64q127 >> 64));
 130
 131   uint64_t cHi = correction >> 64;
 132   uint64_t cLo = correction;
 133
 134   wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH);
 135   wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL);
 136
 137   reciprocal = r64cH + (r64cL >> 64);
 138
 139   // Adjust the final 128-bit reciprocal estimate downward to ensure that it
 140   // is strictly smaller than the infinitely precise exact reciprocal. Because
 141   // the computation of the Newton-Raphson step is truncating at every step,
 142   // this adjustment is small; most of the work is already done.
 143   reciprocal -= 2;
 144
 145   // The numerical reciprocal is accurate to within 2^-112, lies in the
 146   // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
 147   // of b.  Multiplying a by this reciprocal thus gives a numerical q = a/b
 148   // in Q127 with the following properties:
 149   //
 150   //    1. q < a/b
 151   //    2. q is in the interval [0.5, 2.0)
 152   //    3. The error in q is bounded away from 2^-113 (actually, we have a
 153   //       couple of bits to spare, but this is all we need).
 154
 155   // We need a 128 x 128 multiply high to compute q, which isn't a basic
 156   // operation in C, so we need to be a little bit fussy.
 157   rep_t quotient, quotientLo;
 158   wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);
 159
 160   // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
 161   // In either case, we are going to compute a residual of the form
 162   //
 163   //     r = a - q*b
 164   //
 165   // We know from the construction of q that r satisfies:
 166   //
 167   //     0 <= r < ulp(q)*b
 168   //
 169   // If r is greater than 1/2 ulp(q)*b, then q rounds up.  Otherwise, we
 170   // already have the correct result.  The exact halfway case cannot occur.
 171   // We also take this time to right shift quotient if it falls in the [1,2)
 172   // range and adjust the exponent accordingly.
 173   rep_t residual;
 174   rep_t qb;
 175
 176   if (quotient < (implicitBit << 1)) {
 177     wideMultiply(quotient, bSignificand, &dummy, &qb);
 178     residual = (aSignificand << 113) - qb;
 179     quotientExponent--;
 180   } else {
 181     quotient >>= 1;
 182     wideMultiply(quotient, bSignificand, &dummy, &qb);
 183     residual = (aSignificand << 112) - qb;
 184   }
 185
 186   const int writtenExponent = quotientExponent + exponentBias;
 187
 188   if (writtenExponent >= maxExponent) {
 189     // If we have overflowed the exponent, return infinity.
 190     return fromRep(infRep | quotientSign);
 191   } else if (writtenExponent < 1) {
 192     if (writtenExponent == 0) {
 193       // Check whether the rounded result is normal.
 194       const bool round = (residual << 1) > bSignificand;
 195       // Clear the implicit bit.
 196       rep_t absResult = quotient & significandMask;
 197       // Round.
 198       absResult += round;
 199       if (absResult & ~significandMask) {
 200         // The rounded result is normal; return it.
 201         return fromRep(absResult | quotientSign);
 202       }
 203     }
 204     // Flush denormals to zero.  In the future, it would be nice to add
 205     // code to round them correctly.
 206     return fromRep(quotientSign);
 207   } else {
 208     const bool round = (residual << 1) >= bSignificand;
 209     // Clear the implicit bit.
 210     rep_t absResult = quotient & significandMask;
 211     // Insert the exponent.
 212     absResult |= (rep_t)writtenExponent << significandBits;
 213     // Round.
 214     absResult += round;
 215     // Insert the sign and return.
 216     const long double result = fromRep(absResult | quotientSign);
 217     return result;
 218   }
 219 }
 220
 221 #endif