contrib/compiler-rt/lib/builtins/arm/addsf3.S

   1 /*===-- addsf3.S - Adds two single precision floating point numbers -----===//
   2  *
   3  *                     The LLVM Compiler Infrastructure
   4  *
   5  * This file is dual licensed under the MIT and the University of Illinois Open
   6  * Source Licenses. See LICENSE.TXT for details.
   7  *
   8  *===----------------------------------------------------------------------===//
   9  *
  10  * This file implements the __addsf3 function (single precision floating
  11  * point addition with the IEEE-754 default rounding: to nearest, ties to
  12  * even) for the ARM Thumb1 ISA.
  13  *
  14  *===----------------------------------------------------------------------===*/
  15
  16 #include "../assembly.h"
  17 #define significandBits 23 /* width in bits of the single precision significand field */
  18 #define typeWidth 32 /* width in bits of the single precision representation */
  19
  20         .syntax unified /* unified ARM/Thumb assembler syntax */
  21         .text
  22   .thumb /* assemble what follows as Thumb instructions */
  23   .p2align 2 /* align to a 2^2 = 4 byte boundary */
  24
  25 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3) /* macro from assembly.h; presumably makes __aeabi_fadd an alias of __addsf3 -- confirm there */
  26
  27 /*
  28  * __addsf3: adds the single precision values whose bit patterns are in r0
  29  * (aRep) and r1 (bRep) and returns the bit pattern of the sum in r0.
  30  *
  31  * r4-r7 and lr are pushed on entry; every exit pops them again, loading the
  32  * saved lr into pc. __clzsi2 is called when a subtraction cancels leading
  33  * significand bits.
  34  *
  35  * Register roles on the main path, after the optional swap:
  36  *   r0, r1  aRep, bRep, with aAbs >= bAbs
  37  *   r2, r3  aAbs, bAbs, then aExponent, bExponent
  38  *   r4, r5  aSignificand << 3, bSignificand << 3; the three low bits are
  39  *           the round, guard and sticky bits
  40  *   r6, r7  scratch
  41  */
  42 DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
  43   push {r4, r5, r6, r7, lr}
  44   // Get the absolute value of a and b.
  45   lsls r2, r0, #1
  46   lsls r3, r1, #1
  47   lsrs r2, r2, #1  /* aAbs */
  48   beq  LOCAL_LABEL(a_zero_nan_inf) // a is +/-0; bAbs is completed there.
  49   lsrs r3, r3, #1  /* bAbs */
  50   beq  LOCAL_LABEL(zero_nan_inf)
  51
  52   // Detect if a or b is infinity or NaN.
  53   lsrs r6, r2, #(significandBits)
  54   lsrs r7, r3, #(significandBits)
  55   cmp  r6, #0xFF
  56   beq  LOCAL_LABEL(zero_nan_inf)
  57   cmp  r7, #0xFF
  58   beq  LOCAL_LABEL(zero_nan_inf)
  59
  60   // Swap Rep and Abs so that a and aAbs have the larger absolute value.
  61   cmp r2, r3
  62   bhs LOCAL_LABEL(no_swap)
  63   movs r4, r0
  64   movs r5, r2
  65   movs r0, r1
  66   movs r2, r3
  67   movs r1, r4
  68   movs r3, r5
  69 LOCAL_LABEL(no_swap):
  70
  71   // Get the significands and shift them to give us round, guard and sticky.
  72   lsls r4, r0, #(typeWidth - significandBits)
  73   lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */
  74   lsls r5, r1, #(typeWidth - significandBits)
  75   lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */
  76
  77   // Get the implicitBit.
  78   movs r6, #1
  79   lsls r6, r6, #(significandBits + 3)
  80
  81   // Get aExponent and set implicit bit if necessary.
  82   lsrs r2, r2, #(significandBits)
  83   beq LOCAL_LABEL(a_done_implicit_bit)
  84   orrs r4, r6
  85 LOCAL_LABEL(a_done_implicit_bit):
  86
  87   // Get bExponent and set implicit bit if necessary.
  88   lsrs r3, r3, #(significandBits)
  89   beq LOCAL_LABEL(b_done_implicit_bit)
  90   orrs r5, r6
  91 LOCAL_LABEL(b_done_implicit_bit):
  92
  93   // Get the difference in exponents.
  94   subs r6, r2, r3
  95   beq LOCAL_LABEL(done_align)
  96
  97   // If b is denormal, then a must be normal as align > 0, and we only need to
  98   // right shift bSignificand by (align - 1) bits.
  99   cmp  r3, #0
 100   bne  1f
 101   subs r6, r6, #1
 102 1:
 103
 104   // No longer needs bExponent. r3 is dead here.
 105   // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
 106   // A register-specified shift uses only the low byte of the count and
 107   // gives 0 for counts of 32 or more, so align > typeWidth leaves no sticky
 108   // bit here; the all-zero result is patched up to 1 below.
 109   movs r3, #(typeWidth)
 110   subs r3, r3, r6
 111   movs r7, r5
 112   lsls r7, r3
 113   beq 1f
 114   movs r7, #1
 115 1:
 116
 117   // bSignificand = bSignificand >> align | sticky;
 118   lsrs r5, r6
 119   orrs r5, r7
 120   bne LOCAL_LABEL(done_align)
 121   movs r5, #1 //  sticky; b is known to be non-zero.
 122
 123 LOCAL_LABEL(done_align):
 124   // isSubtraction = (aRep ^ bRep) >> 31;
 125   movs r7, r0
 126   eors r7, r1
 127   lsrs r7, #31
 128   bne LOCAL_LABEL(do_subtraction)
 129
 130   // Same sign, do Addition.
 131
 132   // aSignificand += bSignificand;
 133   adds r4, r4, r5
 134
 135   // Check carry bit.
 136   movs r6, #1
 137   lsls r6, r6, #(significandBits + 3 + 1)
 138   movs r7, r4
 139   ands r7, r6
 140   bne LOCAL_LABEL(add_carry)
 141
 142   // No carry out. If a is denormal (so b is too), neither significand had
 143   // the implicit bit, yet their sum may have reached it; the result is then
 144   // normal with exponent 1. form_result masks the implicit bit off, so the
 145   // exponent has to carry it:
 146   // aExponent = aSignificand >> (significandBits + 3), which is 0 or 1 here.
 147   cmp  r2, #0
 148   bne  LOCAL_LABEL(form_result)
 149   lsrs r2, r4, #(significandBits + 3)
 150   b    LOCAL_LABEL(form_result)
 151
 152 LOCAL_LABEL(add_carry):
 153   // If the addition carried up, we need to right-shift the result and
 154   // adjust the exponent.
 155   movs r7, r4
 156   movs r6, #1
 157   ands r7, r6 // sticky = aSignificand & 1;
 158   lsrs r4, #1
 159   orrs r4, r7  // result Significand
 160   adds r2, #1  // result Exponent
 161   // If we have overflowed the type, return +/- infinity.
 162   cmp  r2, #0xFF
 163   beq  LOCAL_LABEL(ret_inf)
 164
 165 LOCAL_LABEL(form_result):
 166   // Shift the sign, exponent and significand into place.
 167   lsrs r0, #(typeWidth - 1)
 168   lsls r0, #(typeWidth - 1) // Get Sign.
 169   lsls r2, #(significandBits)
 170   orrs r0, r2
 171   movs r1, r4
 172   lsls r4, #(typeWidth - significandBits - 3)
 173   lsrs r4, #(typeWidth - significandBits)
 174   orrs r0, r4
 175
 176   // Final rounding.  The result may overflow to infinity, but that is the
 177   // correct result in that case.
 178   // roundGuardSticky = aSignificand & 0x7;
 179   movs r2, #0x7
 180   ands r1, r2
 181   // if (roundGuardSticky > 0x4) result++;
 182
 183   cmp r1, #0x4
 184   blt LOCAL_LABEL(done_round)
 185   beq 1f
 186   adds r0, #1
 187   pop {r4, r5, r6, r7, pc}
 188 1:
 189
 190   // if (roundGuardSticky == 0x4) result += result & 1;
 191   movs r1, r0
 192   lsrs r1, #1
 193   bcc  LOCAL_LABEL(done_round)
 194   adds r0, r0, #1
 195 LOCAL_LABEL(done_round):
 196   pop {r4, r5, r6, r7, pc}
 197
 198 LOCAL_LABEL(do_subtraction):
 199   subs r4, r4, r5 // aSignificand -= bSignificand;
 200   beq  LOCAL_LABEL(ret_zero)
 201   movs r6, r4
 202   cmp  r2, #0
 203   beq  LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
 204   // If partial cancellation occurred, we need to left-shift the result
 205   // and adjust the exponent:
 206   lsrs r6, r6, #(significandBits + 3)
 207   bne LOCAL_LABEL(form_result)
 208
 209   // r0 (sign) and r2 (aExponent) must survive the call. r1 and r3 are dead:
 210   // form_result reloads r1 and r3 is rewritten before its next use. Saving
 211   // three words on top of the five pushed at entry keeps sp 8-byte aligned
 212   // at the call (given an 8-byte aligned sp on entry), as the AAPCS
 213   // requires at a public interface.
 214   push {r0, r1, r2}
 215   movs r0, r4
 216   bl   SYMBOL_NAME(__clzsi2)
 217   movs r5, r0
 218   pop {r0, r1, r2}
 219   // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
 220   subs r5, r5, #(typeWidth - significandBits - 3 - 1)
 221   // aSignificand <<= shift; aExponent -= shift;
 222   lsls r4, r5
 223   subs  r2, r2, r5
 224   bgt LOCAL_LABEL(form_result)
 225
 226   // Do normalization if aExponent <= 0.
 227   movs r6, #1
 228   subs r6, r6, r2 // 1 - aExponent;
 229   movs r2, #0 // aExponent = 0;
 230   movs r3, #(typeWidth) // bExponent is dead.
 231   subs r3, r3, r6
 232   movs r7, r4
 233   lsls r7, r3  // stickyBit = (bool)(aSignificand << (typeWidth - align))
 234   beq 1f
 235   movs r7, #1
 236 1:
 237   lsrs r4, r6 /* aSignificand >> shift */
 238   orrs r4, r7
 239   b LOCAL_LABEL(form_result)
 240
 241 LOCAL_LABEL(ret_zero):
 242   movs r0, #0 // The significands cancelled exactly: return +0.
 243   pop {r4, r5, r6, r7, pc}
 244
 245
 246 LOCAL_LABEL(a_zero_nan_inf):
 247   lsrs r3, r3, #1 // Finish bAbs, which the early exit skipped.
 248
 249 LOCAL_LABEL(zero_nan_inf):
 250   // Here  r2 has aAbs, r3 has bAbs
 251   movs r4, #0xFF
 252   lsls r4, r4, #(significandBits) // Make +inf.
 253
 254   cmp r2, r4
 255   bhi LOCAL_LABEL(a_is_nan)
 256   cmp r3, r4
 257   bhi LOCAL_LABEL(b_is_nan)
 258
 259   cmp r2, r4
 260   bne LOCAL_LABEL(a_is_rational)
 261   // aAbs is INF.
 262   eors r1, r0 // aRep ^ bRep.
 263   movs r6, #1
 264   lsls r6, r6, #(typeWidth - 1) // get sign mask.
 265   cmp r1, r6 // if they only differ on sign bit, it's -INF + INF
 266   beq LOCAL_LABEL(a_is_nan)
 267   pop {r4, r5, r6, r7, pc}
 268
 269 LOCAL_LABEL(a_is_rational):
 270   cmp r3, r4
 271   bne LOCAL_LABEL(b_is_rational)
 272   movs r0, r1 // b is INF and a is finite: return b.
 273   pop {r4, r5, r6, r7, pc}
 274
 275 LOCAL_LABEL(b_is_rational):
 276   // either a or b or both are zero.
 277   adds r4, r2, r3
 278   beq  LOCAL_LABEL(both_zero)
 279   cmp r2, #0 // is absA 0 ?
 280   beq LOCAL_LABEL(ret_b)
 281   pop {r4, r5, r6, r7, pc}
 282
 283 LOCAL_LABEL(both_zero):
 284   ands r0, r1 // +0 + -0 = +0
 285   pop {r4, r5, r6, r7, pc}
 286
 287 LOCAL_LABEL(ret_b):
 288   movs r0, r1
 289   pop {r4, r5, r6, r7, pc}
 290
 291 LOCAL_LABEL(b_is_nan):
 292   movs r0, r1
 293 LOCAL_LABEL(a_is_nan):
 294   movs r1, #1
 295   lsls r1, r1, #(significandBits -1) // r1 is quiet bit.
 296   orrs r0, r1
 297   pop {r4, r5, r6, r7, pc}
 298
 299 LOCAL_LABEL(ret_inf):
 300   movs r4, #0xFF
 301   lsls r4, r4, #(significandBits)
 302   orrs r0, r4
 303   lsrs r0, r0, #(significandBits)
 304   lsls r0, r0, #(significandBits)
 305   pop {r4, r5, r6, r7, pc}
 306
 307
 308 END_COMPILERRT_FUNCTION(__addsf3)
 309
 310 NO_EXEC_STACK_DIRECTIVE