1 //===----------------------Hexagon builtin routine ------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /* Double Precision square root */
22 #define SFHALF_SONE r5:4
30 #define FRACRAD r11:10
47 #define SF_MANTBITS 23
50 #define DF_MANTBITS 52
54 #define DFCLASS_ZERO 0x01
55 #define DFCLASS_NORMAL 0x02
56 #define DFCLASS_DENORMAL 0x02
57 #define DFCLASS_INFINITE 0x08
58 #define DFCLASS_NAN 0x10
60 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function
61 #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function
62 #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function
63 #define END(TAG) .size TAG,.-TAG
66 .global __hexagon_sqrtdf2
67 .type __hexagon_sqrtdf2,@function
68 .global __hexagon_sqrt
69 .type __hexagon_sqrt,@function
81 PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS)
82 EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
83 SFHALF_SONE = combine(##0x3f000004,#1)
86 NORMAL = dfclass(A,#DFCLASS_NORMAL) // Is it normal
87 NORMAL = cmp.gt(AH,#-1) // and positive?
88 if (!NORMAL.new) jump:nt .Lsqrt_abnormal
89 SFRAD = or(SFHALF,PRODLO)
95 SF_E,P_TMP = sfinvsqrta(SFRAD)
96 SFHALF = and(SFHALF,#-16)
105 // SF_E : reciprocal square root
107 // sf_S : square root
111 SF_S += sfmpy(SF_E,SFRAD):lib // s0: root
112 SF_H += sfmpy(SF_E,SFHALF):lib // h0: 0.5*y0. Could also decrement exponent...
116 SHIFTAMT = and(EXP,#1)
119 SF_D -= sfmpy(SF_S,SF_H):lib // d0: 0.5-H*S = 0.5-0.5*~1
120 FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // replace upper bits with hidden
121 P_EXP1 = cmp.gtu(SHIFTAMT,#0)
124 SF_S += sfmpy(SF_S,SF_D):lib // s1: refine sqrt
125 SF_H += sfmpy(SF_H,SF_D):lib // h1: refine half-recip
127 SHIFTAMT = mux(P_EXP1,#8,#9)
130 SF_D -= sfmpy(SF_S,SF_H):lib // d1: error term
131 FRACRAD = asl(FRACRAD,SHIFTAMT) // Move fracrad bits to right place
132 SHIFTAMT = mux(P_EXP1,#3,#2)
135 SF_H += sfmpy(SF_H,SF_D):lib // d2: rsqrt
136 // cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x).
137 PROD = asl(FRACRAD,SHIFTAMT) // fracrad<<(2+exp1)
140 SF_H = and(SF_H,##0x007fffff)
143 SF_H = add(SF_H,##0x00800000 - 3)
144 SHIFTAMT = mux(P_EXP1,#7,#8)
147 RECIPEST = asl(SF_H,SHIFTAMT)
148 SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0))
151 ROOT = mpyu(RECIPEST,PRODHI) // root = mpyu_full(recipest,hi(fracrad<<(2+exp1)))
159 #undef SFHALF_SONE // r5:4
173 /* Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply */
174 /* We can shift and subtract instead of shift and add? */
176 ERROR = asl(FRACRAD,#15)
177 PROD = mpyu(ROOTHI,ROOTHI)
178 P_CARRY0 = cmp.eq(r0,r0)
181 ERROR -= asl(PROD,#15)
182 PROD = mpyu(ROOTHI,ROOTLO)
183 P_CARRY1 = cmp.eq(r0,r0)
186 ERROR -= lsr(PROD,#16)
187 P_CARRY2 = cmp.eq(r0,r0)
190 ERROR = mpyu(ERRORHI,RECIPEST)
193 ROOT += lsr(ERROR,SHIFTAMT)
194 SHIFTAMT = add(SHIFTAMT,#16)
195 ERROR = asl(FRACRAD,#31) // for next iter
199 PROD = mpyu(ROOTHI,ROOTHI)
200 ERROR -= mpyu(ROOTHI,ROOTLO) // amount is 31, no shift needed
203 ERROR -= asl(PROD,#31)
204 PROD = mpyu(ROOTLO,ROOTLO)
207 ERROR -= lsr(PROD,#33)
210 ERROR = mpyu(ERRORHI,RECIPEST)
213 ROOT += lsr(ERROR,SHIFTAMT)
214 SHIFTAMT = add(SHIFTAMT,#16)
215 ERROR = asl(FRACRAD,#47) // for next iter
219 PROD = mpyu(ROOTHI,ROOTHI)
222 ERROR -= asl(PROD,#47)
223 PROD = mpyu(ROOTHI,ROOTLO)
226 ERROR -= asl(PROD,#16) // bidir shr 31-47
227 PROD = mpyu(ROOTLO,ROOTLO)
230 ERROR -= lsr(PROD,#17) // 64-47
233 ERROR = mpyu(ERRORHI,RECIPEST)
236 ROOT += lsr(ERROR,SHIFTAMT)
242 #define REM_HI r15:14
243 #define REM_HI_HI r15
247 #define TWOROOT_LO r9:8
250 HL = mpyu(ROOTHI,ROOTLO)
251 LL = mpyu(ROOTLO,ROOTLO)
258 P_CARRY0 = cmp.eq(r0,r0)
261 HH = mpyu(ROOTHI,ROOTHI)
262 REM_LO = sub(REM_LO,LL,P_CARRY0):carry
267 TWOROOT_LO += asl(ROOT,#1)
271 #define REM_HI_TMP r3:2
272 #define REM_HI_TMP_HI r3
273 #define REM_LO_TMP r5:4
275 REM_HI = sub(FRACRAD,HH,P_CARRY0):carry
276 REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry
285 REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry
287 EXP = add(EXP,#-DF_BIAS) // subtract bias --> signed exp
290 // If carry set, no borrow: result was still positive
291 if (P_CARRY1) ROOT = ONE
292 if (P_CARRY1) REM_LO = REM_LO_TMP
293 if (P_CARRY1) REM_HI = REM_HI_TMP
296 REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry
298 EXP = asr(EXP,#1) // divide signed exp by 2
301 REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry
305 if (P_CARRY2) ROOT = ONE
306 if (P_CARRY2) REM_LO = REM_LO_TMP
307 // since tworoot <= 2^32, remhi must be zero
315 P_TMP = cmp.eq(REM_LO,ZERO) // is the low part zero
316 if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE) // if so, it's exact... hopefully
324 RET = convert_ud2df(ROOT) // set up mantissa, maybe set inexact flag
325 EXP = add(EXP,ADJ) // add back bias
328 RETHI += asl(EXP,#DF_MANTBITS-32) // add exponent adjust
354 P_TMP = dfclass(A,#DFCLASS_ZERO) // zero?
355 if (P_TMP.new) jumpr:t r31
358 P_TMP = dfclass(A,#DFCLASS_NAN)
359 if (P_TMP.new) jump:nt .Lsqrt_nan
362 P_TMP = cmp.gt(AH,#-1)
363 if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg
364 if (!P_TMP.new) EXP = ##0x7F800001 // sNaN
367 P_TMP = dfclass(A,#DFCLASS_INFINITE)
368 if (P_TMP.new) jumpr:nt r31
370 // If we got here, we're denormal
371 // prepare to restart
373 A = extractu(A,#DF_MANTBITS,#0) // Extract mantissa
376 EXP = add(clb(A),#-DF_EXPBITS) // how much to normalize?
379 A = asl(A,EXP) // Shift mantissa
380 EXP = sub(#1,EXP) // Form exponent
383 AH = insert(EXP,#1,#DF_MANTBITS-32) // insert lsb of exponent
386 TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) // get sf value (mant+exp1)
387 SFHALF = ##0x3f000004 // form half constant
390 SFRAD = or(SFHALF,TMPLO) // form sf value
391 SFHALF = and(SFHALF,#-16)
392 jump .Ldenormal_restart // restart
396 EXP = convert_df2sf(A) // if sNaN, get invalid
402 A = convert_sf2df(EXP) // Invalid,NaNval
406 END(__hexagon_sqrtdf2)