]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/compiler-rt/lib/builtins/hexagon/dfdiv.S
Merge clang 7.0.1 and several follow-up changes
[FreeBSD/FreeBSD.git] / contrib / compiler-rt / lib / builtins / hexagon / dfdiv.S
1 //===----------------------Hexagon builtin routine ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 /* Double Precision Divide */
11
// Register aliases, predicate aliases and constants for __hexagon_divdf3.
// The routine divides A by B and leaves the result in r1:0.
12 #define A r1:0                  // dividend; also holds the returned result
13 #define AH r1                   // high word of A (sign, exponent, top mantissa bits)
14 #define AL r0                   // low word of A
15
16 #define B r3:2                  // divisor
17 #define BH r3                   // high word of B
18 #define BL r2                   // low word of B
19
20 #define Q r5:4                  // 64-bit integer quotient accumulator
21 #define QH r5
22 #define QL r4
23
24 #define PROD r7:6               // scratch product / partial quotient
25 #define PRODHI r7
26 #define PRODLO r6
27
28 #define SFONE r8                // single-precision constant used to seed the reciprocal
29 #define SFDEN r9                // single-precision value built from the divisor mantissa
30 #define SFERROR r10             // error term of the reciprocal refinement
31 #define SFRECIP r11             // single-precision reciprocal estimate
32
33 #define EXPBA r13:12            // pair view: EXPB in the high word, EXPA in the low word
34 #define EXPB r13                // exponent of B
35 #define EXPA r12                // exponent of A, later the exponent of the result
36
37 #define REMSUB2 r15:14          // second partial product subtracted from the remainder
38
39
40
41 #define SIGN r28                // AH ^ BH; bit 31 is the sign of the quotient
42
43 #define Q_POSITIVE p3           // true when the quotient is non-negative
44 #define NORMAL p2               // true when both operands are normal numbers
45 #define NO_OVF_UNF p1           // true when the result exponent is safely in range
46 #define P_TMP p0                // scratch predicate
47
48 #define RECIPEST_SHIFT 3        // left shift applied to the reciprocal mantissa in RECIPEST
49 #define QADJ 61                 // subtracted from EXPA before it is added to the converted quotient
50
51 #define DFCLASS_NORMAL 0x02     // dfclass mask bit: normal
52 #define DFCLASS_NUMBER 0x0F     // dfclass mask: any non-NaN class (NaN itself is tested with #0x10)
53 #define DFCLASS_INFINITE 0x08   // dfclass mask bit: infinity
54 #define DFCLASS_ZERO 0x01       // dfclass mask bit: zero
55 #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)          // any number except zero
56 #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)  // any number except infinity
57
58 #define DF_MANTBITS 52          // stored mantissa bits of a double
59 #define DF_EXPBITS 11           // exponent bits of a double
60 #define SF_MANTBITS 23          // stored mantissa bits of a single
61 #define SF_EXPBITS 8            // exponent bits of a single
62 #define DF_BIAS 0x3ff           // exponent bias of a double
63
64 #define SR_ROUND_OFF 22         // bit offset of the 2-bit USR field read in .Ldiv_ovf (rounding mode)
65
// Alias helpers: each one exports an extra global symbol that resolves to
// __hexagon_<TAG>.
66 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
67 #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
68 #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
69 #define END(TAG) .size TAG,.-TAG        // record the symbol size: current location minus TAG
70
// __hexagon_divdf3: double-precision divide, A (r1:0) / B (r3:2) -> r1:0.
// Uses r0-r15, r28 and p0-p3 as scratch.
// Fast path (both operands normal):
//   1. build a single-precision reciprocal estimate of the divisor mantissa
//      (sfrecipa followed by two refinement steps),
//   2. run four DIV_ITER1B steps that accumulate an integer quotient in Q
//      while updating the 64-bit remainder REM,
//   3. correct the last quotient unit, fold a non-zero remainder into the
//      low bit of Q, apply the sign, convert Q to double and add the
//      exponent difference into the exponent field.
// Operands that are not both normal go to .Ldiv_abnormal; a result exponent
// outside the safe range goes to .Ldiv_ovf_unf after the conversion.
// Note: instructions grouped in one { } packet read the register values from
// before the packet (Hexagon packet semantics); several packets rely on this.
71         .text
72         .global __hexagon_divdf3
73         .type __hexagon_divdf3,@function
74         Q6_ALIAS(divdf3)                // also exported as __qdsp_divdf3
75         FAST_ALIAS(divdf3)              // also exported as __hexagon_fast_divdf3
76         FAST2_ALIAS(divdf3)             // also exported as __hexagon_fast2_divdf3
77         .p2align 5
78 __hexagon_divdf3:
79         {
80                 NORMAL = dfclass(A,#DFCLASS_NORMAL)      // two writes to one predicate in a
81                 NORMAL = dfclass(B,#DFCLASS_NORMAL)      // packet are ANDed: A normal and B normal
82                 EXPBA = combine(BH,AH)                   // EXPB = BH, EXPA = AH (raw high words)
83                 SIGN = xor(AH,BH)                        // bit 31 = sign of the quotient
84         }
// From here on r1:0 is used as the running remainder and r3:2 as the denominator.
85 #undef A
86 #undef AH
87 #undef AL
88 #undef B
89 #undef BH
90 #undef BL
91 #define REM r1:0
92 #define REMHI r1
93 #define REMLO r0
94 #define DENOM r3:2
95 #define DENOMHI r3
96 #define DENOMLO r2
97         {
98                 if (!NORMAL) jump .Ldiv_abnormal         // zero/inf/NaN/denormal handling
99                 PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)    // top 23 mantissa bits of the divisor
100                 SFONE = ##0x3f800001                    // 1.0f bit pattern with the lowest mantissa bit set
101         }
102         {
103                 SFDEN = or(SFONE,PRODLO)                // single in [1,2) carrying the divisor mantissa
104                 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)       // biased exponent field of B
105                 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)       // biased exponent field of A
106                 Q_POSITIVE = cmp.gt(SIGN,#-1)           // sign bit clear -> quotient is positive
107         }
108 #undef SIGN
109 #define ONE r28
// Re-entry point used by .Ldiv_abnormal after it has normalized denormal operands.
110 .Ldenorm_continue:
111         {
112                 SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)   // initial reciprocal approximation of SFDEN
113                 SFERROR = and(SFONE,#-2)                // SFONE with bit 0 cleared: exactly 1.0f
114                 ONE = #1
115                 EXPA = sub(EXPA,EXPB)                   // exponent difference of the quotient
116         }
117 #undef EXPB
118 #define RECIPEST r13
119         {
120                 SFERROR -= sfmpy(SFRECIP,SFDEN):lib     // error = 1.0 - recip * den
121                 REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)       // overwrite sign+exponent with 1: explicit hidden bit
122                 RECIPEST = ##0x00800000 << RECIPEST_SHIFT               // implicit-one position of the integer reciprocal
123         }
124         {
125                 SFRECIP += sfmpy(SFRECIP,SFERROR):lib   // first refinement: recip += recip * error
126                 DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)     // same hidden-bit treatment for the divisor
127                 SFERROR = and(SFONE,#-2)                // reload 1.0f for the second step
128         }
129         {
130                 SFERROR -= sfmpy(SFRECIP,SFDEN):lib     // error of the refined estimate
131                 QH = #-DF_BIAS+1                        // lower bound for the exponent range check
132                 QL = #DF_BIAS-1                         // upper bound for the exponent range check
133         }
134         {
135                 SFRECIP += sfmpy(SFRECIP,SFERROR):lib   // second refinement
136                 NO_OVF_UNF = cmp.gt(EXPA,QH)            // ANDed pair of writes:
137                 NO_OVF_UNF = !cmp.gt(EXPA,QL)           // -DF_BIAS+1 < EXPA <= DF_BIAS-1
138         }
139         {
140                 RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) // place the reciprocal mantissa below the implicit one
141                 Q = #0                                  // clear the quotient accumulator
142                 EXPA = add(EXPA,#-QADJ)                 // pre-adjust the exponent added after convert_d2df
143         }
144 #undef SFERROR
145 #undef SFRECIP
146 #define TMP r10
147 #define TMP1 r11
148         {
149                 RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))      // lower the estimate by 3 units at the shifted scale; presumably keeps partial quotients from overshooting (TODO confirm)
150         }
151
// DIV_ITER1B: one quotient refinement step.
//   QSHIFTINSN/QSHIFT - shift instruction and amount applied to the partial
//                       quotient before it is added to Q
//   REMSHIFT          - left shift applied to REM
//   EXTRA             - optional extra instruction placed in the last packet
// Packet 1: PROD = RECIPEST * REMHI (unsigned), REM <<= REMSHIFT.
// Packet 2: PRODLO is cleared so only PRODHI is kept as the partial quotient;
//           REM -= PRODHI * DENOMLO, REMSUB2 = PRODHI * DENOMHI.
// Packet 3: Q += shifted PROD, REM -= REMSUB2 << 32.
152 #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
153         { \
154                 PROD = mpyu(RECIPEST,REMHI); \
155                 REM = asl(REM,# ## ( REMSHIFT )); \
156         }; \
157         { \
158                 PRODLO = # ## 0; \
159                 REM -= mpyu(PRODHI,DENOMLO); \
160                 REMSUB2 = mpyu(PRODHI,DENOMHI); \
161         }; \
162         { \
163                 Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
164                 REM -= asl(REMSUB2, # ## 32); \
165                 EXTRA \
166         }
167
168
// Four steps; each partial quotient lands 15 bits lower in Q than the previous one.
169         DIV_ITER1B(ASL,14,15,)
170         DIV_ITER1B(ASR,1,15,)
171         DIV_ITER1B(ASR,16,15,)
172         DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)     // EXTRA zeroes PROD for the fix-up below
173
174 #undef REMSUB2
175 #define TMPPAIR r15:14
176 #define TMPPAIRHI r15
177 #define TMPPAIRLO r14
178 #undef RECIPEST
179 #define EXPB r13
// Final correction: if the remainder still holds at least one DENOM, subtract
// it once more and add 2 to Q.
180         {
181                 // compare or sub with carry
182                 TMPPAIR = sub(REM,DENOM)                // candidate remainder after one more subtraction
183                 P_TMP = cmp.gtu(DENOM,REM)              // true when no further subtraction is needed
184                 // set up amt to add to q
185                 if (!P_TMP.new) PRODLO  = #2            // PROD was zeroed above, so PROD becomes 0 or 2
186         }
187         {
188                 Q = add(Q,PROD)                         // apply the correction to the quotient
189                 if (!P_TMP) REM = TMPPAIR               // keep the reduced remainder
190                 TMPPAIR = #0                            // zero for the compare in the next packet
191         }
192         {
193                 P_TMP = cmp.eq(REM,TMPPAIR)             // exact division when the remainder is zero
194                 if (!P_TMP.new) QL = or(QL,ONE)         // otherwise set bit 0 of Q as a sticky bit
195         }
196         {
197                 PROD = neg(Q)                           // negated quotient, used when the result is negative
198         }
199         {
200                 if (!Q_POSITIVE) Q = PROD
201         }
// r1:0 and r3:2 go back to their operand names; A now receives the result.
202 #undef REM
203 #undef REMHI
204 #undef REMLO
205 #undef DENOM
206 #undef DENOMLO
207 #undef DENOMHI
208 #define A r1:0
209 #define AH r1
210 #define AL r0
211 #define B r3:2
212 #define BH r3
213 #define BL r2
214         {
215                 A = convert_d2df(Q)                     // signed 64-bit integer quotient -> double
216                 if (!NO_OVF_UNF) jump .Ldiv_ovf_unf     // exponent may leave the normal range
217         }
218         {
219                 AH += asl(EXPA,#DF_MANTBITS-32)         // add the adjusted exponent into the exponent field
220                 jumpr r31                               // return
221         }
222
// Reached when the exponent difference was outside the safe range.
// Computes the true biased exponent of the result and dispatches to the
// overflow path, the "rounded up to normal" check, or falls through to the
// underflow (denormal result) path.
223 .Ldiv_ovf_unf:
224         {
225                 AH += asl(EXPA,#DF_MANTBITS-32)         // same exponent fix-up as the normal return path
226                 EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) // exponent field of the converted quotient (AH as it was before this packet)
227         }
228         {
229                 PROD = abs(Q)                           // magnitude of the integer quotient
230                 EXPA = add(EXPA,EXPB)                   // biased exponent the result would have with unlimited range
231         }
232         {
233                 P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)          // overflow
234                 if (P_TMP.new) jump:nt .Ldiv_ovf
235         }
236         {
237                 P_TMP = cmp.gt(EXPA,#0)
238                 if (P_TMP.new) jump:nt .Lpossible_unf           // round up to normal possible...
239         }
240         /* Underflow */
241         /* We know what the infinite range exponent should be (EXPA) */
242         /* Q is 2's complement, PROD is abs(Q) */
243         /* Normalize Q, shift right, add a high bit, convert, change exponent */
244
245 #define FUDGE1 7        // how much to shift right
246 #define FUDGE2 4        // how many guard/round to keep at lsbs
247
248         {
249                 EXPB = add(clb(PROD),#-1)                       // doesn't need to be added in since
250                 EXPA = sub(#FUDGE1,EXPA)                        // we extract post-converted exponent
251                 TMP = USR                                       // current status register value
252                 TMP1 = #63                                      // upper limit for the right shift
253         }
254         {
255                 EXPB = min(EXPA,TMP1)                           // right-shift amount, clamped to 63
256                 TMP1 = or(TMP,#0x030)                           // USR copy with the 0x30 flag bits set (Unf+Inexact per .Lpossible_unf)
257                 PROD = asl(PROD,EXPB)                           // normalize abs(Q) using the clb-based count from the previous packet
258                 EXPA = #0                                       // low word of EXPBA: offset 0 for the extractu below
259         }
260         {
261                 TMPPAIR = extractu(PROD,EXPBA)                          // bits that will get shifted out
262                 PROD = lsr(PROD,EXPB)                                   // shift out bits
263                 B = #1                                                  // constant 1 for the compare / sticky bit
264         }
265         {
266                 P_TMP = cmp.gtu(B,TMPPAIR)                      // true when the shifted-out bits are all zero
267                 if (!P_TMP.new) PRODLO = or(BL,PRODLO)          // otherwise OR a sticky 1 into bit 0
268                 PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)  // the "high bit" from the plan above
269         }
270         {
271                 Q = neg(PROD)                                   // negative candidate
272                 P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)          // true when all FUDGE2 low bits are clear
273                 if (!P_TMP.new) TMP = TMP1                      // otherwise use the USR copy with flags set
274         }
275         {
276                 USR = TMP                                       // write back the status (flags set or unchanged)
277                 if (Q_POSITIVE) Q = PROD                        // positive result keeps the magnitude
278                 TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)            // exponent correction added after the conversion
279         }
280         {
281                 A = convert_d2df(Q)                             // integer -> double
282         }
283         {
284                 AH += asl(TMP,#DF_MANTBITS-32)                  // apply the exponent correction
285                 jumpr r31                                       // return
286         }
287
288
// The result exponent is positive, so A already holds the answer; this path
// only decides whether the underflow flag bits still have to be raised.
289 .Lpossible_unf:
290         /* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */
291         /* The answer is correct, but we need to raise Underflow */
292         {
293                 B = extractu(A,#63,#0)                          // low 63 bits of A: the result without its sign bit
294                 TMPPAIR = combine(##0x00100000,#0)              // min normal
295                 TMP = #0x7FFF                                   // mask tested against PRODHI below
296         }
297         {
298                 P_TMP = dfcmp.eq(TMPPAIR,B)             // Is everything zero in the rounded value...
299                 P_TMP = bitsset(PRODHI,TMP)             // but a bunch of bits set in the unrounded abs(quotient)?
300         }
301
// For __HEXAGON_ARCH__ == 60 the same two instructions are issued outside a
// packet and in the opposite order; the reason is not visible in this file
// (presumably an architecture-specific restriction - TODO confirm).
302 #if (__HEXAGON_ARCH__ == 60)
303                 TMP = USR               // If not, just return
304                 if (!P_TMP) jumpr r31   // Else, we want to set Unf+Inexact
305                                         // Note that inexact is already set...
306 #else
307         {
308                 if (!P_TMP) jumpr r31                   // If not, just return
309                 TMP = USR                               // Else, we want to set Unf+Inexact
310         }                                               // Note that inexact is already set...
311 #endif
312         {
313                 TMP = or(TMP,#0x30)                     // set the Unf+Inexact bits in the USR copy
314         }
315         {
316                 USR = TMP                               // write the flags back
317         }
318         {
319                 p0 = dfcmp.eq(A,A)                      // result unused; presumably issued for its exception side effect, like "get exceptions" in .Ldiv_ovf (TODO confirm)
320                 jumpr r31                               // return with A unchanged
321         }
322
323 .Ldiv_ovf:
324         /*
325          * Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
326          */
327         {
328                 TMP = USR                               // current status register value
329                 B = combine(##0x7fefffff,#-1)           // largest finite double (saturated result)
330                 AH = mux(Q_POSITIVE,#0,#-1)             // 0 for a positive result, all ones for a negative one (sign bit source)
331         }
332         {
333                 PROD = combine(##0x7ff00000,#0)         // infinity bit pattern
334                 QH = extractu(TMP,#2,#SR_ROUND_OFF)     // 2-bit field of USR at SR_ROUND_OFF (rounding mode)
335                 TMP = or(TMP,#0x28)                     // flag bits raised on overflow (0x20 is shared with the Unf+Inexact mask 0x30, so presumably Overflow+Inexact - TODO confirm)
336         }
337         {
338                 USR = TMP                               // write the flags back
339                 QH ^= lsr(AH,#31)                       // flip the low mode bit when the result is negative
340                 QL = QH                                 // raw mode: QH as it was before this packet
341         }
342         {
343                 p0 = !cmp.eq(QL,#1)             // if not round-to-zero
344                 p0 = !cmp.eq(QH,#2)             // and not rounding the other way
345                 if (p0.new) B = PROD            // go to inf
346                 p0 = dfcmp.eq(B,B)              // get exceptions
347         }
348         {
349                 A = insert(B,#63,#0)                    // low 63 bits from B; bit 63 (sign) stays as set in AH above
350                 jumpr r31                               // return
351         }
352
// Slow path for operands that are not both normal. r28 is named SIGN again
// (it still holds AH ^ BH from the entry packet) and p1/p2 are renamed.
// The classification below dispatches NaN, inf/inf, 0/0, zero results and
// infinite results; what remains is (de)normal / (de)normal, which is
// normalized and sent back to .Ldenorm_continue.
353 #undef ONE
354 #define SIGN r28
355 #undef NORMAL
356 #undef NO_OVF_UNF
357 #define P_INF p1
358 #define P_ZERO p2
359 .Ldiv_abnormal:
360         {
361                 P_TMP = dfclass(A,#DFCLASS_NUMBER)      // ANDed pair of writes:
362                 P_TMP = dfclass(B,#DFCLASS_NUMBER)      // neither operand is a NaN
363                 Q_POSITIVE = cmp.gt(SIGN,#-1)           // sign bit clear -> positive quotient
364         }
365         {
366                 P_INF = dfclass(A,#DFCLASS_INFINITE)    // both operands infinite
367                 P_INF = dfclass(B,#DFCLASS_INFINITE)
368         }
369         {
370                 P_ZERO = dfclass(A,#DFCLASS_ZERO)       // both operands zero
371                 P_ZERO = dfclass(B,#DFCLASS_ZERO)
372         }
373         {
374                 if (!P_TMP) jump .Ldiv_nan              // at least one NaN operand
375                 if (P_INF) jump .Ldiv_invalid           // inf / inf
376         }
377         {
378                 if (P_ZERO) jump .Ldiv_invalid          // 0 / 0
379         }
380         {
381                 P_ZERO = dfclass(A,#DFCLASS_NONZERO)            // nonzero
382                 P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)        // non-infinite
383         }
384         {
385                 P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
386                 P_INF = dfclass(B,#DFCLASS_NONZERO)     // nonzero
387         }
388         {
389                 if (!P_ZERO) jump .Ldiv_zero_result     // A is zero or B is infinite
390                 if (!P_INF) jump .Ldiv_inf_result       // A is infinite or B is zero
391         }
392         /* Now we've narrowed it down to (de)normal / (de)normal */
393         /* Set up A/EXPA B/EXPB and go back */
394 #undef P_ZERO
395 #undef P_INF
396 #define P_TMP2 p1
397         {
398                 P_TMP = dfclass(A,#DFCLASS_NORMAL)      // A is normal (otherwise denormal)
399                 P_TMP2 = dfclass(B,#DFCLASS_NORMAL)     // B is normal (otherwise denormal)
400                 TMP = ##0x00100000                      // hidden-bit position in a high word
401         }
402         {
403                 EXPBA = combine(BH,AH)                  // EXPB = BH, EXPA = AH (raw high words)
404                 AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)          // clear out hidden bit, sign bit
405                 BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)          // clear out hidden bit, sign bit
406         }
407         {
408                 if (P_TMP) AH = or(AH,TMP)                              // if normal, add back in hidden bit
409                 if (P_TMP2) BH = or(BH,TMP)                             // if normal, add back in hidden bit
410         }
411         {
412                 QH = add(clb(A),#-DF_EXPBITS)           // left shift that brings the leading 1 of A to the hidden-bit position
413                 QL = add(clb(B),#-DF_EXPBITS)           // same for B
414                 TMP = #1
415         }
416         {
417                 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)       // biased exponent field of A
418                 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)       // biased exponent field of B
419         }
420         {
421                 A = asl(A,QH)                           // normalize the mantissa of A
422                 B = asl(B,QL)                           // normalize the mantissa of B
423                 if (!P_TMP) EXPA = sub(TMP,QH)          // denormal A: exponent becomes 1 - shift
424                 if (!P_TMP2) EXPB = sub(TMP,QL)         // denormal B: exponent becomes 1 - shift
425         }       // recreate values needed by resume code
426         {
427                 PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)        // top 23 mantissa bits of the normalized divisor
428         }
429         {
430                 SFDEN = or(SFONE,PRODLO)                // rebuild SFDEN exactly as the fast path does
431                 jump .Ldenorm_continue                  // resume the main divide sequence
432         }
433
// Reached from .Ldiv_abnormal when A is zero or B is infinite:
// return a zero carrying the sign of the quotient.
434 .Ldiv_zero_result:
435         {
436                 AH = xor(AH,BH)                         // bit 31 of AH = sign of the quotient
437                 B = #0                                  // zero magnitude
438         }
439         {
440                 A = insert(B,#63,#0)                    // clear the low 63 bits of A, keep the sign bit
441                 jumpr r31                               // return
442         }
// Reached from .Ldiv_abnormal when A is infinite or B is zero:
// return a signed infinity, setting the DBZ flag for finite / 0.
443 .Ldiv_inf_result:
444         {
445                 p2 = dfclass(B,#DFCLASS_ZERO)           // ANDed pair of writes:
446                 p2 = dfclass(A,#DFCLASS_NONINFINITE)    // B is zero and A is not infinite
447         }
448         {
449                 TMP = USR                               // current status register value
450                 if (!p2) jump 1f                        // not a divide-by-zero: skip the flag update
451                 AH = xor(AH,BH)                         // bit 31 of AH = sign of the quotient
452         }
453         {
454                 TMP = or(TMP,#0x04)             // DBZ
455         }
456         {
457                 USR = TMP                               // write the flag back
458         }
459 1:
460         {
461                 B = combine(##0x7ff00000,#0)            // infinity bit pattern
462                 p0 = dfcmp.uo(B,B)              // take possible exception
463         }
464         {
465                 A = insert(B,#63,#0)                    // low 63 bits from B, sign bit kept from AH
466                 jumpr r31                               // return
467         }
// Reached from .Ldiv_abnormal when at least one operand is a NaN.
468 .Ldiv_nan:
469         {
470                 p0 = dfclass(A,#0x10)                   // A is a NaN
471                 p1 = dfclass(B,#0x10)                   // B is a NaN
472                 if (!p0.new) A = B                      // afterwards both A and B
473                 if (!p1.new) B = A                      // hold a NaN operand
474         }
475         {
476                 QH = convert_df2sf(A)   // get possible invalid exceptions
477                 QL = convert_df2sf(B)                   // converted values themselves are not used
478         }
479         {
480                 A = #-1                                 // return the all-ones bit pattern (a NaN encoding)
481                 jumpr r31                               // return
482         }
483
// Reached from .Ldiv_abnormal for inf / inf and 0 / 0.
484 .Ldiv_invalid:
485         {
486                 TMP = ##0x7f800001                      // single-precision NaN pattern (exponent all ones, mantissa 1)
487         }
488         {
489                 A = convert_sf2df(TMP)          // get invalid, get DF qNaN
490                 jumpr r31                               // return
491         }
492 END(__hexagon_divdf3)