]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm-project/compiler-rt/lib/builtins/i386/floatundisf.S
Merge ^/vendor/clang/dist up to its last change, and resolve conflicts.
[FreeBSD/FreeBSD.git] / contrib / llvm-project / compiler-rt / lib / builtins / i386 / floatundisf.S
1 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
2 // See https://llvm.org/LICENSE.txt for license information.
3 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
5 #include "../assembly.h"
6
7 // float __floatundisf(du_int a);
8
9 // Note that there is a hardware instruction, fildll, that does most of what
10 // this function needs to do.  However, because of our ia32 ABI, it will take
11 // a write-small read-large stall, so the software implementation here is
12 // actually several cycles faster.
13
14 // This is a branch-free implementation.  A branchy implementation might be
15 // faster for the common case if you know something a priori about the input
16 // distribution.
17
18 /* branch-free x87 implementation - one cycle slower than without x87.
19
20 #ifdef __i386__
21
22 CONST_SECTION
23 .balign 8
24
25                 .quad   0x43f0000000000000
26 twop64: .quad   0x0000000000000000
27
28 #define                 TWOp64                  twop64-0b(%ecx,%eax,8)
29
30 .text
31 .balign 4
32 DEFINE_COMPILERRT_FUNCTION(__floatundisf)
33         movl            8(%esp),                %eax
34         movd            8(%esp),                %xmm1
35         movd            4(%esp),                %xmm0
36         punpckldq       %xmm1,                  %xmm0
37         calll           0f
38 0:      popl            %ecx
39         sarl            $31,                    %eax
40         movq            %xmm0,                  4(%esp)
41         fildll          4(%esp)
42         faddl           TWOp64
43         fstps           4(%esp)
44         flds            4(%esp)
45         ret
46 END_COMPILERRT_FUNCTION(__floatundisf)
47
48 #endif // __i386__
49
50 */
51
52 // branch-free, x87-free implementation - faster at the expense of code size
53
54 #ifdef __i386__
55
CONST_SECTION

// Read-only constants for __floatundisf.  Both are addressed relative to the
// PIC base that the function keeps in %ecx (label 0 in the function body).
//
// The layout is load-bearing.  STICKY indexes from the sticky label with
// %eax scaled by 8, and the function arranges for %eax to be 0 (small input)
// or -1 (big input).  Index -1 therefore reads the quad immediately below
// the sticky label, which must be the 0xfff mask.  Do not insert data or
// alignment padding between the mask and the sticky label.

        .balign 16
twop52:
        .quad 0x4330000000000000        // bit pattern of the double 0x1.0p52
        .quad 0x0000000000000fff        // STICKY index -1: low 12 bit mask
sticky:
        .quad 0x0000000000000000        // STICKY index 0: empty mask

#define                 TWOp52                  twop52-0b(%ecx)
#define                 STICKY                  sticky-0b(%ecx,%eax,8)
74
.text
.balign 4

//-----------------------------------------------------------------------
// float __floatundisf(du_int a)
//
// Converts an unsigned 64-bit integer to float without branches and
// without x87 arithmetic (x87 is used only to hand back the result).
//
// In:    4(%esp) = low 32 bits of a, 8(%esp) = high 32 bits of a
// Out:   result is loaded onto the x87 stack by the final flds
// Clobb: eax, ecx, edx, xmm0-xmm3, flags; the dword at 4(%esp) is
//        overwritten and used as scratch for the result
//
// Method: an input below 2^52 fits in a double mantissa, so it is
// converted by OR-ing it into the mantissa of 0x1.0p52 and subtracting
// 0x1.0p52 again.  An input of 2^52 or more (called big below) is first
// shifted right by 12 with its low 12 bits OR-ed back in as sticky bits,
// converted the same way, rounded to float, and finally rescaled by
// adding 12 to the float exponent field.
//-----------------------------------------------------------------------
DEFINE_COMPILERRT_FUNCTION(__floatundisf)
        // PIC base.  The label has to stay numbered 0 because TWOp52 and
        // STICKY are written in terms of 0b.
        calll           0f
0:      popl            %ecx                    // ecx = runtime address of label 0

        // eax = big ? -1 : 0
        movl            8(%esp),        %eax    // high dword of a
        shrl            %eax                    // halve so the add below cannot wrap
        addl            $0x7ff80000,    %eax    // bit 31 set iff high dword >= 0x00100000
        sarl            $31,            %eax    // smear bit 31 across the register

        // edx = big ? 12 : 0  (shift count now, exponent adjustment later)
        movl            $12,            %edx
        andl            %eax,           %edx

        // xmm0 = a as a 64-bit integer in the low quadword
        movd            4(%esp),        %xmm0
        movd            8(%esp),        %xmm1
        punpckldq       %xmm1,          %xmm0

        movd            %edx,           %xmm3   // big ? 12 : 0
        movsd           TWOp52,         %xmm2   // 0x1.0p52
        movsd           STICKY,         %xmm1   // big ? 0xfff : 0

        andpd           %xmm0,          %xmm1   // big ? a & 0xfff : 0
        psrlq           %xmm3,          %xmm0   // big ? a >> 12 : a
        orpd            %xmm2,          %xmm1   // 0x1.0p52 | (big ? a & 0xfff : 0)
        orpd            %xmm1,          %xmm0   // 0x1.0p52 | (big ? (a >> 12 | a & 0xfff) : a)
        subsd           %xmm2,          %xmm0   // (double)(big ? (a >> 12 | a & 0xfff) : a)
        cvtsd2ss        %xmm0,          %xmm0   // the same value rounded to float
        pslld           $23,            %xmm3   // big ? 12 << 23 : 0, aligned with the float exponent
        paddd           %xmm3,          %xmm0   // (float)a

        // Return through the x87 stack, reusing the argument slot as scratch.
        movd            %xmm0,          4(%esp)
        flds            4(%esp)
        ret
END_COMPILERRT_FUNCTION(__floatundisf)
105
106 #endif // __i386__
107
108 NO_EXEC_STACK_DIRECTIVE
109