; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Make sure fdiv is promoted to f32.
; Plain f16 fdiv: VI converts both operands to f32, does rcp+mul there,
; converts back, then uses v_div_fixup_f16 to patch up special cases.
; GCN-LABEL: {{^}}v_fdiv_f16
; SI-DAG: v_div_scale_f32

; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI-DAG: v_cvt_f32_f16_e32 [[CVT_LHS:v[0-9]+]], [[LHS]]
; VI-DAG: v_cvt_f32_f16_e32 [[CVT_RHS:v[0-9]+]], [[RHS]]

; VI-DAG: v_rcp_f32_e32 [[RCP_RHS:v[0-9]+]], [[CVT_RHS]]
; VI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[RCP_RHS]], [[CVT_LHS]]
; VI: v_cvt_f16_f32_e32 [[CVT_BACK:v[0-9]+]], [[MUL]]
; VI: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK]], [[RHS]], [[LHS]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; 1.0 / x should select directly to v_rcp_f16 on VI.
; GCN-LABEL: {{^}}v_rcp_f16:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half 1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; 1.0 / |x|: the fabs should fold into the rcp as a source modifier (e64 form).
; GCN-LABEL: {{^}}v_rcp_f16_abs:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rcp_f16_e64 [[RESULT:v[0-9]+]], |[[VAL]]|
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_abs(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.abs = call half @llvm.fabs.f16(half %b.val)
  %r.val = fdiv half 1.0, %b.abs
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; arcp 1.0 / x also selects to v_rcp_f16.
; GCN-LABEL: {{^}}v_rcp_f16_arcp:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_arcp(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv arcp half 1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; -1.0 / x: the negation should fold into the rcp as a source modifier (e64 form).
; GCN-LABEL: {{^}}v_rcp_f16_neg:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half -1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; 1.0 / sqrt(x) should combine into a single v_rsq_f16.
; GCN-LABEL: {{^}}v_rsq_f16:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rsq_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rsq_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
  %r.val = fdiv half 1.0, %b.sqrt
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; -1.0 / sqrt(x) does not form rsq; expect sqrt followed by a negated rcp.
; GCN-LABEL: {{^}}v_rsq_f16_neg:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_sqrt_f16_e32 [[SQRT:v[0-9]+]], [[VAL]]
; VI-NEXT: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[SQRT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rsq_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
  %r.val = fdiv half -1.0, %b.sqrt
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; arcp fdiv lowers to rcp + mul, no div_fixup.
; GCN-LABEL: {{^}}v_fdiv_f16_arcp:
; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16_arcp(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv arcp half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; With "unsafe-fp-math" (attribute #2), plain fdiv also lowers to rcp + mul.
; GCN-LABEL: {{^}}v_fdiv_f16_unsafe:
; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16_unsafe(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #2 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare half @llvm.sqrt.f16(half) #1
declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }