; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Make sure fdiv is promoted to f32.
; Plain f16 fdiv: VI converts both operands to f32, does rcp+mul there,
; converts back, then uses v_div_fixup_f16 to patch up special cases.
; GCN-LABEL: {{^}}v_fdiv_f16
; SI-DAG: v_div_scale_f32

; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI-DAG: v_cvt_f32_f16_e32 [[CVT_LHS:v[0-9]+]], [[LHS]]
; VI-DAG: v_cvt_f32_f16_e32 [[CVT_RHS:v[0-9]+]], [[RHS]]

; VI-DAG: v_rcp_f32_e32 [[RCP_RHS:v[0-9]+]], [[CVT_RHS]]
; VI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[RCP_RHS]], [[CVT_LHS]]
; VI: v_cvt_f16_f32_e32 [[CVT_BACK:v[0-9]+]], [[MUL]]
; VI: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK]], [[RHS]], [[LHS]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; 1.0 / x should select directly to v_rcp_f16 on VI.
; GCN-LABEL: {{^}}v_rcp_f16:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half 1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; 1.0 / |x|: the fabs should fold into the rcp as a source modifier (e64 form).
; GCN-LABEL: {{^}}v_rcp_f16_abs:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rcp_f16_e64 [[RESULT:v[0-9]+]], |[[VAL]]|
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_abs(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.abs = call half @llvm.fabs.f16(half %b.val)
  %r.val = fdiv half 1.0, %b.abs
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; arcp 1.0 / x also selects to v_rcp_f16.
; GCN-LABEL: {{^}}v_rcp_f16_arcp:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_arcp(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv arcp half 1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; -1.0 / x: the negation should fold into the rcp as a source modifier (e64 form).
; GCN-LABEL: {{^}}v_rcp_f16_neg:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half -1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; 1.0 / sqrt(x) should combine into a single v_rsq_f16.
; GCN-LABEL: {{^}}v_rsq_f16:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_rsq_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rsq_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
  %r.val = fdiv half 1.0, %b.sqrt
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; -1.0 / sqrt(x) does not form rsq; expect sqrt followed by a negated rcp.
; GCN-LABEL: {{^}}v_rsq_f16_neg:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI: v_sqrt_f16_e32 [[SQRT:v[0-9]+]], [[VAL]]
; VI-NEXT: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[SQRT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rsq_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
  %r.val = fdiv half -1.0, %b.sqrt
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; arcp fdiv lowers to rcp + mul, no div_fixup.
; GCN-LABEL: {{^}}v_fdiv_f16_arcp:
; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16_arcp(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv arcp half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
; With "unsafe-fp-math" (attribute #2), plain fdiv also lowers to rcp + mul.
; GCN-LABEL: {{^}}v_fdiv_f16_unsafe:
; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16_unsafe(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #2 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare half @llvm.sqrt.f16(half) #1
declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }