test/CodeGen/AMDGPU/ret.ll

   1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   3
     ; This file is compiled twice with llc (-mcpu=tahiti and -mcpu=tonga); both
     ; invocations are verified against the same check prefix, so every
     ; expectation below has to hold for both targets.
     ;
     ; Export intrinsic called by several shaders in this file. It takes five i32
     ; operands followed by four float operands and returns nothing. Every call
     ; here passes (15, 1, 1, 0, 1, <four floats>), and the matching expectation
     ; is an `exp 15, 0, 1, 1, 1, ...` instruction, i.e. the printed operand
     ; order is not the same as the intrinsic's argument order.
   4 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
   5
   6 ; GCN-LABEL: {{^}}vgpr:
   7 ; GCN: v_mov_b32_e32 v1, v0
   8 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
   9 ; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
  10 ; GCN: s_waitcnt expcnt(0)
  11 ; GCN-NOT: s_endpgm
     ; vgpr: amdgpu_vs shader returning two floats.
     ; Arguments are unnamed: %0 is the byval pointer, %1 and %2 are the inreg
     ; i32s, %3 is the float.
     ; Body: export %3 in all four float slots, then return {%3 + 1.0, %3}.
     ; Expected code: the incoming v0 is copied to v1, the sum is written to v0,
     ; the export reads v1, a wait on the export counter follows, and no
     ; s_endpgm appears after it.
  12 define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  13   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  14   %x = fadd float %3, 1.0
  15   %a = insertvalue {float, float} undef, float %x, 0
  16   %b = insertvalue {float, float} %a, float %3, 1
  17   ret {float, float} %b
  18 }
  19
  20 ; GCN-LABEL: {{^}}vgpr_literal:
  21 ; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0
  22 ; GCN: s_waitcnt expcnt(0)
  23 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
  24 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
  25 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
  26 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
  27 ; GCN-NOT: s_endpgm
     ; vgpr_literal: amdgpu_vs shader returning a constant struct of four floats.
     ; Body: export the float argument %3 in all four slots, then return the
     ; literal {1.0, 2.0, 4.0, -1.0}.
     ; Expected code: the export reads v0 directly, a wait on the export counter
     ; comes next, and only afterwards are v0..v3 loaded with the four constants
     ; (in any order). No s_endpgm may follow.
  28 define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  29   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  30   ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
  31 }
  32
  33
  34 ; GCN: .long 165580
  35 ; GCN-NEXT: .long 562
  36 ; GCN-NEXT: .long 165584
  37 ; GCN-NEXT: .long 562
  38 ; GCN-LABEL: {{^}}vgpr_ps_addr0:
  39 ; GCN-NOT: v_mov_b32_e32 v0
  40 ; GCN-NOT: v_mov_b32_e32 v1
  41 ; GCN-NOT: v_mov_b32_e32 v2
  42 ; GCN: v_mov_b32_e32 v3, v4
  43 ; GCN: v_mov_b32_e32 v4, v6
  44 ; GCN-NOT: s_endpgm
     ; Attribute group #0 sets "InitialPSInputAddr" to "0"; it is also used by
     ; other pixel-shader tests in this file.
  45 attributes #0 = { "InitialPSInputAddr"="0" }
     ; vgpr_ps_addr0: amdgpu_ps shader returning five floats taken from its
     ; inputs. Arguments are unnamed: %0 pointer, %1/%2 inreg i32, %3..%9 integer
     ; vectors, %10..%18 floats.
     ; Returned elements, in order: %4[0], %4[1], %7[0], %8[0] (each bitcast from
     ; i32 to float) and the float argument %12.
     ; The expected .long values are two key/value pairs: 165580 (0x286CC) and
     ; 165584 (0x286D0), both paired with 562 here.
     ; NOTE(review): these look like the pixel-shader input enable/address
     ; register offsets and their masks -- confirm against the backend's
     ; register definitions.
     ; Expected code: no moves into v0, v1 or v2 before v3 is loaded from v4,
     ; then v4 is loaded from v6; no s_endpgm follows.
  46 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  47   %i0 = extractelement <2 x i32> %4, i32 0
  48   %i1 = extractelement <2 x i32> %4, i32 1
  49   %i2 = extractelement <2 x i32> %7, i32 0
  50   %i3 = extractelement <2 x i32> %8, i32 0
  51   %f0 = bitcast i32 %i0 to float
  52   %f1 = bitcast i32 %i1 to float
  53   %f2 = bitcast i32 %i2 to float
  54   %f3 = bitcast i32 %i3 to float
  55   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  56   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  57   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  58   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  59   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  60   ret {float, float, float, float, float} %r4
  61 }
  62
  63
  64 ; GCN: .long 165580
  65 ; GCN-NEXT: .long 1
  66 ; GCN-NEXT: .long 165584
  67 ; GCN-NEXT: .long 1
  68 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
  69 ; GCN: v_mov_b32_e32 v0, 1.0
  70 ; GCN-NOT: s_endpgm
     ; ps_input_ena_no_inputs: amdgpu_ps shader that reads none of its arguments
     ; and returns the constant 1.0. It uses attribute group #0
     ; ("InitialPSInputAddr"="0", defined earlier in this file).
     ; Even though no input is used, both expected .long values (after 165580
     ; and after 165584) are 1 rather than 0.
     ; NOTE(review): presumably at least one pixel-shader input must stay
     ; enabled -- confirm against the backend.
     ; Expected code: v0 is loaded with 1.0 and no s_endpgm follows.
  71 define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  72   ret float 1.0
  73 }
  74
  75
  76 ; GCN: .long 165580
  77 ; GCN-NEXT: .long 2081
  78 ; GCN-NEXT: .long 165584
  79 ; GCN-NEXT: .long 2081
  80 ; GCN-LABEL: {{^}}ps_input_ena_pos_w:
  81 ; GCN-DAG: v_mov_b32_e32 v0, v4
  82 ; GCN-DAG: v_mov_b32_e32 v1, v2
  83 ; GCN: v_mov_b32_e32 v2, v3
  84 ; GCN-NOT: s_endpgm
     ; ps_input_ena_pos_w: amdgpu_ps shader returning {float, <2 x float>}. It
     ; uses attribute group #0 ("InitialPSInputAddr"="0", defined earlier in
     ; this file).
     ; Returned value: element 0 is the float argument %14; element 1 is the
     ; <2 x i32> argument %8 bitcast to <2 x float>.
     ; Both expected .long values (after 165580 and after 165584) are 2081.
     ; NOTE(review): the function name suggests %14 is the position W input --
     ; confirm against the pixel-shader input layout.
     ; Expected code: v0 <- v4 and v1 <- v2 in either order, then v2 <- v3, and
     ; no s_endpgm afterwards.
  85 define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  86   %f = bitcast <2 x i32> %8 to <2 x float>
  87   %s = insertvalue {float, <2 x float>} undef, float %14, 0
  88   %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
  89   ret {float, <2 x float>} %s1
  90 }
  91
  92
  93 ; GCN: .long 165580
  94 ; GCN-NEXT: .long 562
  95 ; GCN-NEXT: .long 165584
  96 ; GCN-NEXT: .long 563
  97 ; GCN-LABEL: {{^}}vgpr_ps_addr1:
  98 ; GCN-DAG: v_mov_b32_e32 v0, v2
  99 ; GCN-DAG: v_mov_b32_e32 v1, v3
 100 ; GCN: v_mov_b32_e32 v2, v4
 101 ; GCN-DAG: v_mov_b32_e32 v3, v6
 102 ; GCN-DAG: v_mov_b32_e32 v4, v8
 103 ; GCN-NOT: s_endpgm
     ; Attribute group #1 sets "InitialPSInputAddr" to "1".
 104 attributes #1 = { "InitialPSInputAddr"="1" }
     ; vgpr_ps_addr1: amdgpu_ps shader returning five floats taken from its
     ; inputs. Arguments are unnamed: %0 pointer, %1/%2 inreg i32, %3..%9 integer
     ; vectors, %10..%18 floats.
     ; Returned elements, in order: %4[0], %4[1], %7[0], %8[0] (each bitcast from
     ; i32 to float) and the float argument %12.
     ; Expected .long values: 562 after 165580 and 563 after 165584. Note that
     ; 563 == 562 | 1, i.e. the first value OR-ed with this function's
     ; "InitialPSInputAddr".
     ; Expected code: every returned element needs a move (v0 <- v2, v1 <- v3,
     ; v2 <- v4, v3 <- v6, v4 <- v8) and no s_endpgm follows.
 105 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
 106   %i0 = extractelement <2 x i32> %4, i32 0
 107   %i1 = extractelement <2 x i32> %4, i32 1
 108   %i2 = extractelement <2 x i32> %7, i32 0
 109   %i3 = extractelement <2 x i32> %8, i32 0
 110   %f0 = bitcast i32 %i0 to float
 111   %f1 = bitcast i32 %i1 to float
 112   %f2 = bitcast i32 %i2 to float
 113   %f3 = bitcast i32 %i3 to float
 114   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
 115   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
 116   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
 117   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
 118   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
 119   ret {float, float, float, float, float} %r4
 120 }
 121
 122
 123 ; GCN: .long 165580
 124 ; GCN-NEXT: .long 562
 125 ; GCN-NEXT: .long 165584
 126 ; GCN-NEXT: .long 631
 127 ; GCN-LABEL: {{^}}vgpr_ps_addr119:
 128 ; GCN-DAG: v_mov_b32_e32 v0, v2
 129 ; GCN-DAG: v_mov_b32_e32 v1, v3
 130 ; GCN: v_mov_b32_e32 v2, v6
 131 ; GCN: v_mov_b32_e32 v3, v8
 132 ; GCN: v_mov_b32_e32 v4, v12
 133 ; GCN-NOT: s_endpgm
     ; Attribute group #2 sets "InitialPSInputAddr" to "119".
 134 attributes #2 = { "InitialPSInputAddr"="119" }
     ; vgpr_ps_addr119: amdgpu_ps shader returning five floats taken from its
     ; inputs. Arguments are unnamed: %0 pointer, %1/%2 inreg i32, %3..%9 integer
     ; vectors, %10..%18 floats.
     ; Returned elements, in order: %4[0], %4[1], %7[0], %8[0] (each bitcast from
     ; i32 to float) and the float argument %12.
     ; Expected .long values: 562 after 165580 and 631 after 165584. Note that
     ; 631 == 562 | 119, i.e. the first value OR-ed with this function's
     ; "InitialPSInputAddr".
     ; Expected code: v0 <- v2 and v1 <- v3 in either order, then v2 <- v6,
     ; v3 <- v8, v4 <- v12 in that order, and no s_endpgm afterwards.
 135 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
 136   %i0 = extractelement <2 x i32> %4, i32 0
 137   %i1 = extractelement <2 x i32> %4, i32 1
 138   %i2 = extractelement <2 x i32> %7, i32 0
 139   %i3 = extractelement <2 x i32> %8, i32 0
 140   %f0 = bitcast i32 %i0 to float
 141   %f1 = bitcast i32 %i1 to float
 142   %f2 = bitcast i32 %i2 to float
 143   %f3 = bitcast i32 %i3 to float
 144   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
 145   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
 146   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
 147   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
 148   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
 149   ret {float, float, float, float, float} %r4
 150 }
 151
 152
 153 ; GCN: .long 165580
 154 ; GCN-NEXT: .long 562
 155 ; GCN-NEXT: .long 165584
 156 ; GCN-NEXT: .long 946
 157 ; GCN-LABEL: {{^}}vgpr_ps_addr418:
 158 ; GCN-NOT: v_mov_b32_e32 v0
 159 ; GCN-NOT: v_mov_b32_e32 v1
 160 ; GCN-NOT: v_mov_b32_e32 v2
 161 ; GCN: v_mov_b32_e32 v3, v4
 162 ; GCN: v_mov_b32_e32 v4, v8
 163 ; GCN-NOT: s_endpgm
     ; Attribute group #3 sets "InitialPSInputAddr" to "418".
 164 attributes #3 = { "InitialPSInputAddr"="418" }
     ; vgpr_ps_addr418: amdgpu_ps shader returning five floats taken from its
     ; inputs. Arguments are unnamed: %0 pointer, %1/%2 inreg i32, %3..%9 integer
     ; vectors, %10..%18 floats.
     ; Returned elements, in order: %4[0], %4[1], %7[0], %8[0] (each bitcast from
     ; i32 to float) and the float argument %12.
     ; Expected .long values: 562 after 165580 and 946 after 165584. Note that
     ; 946 == 562 | 418, i.e. the first value OR-ed with this function's
     ; "InitialPSInputAddr".
     ; Expected code: no moves into v0, v1 or v2 before v3 is loaded from v4,
     ; then v4 is loaded from v8; no s_endpgm follows.
 165 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
 166   %i0 = extractelement <2 x i32> %4, i32 0
 167   %i1 = extractelement <2 x i32> %4, i32 1
 168   %i2 = extractelement <2 x i32> %7, i32 0
 169   %i3 = extractelement <2 x i32> %8, i32 0
 170   %f0 = bitcast i32 %i0 to float
 171   %f1 = bitcast i32 %i1 to float
 172   %f2 = bitcast i32 %i2 to float
 173   %f3 = bitcast i32 %i3 to float
 174   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
 175   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
 176   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
 177   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
 178   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
 179   ret {float, float, float, float, float} %r4
 180 }
 181
 182
 183 ; GCN-LABEL: {{^}}sgpr:
 184 ; GCN: s_add_i32 s0, s3, 2
 185 ; GCN: s_mov_b32 s2, s3
 186 ; GCN-NOT: s_endpgm
     ; sgpr: amdgpu_vs shader returning a struct of three i32 values.
     ; Arguments are unnamed: %0 is the byval pointer, %1 and %2 are the inreg
     ; i32s, %3 is the float (unused here).
     ; Returned value as written: element 0 = %2 + 2, element 2 = %2.
     ; NOTE(review): %b (which places %1 in element 1) is never used -- %c is
     ; built from %a, not %b -- so element 1 of the returned struct stays undef.
     ; The expectations only cover s0 and s2, which matches the IR as written;
     ; confirm whether %c was meant to extend %b before changing anything.
     ; Expected code: s0 = s3 + 2, then s2 = s3, and no s_endpgm afterwards.
 187 define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
 188   %x = add i32 %2, 2
 189   %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
 190   %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
 191   %c = insertvalue {i32, i32, i32} %a, i32 %2, 2
 192   ret {i32, i32, i32} %c
 193 }
 194
 195
 196 ; GCN-LABEL: {{^}}sgpr_literal:
 197 ; GCN: s_mov_b32 s0, 5
 198 ; GCN-NOT: s_mov_b32 s0, s0
 199 ; GCN-DAG: s_mov_b32 s1, 6
 200 ; GCN-DAG: s_mov_b32 s2, 7
 201 ; GCN-DAG: s_mov_b32 s3, 8
 202 ; GCN-NOT: s_endpgm
     ; sgpr_literal: amdgpu_vs shader returning the constant struct
     ; {i32 5, i32 6, i32 7, i32 8}.
     ; NOTE(review): %x is computed from %2 but never used; the returned value
     ; does not depend on any argument.
     ; Expected code: s0 is loaded with 5 and no `s_mov_b32 s0, s0` self-copy
     ; follows before s1, s2 and s3 are loaded with 6, 7 and 8 (in any order);
     ; no s_endpgm afterwards.
 203 define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
 204   %x = add i32 %2, 2
 205   ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
 206 }
 207
 208
 209 ; GCN-LABEL: {{^}}both:
 210 ; GCN: v_mov_b32_e32 v1, v0
 211 ; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
 212 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
 213 ; GCN-DAG: s_add_i32 s0, s3, 2
 214 ; GCN-DAG: s_mov_b32 s1, s2
 215 ; GCN: s_mov_b32 s2, s3
 216 ; GCN: s_waitcnt expcnt(0)
 217 ; GCN-NOT: s_endpgm
     ; both: amdgpu_vs shader returning a struct that interleaves float and i32
     ; members, so the expectations cover both v and s registers.
     ; Arguments are unnamed: %0 is the byval pointer, %1 and %2 are the inreg
     ; i32s, %3 is the float.
     ; Body: export %3 in all four float slots, then return
     ; {%3 + 1.0, %2 + 2, %3, %1, %2}.
     ; Expected code: v0 is copied to v1 first; the export (reading v1), the
     ; float add into v0, s0 = s3 + 2 and s1 = s2 may appear in any order; then
     ; s2 = s3, then the wait on the export counter, and no s_endpgm afterwards.
 218 define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
 219   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
 220   %v = fadd float %3, 1.0
 221   %s = add i32 %2, 2
 222   %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
 223   %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
 224   %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
 225   %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
 226   %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
 227   ret {float, i32, float, i32, i32} %a4
 228 }
 229
 230
 231 ; GCN-LABEL: {{^}}structure_literal:
 232 ; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0
 233 ; GCN: s_waitcnt expcnt(0)
 234 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
 235 ; GCN-DAG: s_mov_b32 s0, 2
 236 ; GCN-DAG: s_mov_b32 s1, 3
 237 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
 238 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
     ; structure_literal: amdgpu_vs shader returning a constant nested struct
     ; {{float, i32}, {i32, <2 x float>}}.
     ; Body: export the float argument %3 in all four slots, then return
     ; {{1.0, 2}, {3, <2.0, 4.0>}}.
     ; Expected code: the export reads v0, a wait on the export counter follows,
     ; and then the float members land in v0, v1, v2 (1.0, 2.0, 4.0) while the
     ; i32 members land in s0, s1 (2, 3), in any order.
     ; Unlike the other functions in this file, there is no expectation about
     ; s_endpgm here.
 239 define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
 240   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
 241   ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
 242 }