test/CodeGen/AMDGPU/branch-relaxation.ll

   1 ; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s
   2 ; Restrict maximum branch to between +7 and -8 dwords
   3
   4 ; Used to emit an instruction that is always 4 bytes. The size estimate for
   5 ; inline asm always assumes each instruction is the maximum size.
   6 declare void @llvm.amdgcn.s.sleep(i32) #0
   7
   8 declare i32 @llvm.amdgcn.workitem.id.x() #1
   9
  10
  11 ; GCN-LABEL: {{^}}uniform_conditional_max_short_forward_branch:
  12 ; GCN: s_load_dword [[CND:s[0-9]+]]
  13 ; GCN: s_cmp_eq_u32 [[CND]], 0
  14 ; GCN-NEXT: s_cbranch_scc1 [[BB3:BB[0-9]+_[0-9]+]]
  15
  16
  17 ; GCN-NEXT: ; BB#1: ; %bb2
  18 ; GCN-NEXT: ;;#ASMSTART
  19 ; GCN-NEXT: v_nop_e64
  20 ; GCN-NEXT: v_nop_e64
  21 ; GCN-NEXT: v_nop_e64
  22 ; GCN-NEXT: ;;#ASMEND
  23 ; GCN-NEXT: s_sleep 0
  24
  25 ; GCN-NEXT: [[BB3]]: ; %bb3
  26 ; GCN: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
  27 ; GCN: buffer_store_dword [[V_CND]]
  28 ; GCN: s_endpgm
  29 define void @uniform_conditional_max_short_forward_branch(i32 addrspace(1)* %arg, i32 %cnd) #0 {
  30 bb: ; Forward branch over %bb2; the checks above expect it to stay a plain s_cbranch_scc1.
  31   %cmp = icmp eq i32 %cnd, 0
  32   br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
  33
  34 bb2:
  35 ; 24 bytes of inline asm (3 x v_nop_e64), followed by the 4 byte s_sleep
  36   call void asm sideeffect
  37    "v_nop_e64
  38     v_nop_e64
  39     v_nop_e64", ""() #0
  40   call void @llvm.amdgcn.s.sleep(i32 0)
  41   br label %bb3
  42
  43 bb3: ; Reached from both %bb and %bb2.
  44   store volatile i32 %cnd, i32 addrspace(1)* %arg
  45   ret void
  46 }
  47
  48 ; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_branch:
  49 ; GCN: s_load_dword [[CND:s[0-9]+]]
  50 ; GCN: s_cmp_eq_u32 [[CND]], 0
  51 ; GCN-NEXT: s_cbranch_scc0 [[LONGBB:BB[0-9]+_[0-9]+]]
  52
  53 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0
  54 ; GCN-NEXT: s_getpc_b64 vcc
  55 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4)
  56 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0
  57 ; GCN-NEXT: s_setpc_b64 vcc
  58
  59 ; GCN-NEXT: [[LONGBB]]:
  60 ; GCN-NEXT: ;;#ASMSTART
  61 ; GCN: v_nop_e64
  62 ; GCN: v_nop_e64
  63 ; GCN: v_nop_e64
  64 ; GCN: v_nop_e64
  65 ; GCN-NEXT: ;;#ASMEND
  66
  67 ; GCN-NEXT: [[ENDBB]]:
  68 ; GCN: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
  69 ; GCN: buffer_store_dword [[V_CND]]
  70 ; GCN: s_endpgm
  71 define void @uniform_conditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %cnd) #0 {
  72 bb0: ; 32 bytes separate the branch from %bb3; the checks above expect an s_getpc_b64/s_setpc_b64 long jump.
  73   %cmp = icmp eq i32 %cnd, 0
  74   br i1 %cmp, label %bb3, label %bb2 ; +9 dword branch
  75
  76 bb2:
  77 ; 32 bytes of inline asm (4 x v_nop_e64)
  78   call void asm sideeffect
  79    "v_nop_e64
  80     v_nop_e64
  81     v_nop_e64
  82     v_nop_e64", ""() #0
  83   br label %bb3
  84
  85 bb3: ; Reached from both %bb0 and %bb2.
  86   store volatile i32 %cnd, i32 addrspace(1)* %arg
  87   ret void
  88 }
  89
  90 ; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch:
  91 ; GCN: s_load_dword [[CND:s[0-9]+]]
  92 ; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
  93 ; GCN-DAG: v_cmp_eq_f32_e64 vcc, [[CND]], 0
  94 ; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
  95
  96 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0
  97 ; GCN-NEXT: s_getpc_b64 vcc
  98 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4)
  99 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0
 100 ; GCN-NEXT: s_setpc_b64 vcc
 101
 102 ; GCN-NEXT: [[LONGBB]]:
 103 ; GCN: v_nop_e64
 104 ; GCN: v_nop_e64
 105 ; GCN: v_nop_e64
 106 ; GCN: v_nop_e64
 107
 108 ; GCN: [[ENDBB]]:
 109 ; GCN: buffer_store_dword [[V_CND]]
 110 ; GCN: s_endpgm
 111 define void @uniform_conditional_min_long_forward_vcnd_branch(float addrspace(1)* %arg, float %cnd) #0 {
 112 bb0: ; Float compare variant: the checks above expect the condition in vcc and an s_cbranch_vccz over a long jump.
 113   %cmp = fcmp oeq float %cnd, 0.0
 114   br i1 %cmp, label %bb3, label %bb2 ; + 8 dword branch
 115
 116 bb2: ; The size note for this block lives inside the asm string itself.
 117   call void asm sideeffect " ; 32 bytes
 118     v_nop_e64
 119     v_nop_e64
 120     v_nop_e64
 121     v_nop_e64", ""() #0
 122   br label %bb3
 123
 124 bb3: ; Reached from both %bb0 and %bb2.
 125   store volatile float %cnd, float addrspace(1)* %arg
 126   ret void
 127 }
 128
 129 ; GCN-LABEL: {{^}}min_long_forward_vbranch:
 130
 131 ; GCN: buffer_load_dword
 132 ; GCN: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
 133 ; GCN: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
 134 ; GCN: s_xor_b64 [[SAVE]], exec, [[SAVE]]
 135
 136 ; GCN: v_nop_e64
 137 ; GCN: v_nop_e64
 138 ; GCN: v_nop_e64
 139 ; GCN: v_nop_e64
 140
 141 ; GCN: s_or_b64 exec, exec, [[SAVE]]
 142 ; GCN: buffer_store_dword
 143 ; GCN: s_endpgm
 144 define void @min_long_forward_vbranch(i32 addrspace(1)* %arg) #0 {
 145 bb: ; The condition comes from a load indexed by the workitem id; the checks above expect exec masking around %bb2.
 146   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 147   %tid.ext = zext i32 %tid to i64
 148   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tid.ext
 149   %load = load volatile i32, i32 addrspace(1)* %gep
 150   %cmp = icmp eq i32 %load, 0
 151   br i1 %cmp, label %bb3, label %bb2 ; + 8 dword branch
 152
 153 bb2: ; The size note for this block lives inside the asm string itself.
 154   call void asm sideeffect " ; 32 bytes
 155     v_nop_e64
 156     v_nop_e64
 157     v_nop_e64
 158     v_nop_e64", ""() #0
 159   br label %bb3
 160
 161 bb3: ; Stores the loaded value back to the address it was loaded from.
 162   store volatile i32 %load, i32 addrspace(1)* %gep
 163   ret void
 164 }
 165
 166 ; GCN-LABEL: {{^}}long_backward_sbranch:
 167 ; GCN: s_mov_b32 [[LOOPIDX:s[0-9]+]], 0{{$}}
 168
 169 ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]: ; %bb2
 170 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
 171 ; GCN-NEXT: s_add_i32 [[INC:s[0-9]+]], [[LOOPIDX]], 1
 172 ; GCN-NEXT: s_cmp_lt_i32 [[INC]], 10
 173
 174 ; GCN-NEXT: ;;#ASMSTART
 175 ; GCN-NEXT: v_nop_e64
 176 ; GCN-NEXT: v_nop_e64
 177 ; GCN-NEXT: v_nop_e64
 178 ; GCN-NEXT: ;;#ASMEND
 179
 180 ; GCN-NEXT: s_cbranch_scc0 [[ENDBB:BB[0-9]+_[0-9]+]]
 181
 182 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb2
 183 ; GCN-NEXT: ; in Loop: Header=[[LOOPBB]] Depth=1
 184 ; GCN-NEXT: s_getpc_b64 vcc
 185 ; GCN-NEXT: s_sub_u32 vcc_lo, vcc_lo, ([[LONG_JUMP]]+4)-[[LOOPBB]]
 186 ; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0
 187 ; GCN-NEXT: s_setpc_b64 vcc
 188
 189 ; GCN-NEXT: [[ENDBB]]:
 190 ; GCN-NEXT: s_endpgm
 191 define void @long_backward_sbranch(i32 addrspace(1)* %arg) #0 {
 192 bb:
 193   br label %bb2
 194
 195 bb2: ; Single-block loop: %loop.idx starts at 0 and the loop repeats while %inc < 10.
 196   %loop.idx = phi i32 [ 0, %bb ], [ %inc, %bb2 ]
 197    ; 24 bytes of inline asm (3 x v_nop_e64)
 198   call void asm sideeffect
 199    "v_nop_e64
 200     v_nop_e64
 201     v_nop_e64", ""() #0
 202   %inc = add nsw i32 %loop.idx, 1 ; add cost 4
 203   %cmp = icmp slt i32 %inc, 10 ; condition cost = 8
 204   br i1 %cmp, label %bb2, label %bb3 ; backward branch to %bb2; the checks above expect an s_sub_u32/s_subb_u32 long jump
 205
 206 bb3:
 207   ret void
 208 }
 209
 210 ; Requires expansion of the unconditional branch from %bb2 to %bb4 (and
 211 ; expansion of the conditional branch from %bb0 to %bb3).
 212
 213 ; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch:
 214 ; GCN: s_cmp_eq_u32
 215 ; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]+_[0-9]+]]
 216 ; Expanded conditional branch: long jump from %bb0 to %bb3.
 217 ; GCN-NEXT: [[LONG_JUMP0:BB[0-9]+_[0-9]+]]: ; %bb0
 218 ; GCN-NEXT: s_getpc_b64 vcc
 219 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB3:BB[0-9]+_[0-9]+]]-([[LONG_JUMP0]]+4)
 220 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}}
 221 ; GCN-NEXT: s_setpc_b64 vcc
 222
 223 ; GCN-NEXT: [[BB2]]: ; %bb2
 224 ; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
 225 ; GCN: buffer_store_dword [[BB2_K]]
 226 ; GCN: s_waitcnt vmcnt(0)
 227 ; Expanded unconditional branch: long jump from %bb2 to %bb4.
 228 ; GCN-NEXT: [[LONG_JUMP1:BB[0-9]+_[0-9]+]]: ; %bb2
 229 ; GCN-NEXT: s_getpc_b64 vcc
 230 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB4:BB[0-9]+_[0-9]+]]-([[LONG_JUMP1]]+4)
 231 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}}
 232 ; GCN-NEXT: s_setpc_b64 vcc
 233
 234 ; GCN: [[BB3]]: ; %bb3
 235 ; GCN: v_nop_e64
 236 ; GCN: v_nop_e64
 237 ; GCN: v_nop_e64
 238 ; GCN: v_nop_e64
 239 ; GCN: ;;#ASMEND
 240
 241 ; GCN-NEXT: [[BB4]]: ; %bb4
 242 ; GCN: v_mov_b32_e32 [[BB4_K:v[0-9]+]], 63
 243 ; GCN: buffer_store_dword [[BB4_K]]
 244 ; GCN-NEXT: s_endpgm
 245 ; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
 246 define void @uniform_unconditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
 247 bb0: ; Branches on the argument %arg1: nonzero goes to %bb2, zero goes to %bb3.
 248   %tmp = icmp ne i32 %arg1, 0
 249   br i1 %tmp, label %bb2, label %bb3
 250
 251 bb2: ; Stores the marker value 17, then jumps over %bb3 to %bb4.
 252   store volatile i32 17, i32 addrspace(1)* undef
 253   br label %bb4
 254
 255 bb3:
 256   ; 32 byte asm (4 x v_nop_e64) sitting between %bb2 and %bb4
 257   call void asm sideeffect
 258    "v_nop_e64
 259     v_nop_e64
 260     v_nop_e64
 261     v_nop_e64", ""() #0
 262   br label %bb4
 263
 264 bb4: ; Reached from both %bb2 and %bb3; stores the marker value 63 to %arg.
 265   store volatile i32 63, i32 addrspace(1)* %arg
 266   ret void
 267 }
 268
 269 ; GCN-LABEL: {{^}}uniform_unconditional_min_long_backward_branch:
 270 ; GCN-NEXT: ; BB#0: ; %entry
 271
 272 ; GCN-NEXT: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop
 273 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
 274 ; GCN-NEXT: ;;#ASMSTART
 275 ; GCN-NEXT: v_nop_e64
 276 ; GCN-NEXT: v_nop_e64
 277 ; GCN-NEXT: v_nop_e64
 278 ; GCN-NEXT: v_nop_e64
 279 ; GCN-NEXT: ;;#ASMEND
 280 ; Expanded unconditional backward branch: long jump from the end of %loop to its start.
 281 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
 282 ; GCN-NEXT: ; in Loop: Header=[[LOOP]] Depth=1
 283 ; GCN-NEXT: s_getpc_b64 vcc
 284 ; GCN-NEXT: s_sub_u32 vcc_lo, vcc_lo, ([[LONGBB]]+4)-[[LOOP]]
 285 ; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0{{$}}
 286 ; GCN-NEXT: s_setpc_b64 vcc
 287 ; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
 288 define void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
 289 entry: ; Neither argument is used; the function is an unconditional infinite loop.
 290   br label %loop
 291
 292 loop:
 293   ; 32 byte asm (4 x v_nop_e64) forming the whole loop body
 294   call void asm sideeffect
 295    "v_nop_e64
 296     v_nop_e64
 297     v_nop_e64
 298     v_nop_e64", ""() #0
 299   br label %loop
 300 }
 301
 302 ; Expansion of branch from %bb1 to %bb3 introduces need to expand
 303 ; branch from %bb0 to %bb2
 304
 305 ; GCN-LABEL: {{^}}expand_requires_expand:
 306 ; GCN-NEXT: ; BB#0: ; %bb0
 307 ; GCN: s_load_dword
 308 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 0{{$}}
 309 ; GCN-NEXT: s_cbranch_scc0 [[BB1:BB[0-9]+_[0-9]+]]
 310 ; Long jump from %bb0 to %bb2; this first mention captures the %bb2 label.
 311 ; GCN-NEXT: [[LONGBB0:BB[0-9]+_[0-9]+]]: ; %bb0
 312 ; GCN-NEXT: s_getpc_b64 vcc
 313 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB2:BB[0-9]+_[0-9]+]]-([[LONGBB0]]+4)
 314 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}}
 315 ; GCN-NEXT: s_setpc_b64 vcc
 316
 317 ; GCN-NEXT: [[BB1]]: ; %bb1
 318 ; GCN-NEXT: s_load_dword
 319 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 320 ; GCN-NEXT: s_cmp_eq_u32 s{{[0-9]+}}, 3{{$}}
 321 ; GCN-NEXT: s_cbranch_scc0 [[BB2]]
 322 ; Long jump from %bb1 to %bb3.
 323 ; GCN-NEXT: [[LONGBB1:BB[0-9]+_[0-9]+]]: ; %bb1
 324 ; GCN-NEXT: s_getpc_b64 vcc
 325 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB3:BB[0-9]+_[0-9]+]]-([[LONGBB1]]+4)
 326 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}}
 327 ; GCN-NEXT: s_setpc_b64 vcc
 328
 329 ; GCN-NEXT: [[BB2]]: ; %bb2
 330 ; GCN-NEXT: ;;#ASMSTART
 331 ; GCN-NEXT: v_nop_e64
 332 ; GCN-NEXT: v_nop_e64
 333 ; GCN-NEXT: v_nop_e64
 334 ; GCN-NEXT: v_nop_e64
 335 ; GCN-NEXT: ;;#ASMEND
 336
 337 ; GCN-NEXT: [[BB3]]: ; %bb3
 338 ; GCN-NEXT: s_endpgm
 339 define void @expand_requires_expand(i32 %cond0) #0 {
 340 bb0: ; Negative %cond0 goes straight to %bb2; otherwise %bb1 runs first.
 341   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 ; result is not used below
 342   %cmp0 = icmp slt i32 %cond0, 0
 343   br i1 %cmp0, label %bb2, label %bb1
 344
 345 bb1: ; Skips %bb2 when the loaded value equals 3.
 346   %val = load volatile i32, i32 addrspace(2)* undef
 347   %cmp1 = icmp eq i32 %val, 3
 348   br i1 %cmp1, label %bb3, label %bb2
 349
 350 bb2: ; Four v_nop_e64 of inline asm between %bb1 and %bb3.
 351   call void asm sideeffect
 352    "v_nop_e64
 353     v_nop_e64
 354     v_nop_e64
 355     v_nop_e64", ""() #0
 356   br label %bb3
 357
 358 bb3:
 359   ret void
 360 }
 361
 362 ; Requires expansion of the required skip branch.
 363
 364 ; GCN-LABEL: {{^}}uniform_inside_divergent:
 365 ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
 366 ; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
 367 ; GCN-NEXT: s_xor_b64  [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
 368 ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
 369 ; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]]
 370 ; Long jump emitted in %entry, taken when the execnz branch above falls through.
 371 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %entry
 372 ; GCN-NEXT: s_getpc_b64 vcc
 373 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB2:BB[0-9]+_[0-9]+]]-([[LONGBB]]+4)
 374 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}}
 375 ; GCN-NEXT: s_setpc_b64 vcc
 376
 377 ; GCN-NEXT: [[IF]]: ; %if
 378 ; GCN: buffer_store_dword
 379 ; GCN: s_cmp_lg_u32
 380 ; GCN: s_cbranch_scc1 [[ENDIF]]
 381
 382 ; GCN-NEXT: ; BB#2: ; %if_uniform
 383 ; GCN: buffer_store_dword
 384 ; GCN: s_waitcnt vmcnt(0)
 385
 386 ; GCN-NEXT: [[ENDIF]]: ; %endif
 387 ; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
 388 ; GCN-NEXT: s_endpgm
 389 define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) #0 {
 390 entry: ; Outer condition depends on the workitem id (tid < 16).
 391   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 392   %d_cmp = icmp ult i32 %tid, 16
 393   br i1 %d_cmp, label %if, label %endif
 394
 395 if: ; Inner condition depends only on the argument %cond.
 396   store i32 0, i32 addrspace(1)* %out
 397   %u_cmp = icmp eq i32 %cond, 0
 398   br i1 %u_cmp, label %if_uniform, label %endif
 399
 400 if_uniform:
 401   store i32 1, i32 addrspace(1)* %out
 402   br label %endif
 403
 404 endif: ; Reached from %entry, %if and %if_uniform.
 405   ret void
 406 }
 407
 408 ; si_mask_branch
 409 ; s_cbranch_execz
 410 ; s_branch
 411
 412 ; GCN-LABEL: {{^}}analyze_mask_branch:
 413 ; GCN: v_cmp_lt_f32_e32 vcc
 414 ; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
 415 ; GCN-NEXT: s_xor_b64 [[MASK]], exec, [[MASK]]
 416 ; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]]
 417 ; GCN-NEXT: s_cbranch_execz [[BRANCH_SKIP:BB[0-9]+_[0-9]+]]
 418 ; GCN-NEXT: s_branch [[LOOP_BODY:BB[0-9]+_[0-9]+]]
 419
 420 ; GCN-NEXT: [[BRANCH_SKIP]]: ; %entry
 421 ; GCN-NEXT: s_getpc_b64 vcc
 422 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[RET]]-([[BRANCH_SKIP]]+4)
 423 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0
 424 ; GCN-NEXT: s_setpc_b64 vcc
 425
 426 ; GCN-NEXT: [[LOOP_BODY]]: ; %loop_body
 427 ; GCN: s_mov_b64 vcc, -1{{$}}
 428 ; GCN: ;;#ASMSTART
 429 ; GCN: v_nop_e64
 430 ; GCN: v_nop_e64
 431 ; GCN: v_nop_e64
 432 ; GCN: v_nop_e64
 433 ; GCN: v_nop_e64
 434 ; GCN: v_nop_e64
 435 ; GCN: ;;#ASMEND
 436 ; GCN-NEXT: s_cbranch_vccz [[RET]]
 437
 438 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop_body
 439 ; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
 440 ; GCN-NEXT: s_getpc_b64 vcc
 441 ; GCN-NEXT: s_sub_u32 vcc_lo, vcc_lo, ([[LONGBB]]+4)-[[LOOP_BODY]]
 442 ; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0
 443 ; GCN-NEXT: s_setpc_b64 vcc
 444
 445 ; GCN-NEXT: [[RET]]: ; %Flow
 446 ; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
 447 ; GCN: buffer_store_dword
 448 ; GCN-NEXT: s_endpgm
 449 define void @analyze_mask_branch() #0 {
 450 entry: ; The compared value is produced by inline asm through a "=v" output; the loop is entered only when it is > 0.
 451   %reg = call float asm sideeffect "v_mov_b32_e64 $0, 0", "=v"()
 452   %cmp0 = fcmp ogt float %reg, 0.000000e+00
 453   br i1 %cmp0, label %loop, label %ret
 454
 455 loop: ; %phi is 1.0 when coming from %entry and 0.0 when coming from %loop_body.
 456   %phi = phi float [ 0.000000e+00, %loop_body ], [ 1.000000e+00, %entry ]
 457   call void asm sideeffect
 458     "v_nop_e64
 459      v_nop_e64", ""() #0
 460   %cmp1 = fcmp olt float %phi, 8.0
 461   br i1 %cmp1, label %loop_body, label %ret
 462
 463 loop_body: ; Four more v_nop_e64, then the backward branch to %loop.
 464   call void asm sideeffect
 465   "v_nop_e64
 466    v_nop_e64
 467    v_nop_e64
 468    v_nop_e64", ""() #0
 469   br label %loop
 470
 471 ret: ; Reached from %entry and %loop.
 472   store volatile i32 7, i32 addrspace(1)* undef
 473   ret void
 474 }
 475
 476 ; GCN-LABEL: {{^}}long_branch_hang:
 477 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
 478 ; GCN-NEXT: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
 479 ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
 480
 481 ; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
 482 ; GCN: s_setpc_b64
 483
 484 ; GCN-NEXT: [[LONG_BR_0]]:
 485 ; GCN-DAG: v_cmp_lt_i32
 486 ; GCN-DAG: v_cmp_gt_i32
 487 ; GCN: s_cbranch_vccnz
 488
 489 ; GCN: s_setpc_b64
 490 ; GCN: s_setpc_b64
 491
 492 ; GCN: [[LONG_BR_DEST0]]
 493 ; GCN: v_cmp_ne_u32_e32
 494 ; GCN-NEXT: s_cbranch_vccz
 495 ; GCN: s_setpc_b64
 496
 497 ; GCN: s_endpgm
 498 define amdgpu_kernel void @long_branch_hang(i32 addrspace(1)* nocapture %arg, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i64 %arg5) #0 {
 499 bb: ; All four compares are computed up front; only %tmp8 is branched on here.
 500   %tmp = icmp slt i32 %arg2, 9
 501   %tmp6 = icmp eq i32 %arg1, 0
 502   %tmp7 = icmp sgt i32 %arg4, 0
 503   %tmp8 = icmp sgt i32 %arg4, 5
 504   br i1 %tmp8, label %bb9, label %bb13
 505
 506 bb9:                                              ; preds = %bb
 507   %tmp10 = and i1 %tmp7, %tmp ; not used by any later instruction
 508   %tmp11 = icmp slt i32 %arg3, %arg4
 509   %tmp12 = or i1 %tmp11, %tmp7
 510   br i1 %tmp12, label %bb19, label %bb14
 511
 512 bb13:                                             ; preds = %bb
 513   br i1 %tmp6, label %bb19, label %bb14
 514
 515 bb14:                                             ; preds = %bb13, %bb9
 516   %tmp15 = icmp slt i32 %arg3, %arg4
 517   %tmp16 = or i1 %tmp15, %tmp
 518   %tmp17 = and i1 %tmp6, %tmp16
 519   %tmp18 = zext i1 %tmp17 to i32
 520   br label %bb19
 521
 522 bb19:                                             ; preds = %bb14, %bb13, %bb9
 523   %tmp20 = phi i32 [ undef, %bb9 ], [ undef, %bb13 ], [ %tmp18, %bb14 ] ; only the %bb14 edge carries a defined value
 524   %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %arg5
 525   store i32 %tmp20, i32 addrspace(1)* %tmp21, align 4
 526   ret void
 527 }
 528
 529 attributes #0 = { nounwind }
 530 attributes #1 = { nounwind readnone }