2 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
4 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
5 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
7 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
9 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
10 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
11 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
12 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
13 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
14 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
15 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
17 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
18 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
19 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
20 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
21 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
22 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
24 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
25 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
26 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
27 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
28 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
29 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
30 // SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
31 // expected-no-diagnostics
42 // no schedule clauses
45 #pragma omp distribute parallel for simd
46 for (int i = 0; i < n; ++i) {
50 // dist_schedule: static no chunk
53 #pragma omp distribute parallel for simd dist_schedule(static)
54 for (int i = 0; i < n; ++i) {
58 // dist_schedule: static chunk
61 #pragma omp distribute parallel for simd dist_schedule(static, ch)
62 for (int i = 0; i < n; ++i) {
66 // schedule: static no chunk
69 #pragma omp distribute parallel for simd schedule(static)
70 for (int i = 0; i < n; ++i) {
74 // schedule: static chunk
77 #pragma omp distribute parallel for simd schedule(static, ch)
78 for (int i = 0; i < n; ++i) {
82 // schedule: dynamic no chunk
85 #pragma omp distribute parallel for simd schedule(dynamic)
86 for (int i = 0; i < n; ++i) {
90 // schedule: dynamic chunk
93 #pragma omp distribute parallel for simd schedule(dynamic, ch)
94 for (int i = 0; i < n; ++i) {
107 // LAMBDA-LABEL: @main
108 // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
110 // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
112 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
113 // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](
115 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
116 // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](
118 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
119 // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](
121 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
122 // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](
124 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
125 // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](
127 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
128 // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](
130 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
131 // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](
133 // no schedule clauses
136 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]](
137 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
139 #pragma omp distribute parallel for simd
140 for (int i = 0; i < n; ++i) {
142 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]](
143 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
144 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
145 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
146 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
148 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
150 // check EUB for distribute
151 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
152 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
153 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
154 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
155 // LAMBDA-DAG: [[EUB_TRUE]]:
156 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
157 // LAMBDA: br label %[[EUB_END:.+]]
158 // LAMBDA-DAG: [[EUB_FALSE]]:
159 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
160 // LAMBDA: br label %[[EUB_END]]
161 // LAMBDA-DAG: [[EUB_END]]:
162 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
163 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
166 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
167 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
168 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
170 // check exit condition
171 // LAMBDA: [[OMP_JUMP_BACK]]:
172 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
173 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
174 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
175 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
177 // check that PrevLB and PrevUB are passed to the 'for'
178 // LAMBDA: [[DIST_BODY]]:
179 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
180 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
181 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
182 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
183 // check that distlb and distub are properly passed to fork_call
184 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
185 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
186 // LAMBDA: br label %[[DIST_INC:.+]]
188 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
189 // LAMBDA: [[DIST_INC]]:
190 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
191 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
192 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
193 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
194 // LAMBDA: br label %[[OMP_JUMP_BACK]]
196 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
199 // implementation of 'parallel for'
200 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
202 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
203 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
204 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
206 // initialize lb and ub to PrevLB and PrevUB
207 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
208 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
209 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
210 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
211 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
212 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
213 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
214 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
215 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
216 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
217 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
219 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
220 // In this case we use EUB
221 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
222 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
223 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
224 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
225 // LAMBDA: [[PF_EUB_TRUE]]:
226 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
227 // LAMBDA: br label %[[PF_EUB_END:.+]]
228 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
229 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
230 // LAMBDA: br label %[[PF_EUB_END]]
231 // LAMBDA-DAG: [[PF_EUB_END]]:
232 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
233 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
236 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
237 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
238 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
240 // check exit condition
241 // LAMBDA: [[OMP_PF_JUMP_BACK]]:
242 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
243 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
244 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
245 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
247 // check that PrevLB and PrevUB are passed to the 'for'
248 // LAMBDA: [[PF_BODY]]:
249 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
250 // LAMBDA: br label {{.+}}
252 // check stride 1 for 'for' in 'distribute parallel for simd'
253 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
254 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
255 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
256 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
258 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
266 // dist_schedule: static no chunk (same as default - no dist_schedule)
269 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]](
270 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
272 #pragma omp distribute parallel for simd dist_schedule(static)
273 for (int i = 0; i < n; ++i) {
275 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]](
276 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
277 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
278 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
279 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
281 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
283 // check EUB for distribute
284 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
285 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
286 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
287 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
288 // LAMBDA-DAG: [[EUB_TRUE]]:
289 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
290 // LAMBDA: br label %[[EUB_END:.+]]
291 // LAMBDA-DAG: [[EUB_FALSE]]:
292 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
293 // LAMBDA: br label %[[EUB_END]]
294 // LAMBDA-DAG: [[EUB_END]]:
295 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
296 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
299 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
300 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
301 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
303 // check exit condition
304 // LAMBDA: [[OMP_JUMP_BACK]]:
305 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
306 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
307 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
308 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
310 // check that PrevLB and PrevUB are passed to the 'for'
311 // LAMBDA: [[DIST_BODY]]:
312 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
313 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
314 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
315 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
316 // check that distlb and distub are properly passed to fork_call
317 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
318 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
319 // LAMBDA: br label %[[DIST_INC:.+]]
321 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
322 // LAMBDA: [[DIST_INC]]:
323 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
324 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
325 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
326 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
327 // LAMBDA: br label %[[OMP_JUMP_BACK]]
329 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
332 // implementation of 'parallel for'
333 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
335 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
336 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
337 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
339 // initialize lb and ub to PrevLB and PrevUB
340 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
341 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
342 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
343 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
344 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
345 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
346 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
347 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
348 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
349 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
350 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
352 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
353 // In this case we use EUB
354 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
355 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
356 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
357 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
358 // LAMBDA: [[PF_EUB_TRUE]]:
359 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
360 // LAMBDA: br label %[[PF_EUB_END:.+]]
361 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
362 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
363 // LAMBDA: br label %[[PF_EUB_END]]
364 // LAMBDA-DAG: [[PF_EUB_END]]:
365 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
366 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
369 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
370 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
371 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
373 // check exit condition
374 // LAMBDA: [[OMP_PF_JUMP_BACK]]:
375 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
376 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
377 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
378 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
380 // check that PrevLB and PrevUB are passed to the 'for'
381 // LAMBDA: [[PF_BODY]]:
382 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
383 // LAMBDA: br label {{.+}}
385 // check stride 1 for 'for' in 'distribute parallel for simd'
386 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
387 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
388 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
389 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
391 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
398 // dist_schedule: static chunk
401 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]](
402 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
404 #pragma omp distribute parallel for simd dist_schedule(static, ch)
405 for (int i = 0; i < n; ++i) {
407 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
408 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
409 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
410 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
411 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
413 // unlike the previous tests, in this one we have an outer and inner loop for 'distribute'
414 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
415 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
417 // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
418 // check EUB for distribute
419 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
420 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
421 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
422 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
423 // LAMBDA-DAG: [[EUB_TRUE]]:
424 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
425 // LAMBDA: br label %[[EUB_END:.+]]
426 // LAMBDA-DAG: [[EUB_FALSE]]:
427 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
428 // LAMBDA: br label %[[EUB_END]]
429 // LAMBDA-DAG: [[EUB_END]]:
430 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
431 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
434 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
435 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
437 // check exit condition
438 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
439 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
440 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
441 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
443 // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
444 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
446 // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
447 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
448 // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
449 // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
450 // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
452 // check that PrevLB and PrevUB are passed to the 'for'
453 // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
454 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
455 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
456 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
457 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
458 // check that distlb and distub are properly passed to fork_call
459 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
460 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
461 // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]]
464 // LAMBDA: [[DIST_INNER_LOOP_INC]]:
465 // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
466 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
467 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
468 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
469 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
471 // LAMBDA: [[DIST_INNER_LOOP_END]]:
472 // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
474 // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
475 // check NextLB and NextUB
476 // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
477 // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
478 // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
479 // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
480 // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
481 // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
482 // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
483 // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
484 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
487 // LAMBDA: [[DIST_OUTER_LOOP_END]]:
488 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
491 // skip implementation of 'parallel for': using default scheduling and was tested above
497 // schedule: static no chunk
500 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]](
501 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
503 #pragma omp distribute parallel for simd schedule(static)
504 for (int i = 0; i < n; ++i) {
506 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]](
507 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
508 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
509 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
510 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
512 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
513 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
514 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
517 // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
518 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
520 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
521 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
522 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
524 // initialize lb and ub to PrevLB and PrevUB
525 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
526 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
527 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
528 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
529 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
530 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
531 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
532 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
533 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
534 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
535 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
537 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
538 // In this case we use EUB
539 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
540 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
541 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
542 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
543 // LAMBDA: [[PF_EUB_TRUE]]:
544 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
545 // LAMBDA: br label %[[PF_EUB_END:.+]]
546 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
547 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
548 // LAMBDA: br label %[[PF_EUB_END]]
549 // LAMBDA-DAG: [[PF_EUB_END]]:
550 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
551 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
554 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
555 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
556 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
558 // check exit condition
559 // LAMBDA: [[OMP_PF_JUMP_BACK]]:
560 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
561 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
562 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
563 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
565 // check that PrevLB and PrevUB are passed to the 'for'
566 // LAMBDA: [[PF_BODY]]:
567 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
568 // LAMBDA: br label {{.+}}
570 // check stride 1 for 'for' in 'distribute parallel for simd'
571 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
572 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
573 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
574 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
576 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
584 // schedule: static chunk
587 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]](
588 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
590 #pragma omp distribute parallel for simd schedule(static, ch)
591 for (int i = 0; i < n; ++i) {
593 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]](
594 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
595 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
596 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
599 // 'parallel for' implementation using outer and inner loops and PrevEUB
600 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
601 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
602 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
603 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
604 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
606 // initialize lb and ub to PrevLB and PrevUB
607 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
608 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
609 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
610 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
611 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
612 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
613 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
614 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
615 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
616 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
617 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
618 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
620 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
621 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
622 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
623 // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
624 // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
625 // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
626 // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
627 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
628 // LAMBDA: [[PF_EUB_TRUE]]:
629 // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
630 // LAMBDA: br label %[[PF_EUB_END:.+]]
631 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
632 // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
633 // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
634 // LAMBDA: br label %[[PF_EUB_END]]
635 // LAMBDA-DAG: [[PF_EUB_END]]:
636 // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
637 // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
638 // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
639 // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
640 // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
642 // initialize omp.iv (IV = LB)
643 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
644 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
646 // outer loop: while (IV < UB) {
647 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
648 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
649 // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
650 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
652 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
653 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
655 // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]:
656 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
657 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
658 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
659 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
661 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
662 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
665 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
667 // IV = IV + 1 and inner loop latch
668 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
669 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
670 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
671 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
672 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]]
674 // check NextLB and NextUB
675 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
676 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
678 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
679 // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
680 // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
681 // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
682 // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
683 // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
684 // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
685 // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
686 // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
687 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
689 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
690 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
697 // schedule: dynamic no chunk
700 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]](
701 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
703 #pragma omp distribute parallel for simd schedule(dynamic)
704 for (int i = 0; i < n; ++i) {
706 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]](
707 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
708 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
709 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
712 // 'parallel for' implementation using outer and inner loops and PrevEUB
713 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
714 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
715 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
716 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
717 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
719 // initialize lb and ub to PrevLB and PrevUB
720 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
721 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
722 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
723 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
724 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
725 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
726 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
727 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
728 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
729 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
730 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
731 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
732 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
733 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
735 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
736 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
737 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
738 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
740 // initialize omp.iv (IV = LB)
741 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
742 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
743 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
744 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
746 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
747 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
748 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
749 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
750 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
752 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
753 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
756 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
758 // IV = IV + 1 and inner loop latch
759 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
760 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
761 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
762 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
763 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
765 // check NextLB and NextUB
766 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
767 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
769 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
770 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
772 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
779 // schedule: dynamic chunk
782 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]](
783 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
785 #pragma omp distribute parallel for simd schedule(dynamic, ch)
786 for (int i = 0; i < n; ++i) {
788 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]](
789 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
790 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
791 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
794 // 'parallel for' implementation using outer and inner loops and PrevEUB
795 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
796 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
797 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
798 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
799 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
801 // initialize lb and ub to PrevLB and PrevUB
802 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
803 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
804 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
805 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
806 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
807 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
808 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
809 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
810 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
811 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
812 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
813 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
814 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
815 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
817 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
818 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
819 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
820 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
822 // initialize omp.iv (IV = LB)
823 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
824 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
825 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
826 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
828 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
829 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
830 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
831 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
832 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
834 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
835 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
838 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
840 // IV = IV + 1 and inner loop latch
841 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
842 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
843 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
844 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
845 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
847 // check NextLB and NextUB
848 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
849 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
851 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
852 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
854 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
863 // CHECK-LABEL: @main
865 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
866 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
868 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
869 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
871 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
872 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
874 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
875 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
877 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
878 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
880 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
881 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
883 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
884 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
886 // CHECK: call{{.+}} [[TMAIN:@.+]]()
888 // no schedule clauses
891 // CHECK: define internal void [[OFFLOADING_FUN_1]](
892 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
894 #pragma omp distribute parallel for simd
895 for (int i = 0; i < n; ++i) {
897 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
898 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
899 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
900 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
901 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
903 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
905 // check EUB for distribute
906 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
907 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
908 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
909 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
910 // CHECK-DAG: [[EUB_TRUE]]:
911 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
912 // CHECK: br label %[[EUB_END:.+]]
913 // CHECK-DAG: [[EUB_FALSE]]:
914 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
915 // CHECK: br label %[[EUB_END]]
916 // CHECK-DAG: [[EUB_END]]:
917 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
918 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
921 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
922 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
923 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
925 // check exit condition
926 // CHECK: [[OMP_JUMP_BACK]]:
927 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
928 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
929 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
930 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
932 // check that PrevLB and PrevUB are passed to the 'for'
933 // CHECK: [[DIST_BODY]]:
934 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
935 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
936 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
937 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
938 // check that distlb and distub are properly passed to fork_call
939 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
940 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
941 // CHECK: br label %[[DIST_INC:.+]]
943 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
944 // CHECK: [[DIST_INC]]:
945 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
946 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
947 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
948 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
949 // CHECK: br label %[[OMP_JUMP_BACK]]
951 // CHECK-DAG: call void @__kmpc_for_static_fini(
954 // implementation of 'parallel for'
955 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
957 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
958 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
959 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
961 // initialize lb and ub to PrevLB and PrevUB
962 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
963 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
964 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
965 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
966 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
967 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
968 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
969 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
970 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
971 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
972 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
974 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
975 // In this case we use EUB
976 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
977 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
978 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
979 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
980 // CHECK: [[PF_EUB_TRUE]]:
981 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
982 // CHECK: br label %[[PF_EUB_END:.+]]
983 // CHECK-DAG: [[PF_EUB_FALSE]]:
984 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
985 // CHECK: br label %[[PF_EUB_END]]
986 // CHECK-DAG: [[PF_EUB_END]]:
987 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
988 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
991 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
992 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
993 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
995 // check exit condition
996 // CHECK: [[OMP_PF_JUMP_BACK]]:
997 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
998 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
999 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1000 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1002 // check that PrevLB and PrevUB are passed to the 'for'
1003 // CHECK: [[PF_BODY]]:
1004 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1005 // CHECK: br label {{.+}}
1007 // check stride 1 for 'for' in 'distribute parallel for simd'
1008 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1009 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1010 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1011 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1013 // CHECK-DAG: call void @__kmpc_for_static_fini(
1017 // dist_schedule: static no chunk
1020 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1021 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1023 #pragma omp distribute parallel for simd dist_schedule(static)
1024 for (int i = 0; i < n; ++i) {
1026 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1027 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1028 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1029 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1030 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1032 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1034 // check EUB for distribute
1035 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1036 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1037 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1038 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1039 // CHECK-DAG: [[EUB_TRUE]]:
1040 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1041 // CHECK: br label %[[EUB_END:.+]]
1042 // CHECK-DAG: [[EUB_FALSE]]:
1043 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1044 // CHECK: br label %[[EUB_END]]
1045 // CHECK-DAG: [[EUB_END]]:
1046 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1047 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1049 // initialize omp.iv
1050 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1051 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1052 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1054 // check exit condition
1055 // CHECK: [[OMP_JUMP_BACK]]:
1056 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1057 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1058 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1059 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1061 // check that PrevLB and PrevUB are passed to the 'for'
1062 // CHECK: [[DIST_BODY]]:
1063 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1064 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1065 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1066 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1067 // check that distlb and distub are properly passed to fork_call
1068 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1069 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1070 // CHECK: br label %[[DIST_INC:.+]]
1072 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1073 // CHECK: [[DIST_INC]]:
1074 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1075 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1076 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1077 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1078 // CHECK: br label %[[OMP_JUMP_BACK]]
1080 // CHECK-DAG: call void @__kmpc_for_static_fini(
1083 // implementation of 'parallel for'
1084 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1086 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1087 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1088 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1090 // initialize lb and ub to PrevLB and PrevUB
1091 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1092 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1093 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1094 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1095 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1096 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1097 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1098 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1099 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1100 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1101 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1103 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1104 // In this case we use EUB
1105 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1106 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1107 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1108 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1109 // CHECK: [[PF_EUB_TRUE]]:
1110 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1111 // CHECK: br label %[[PF_EUB_END:.+]]
1112 // CHECK-DAG: [[PF_EUB_FALSE]]:
1113 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1114 // CHECK: br label %[[PF_EUB_END]]
1115 // CHECK-DAG: [[PF_EUB_END]]:
1116 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1117 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1119 // initialize omp.iv
1120 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1121 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1122 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1124 // check exit condition
1125 // CHECK: [[OMP_PF_JUMP_BACK]]:
1126 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1127 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1128 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1129 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1131 // check that PrevLB and PrevUB are passed to the 'for'
1132 // CHECK: [[PF_BODY]]:
1133 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1134 // CHECK: br label {{.+}}
1136 // check stride 1 for 'for' in 'distribute parallel for simd'
1137 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1138 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1139 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1140 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1142 // CHECK-DAG: call void @__kmpc_for_static_fini(
1146 // dist_schedule: static chunk
1149 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1150 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1152 #pragma omp distribute parallel for simd dist_schedule(static, ch)
1153 for (int i = 0; i < n; ++i) {
1155 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1156 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1157 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1158 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1159 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1161 // unlike the previous tests, in this one we have an outer and inner loop for 'distribute'
1162 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
1163 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
1165 // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
1166 // check EUB for distribute
1167 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1168 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1169 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1170 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1171 // CHECK-DAG: [[EUB_TRUE]]:
1172 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1173 // CHECK: br label %[[EUB_END:.+]]
1174 // CHECK-DAG: [[EUB_FALSE]]:
1175 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1176 // CHECK: br label %[[EUB_END]]
1177 // CHECK-DAG: [[EUB_END]]:
1178 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1179 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1181 // initialize omp.iv
1182 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1183 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1185 // check exit condition
1186 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1187 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1188 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1189 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
1191 // CHECK: [[DIST_OUTER_LOOP_BODY]]:
1192 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
1194 // CHECK: [[DIST_INNER_LOOP_HEADER]]:
1195 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
1196 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
1197 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
1198 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
1200 // check that PrevLB and PrevUB are passed to the 'for'
1201 // CHECK: [[DIST_INNER_LOOP_BODY]]:
1202 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1203 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1204 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1205 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1206 // check that distlb and distub are properly passed to fork_call
1207 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1208 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1209 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1212 // CHECK: [[DIST_INNER_LOOP_INC]]:
1213 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1214 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1215 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1216 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1217 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
1219 // CHECK: [[DIST_INNER_LOOP_END]]:
1220 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
1222 // CHECK: [[DIST_OUTER_LOOP_INC]]:
1223 // check NextLB and NextUB
1224 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1225 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1226 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1227 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1228 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1229 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1230 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1231 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1232 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
1235 // CHECK: [[DIST_OUTER_LOOP_END]]:
1236 // CHECK-DAG: call void @__kmpc_for_static_fini(
1239 // skip implementation of 'parallel for': using default scheduling and was tested above
1242 // schedule: static no chunk
1245 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
1246 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
1248 #pragma omp distribute parallel for simd schedule(static)
1249 for (int i = 0; i < n; ++i) {
1251 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
1252 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1253 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1254 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1255 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1257 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1258 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
1259 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1262 // 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
1263 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1265 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1266 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1267 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1269 // initialize lb and ub to PrevLB and PrevUB
1270 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1271 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1272 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1273 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1274 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1275 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1276 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1277 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1278 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1279 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1280 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1282 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1283 // In this case we use EUB
1284 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1285 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1286 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1287 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1288 // CHECK: [[PF_EUB_TRUE]]:
1289 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1290 // CHECK: br label %[[PF_EUB_END:.+]]
1291 // CHECK-DAG: [[PF_EUB_FALSE]]:
1292 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1293 // CHECK: br label %[[PF_EUB_END]]
1294 // CHECK-DAG: [[PF_EUB_END]]:
1295 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1296 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1298 // initialize omp.iv
1299 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1300 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1301 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1303 // check exit condition
1304 // CHECK: [[OMP_PF_JUMP_BACK]]:
1305 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1306 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1307 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1308 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1310 // check that PrevLB and PrevUB are passed to the 'for'
1311 // CHECK: [[PF_BODY]]:
1312 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1313 // CHECK: br label {{.+}}
1315 // check stride 1 for 'for' in 'distribute parallel for simd'
1316 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1317 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1318 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1319 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1321 // CHECK-DAG: call void @__kmpc_for_static_fini(
1325 // schedule: static chunk
1328 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
1329 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
1331 #pragma omp distribute parallel for simd schedule(static, ch)
1332 for (int i = 0; i < n; ++i) {
1334 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
1335 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1336 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
1337 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1340 // 'parallel for' implementation using outer and inner loops and PrevEUB
1341 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1342 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1343 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1344 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1345 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1347 // initialize lb and ub to PrevLB and PrevUB
1348 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1349 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1350 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1351 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1352 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1353 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1354 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1355 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1356 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1357 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1358 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1359 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1361 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
1362 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1363 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1364 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
1365 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1366 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
1367 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
1368 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1369 // CHECK: [[PF_EUB_TRUE]]:
1370 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1371 // CHECK: br label %[[PF_EUB_END:.+]]
1372 // CHECK-DAG: [[PF_EUB_FALSE]]:
1373 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1374 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
1375 // CHECK: br label %[[PF_EUB_END]]
1376 // CHECK-DAG: [[PF_EUB_END]]:
1377 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
1378 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
1379 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
1380 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
1381 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
1383 // initialize omp.iv (IV = LB)
1384 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1385 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1387 // outer loop: while (IV < UB) {
1388 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1389 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1390 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1391 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1393 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1394 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
1396 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
1397 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1398 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1399 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1400 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1402 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1403 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1406 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1408 // IV = IV + 1 and inner loop latch
1409 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1410 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1411 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1412 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1413 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
1415 // check NextLB and NextUB
1416 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1417 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1419 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1420 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1421 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1422 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
1423 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
1424 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1425 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1426 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
1427 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
1428 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1430 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1431 // CHECK-DAG: call void @__kmpc_for_static_fini(
1435 // schedule: dynamic no chunk
1438 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
1439 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
1441 #pragma omp distribute parallel for simd schedule(dynamic)
1442 for (int i = 0; i < n; ++i) {
1444 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
1445 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1446 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
1447 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1450 // 'parallel for' implementation using outer and inner loops and PrevEUB
1451 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1452 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1453 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1454 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1455 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1457 // initialize lb and ub to PrevLB and PrevUB
1458 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1459 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1460 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1461 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1462 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1463 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1464 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1465 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1466 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1467 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1468 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1469 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1470 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1471 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1473 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1474 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1475 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1476 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1478 // initialize omp.iv (IV = LB)
1479 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1480 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1481 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1482 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1484 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1485 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1486 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1487 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1488 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1490 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1491 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1494 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1496 // IV = IV + 1 and inner loop latch
1497 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1498 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1499 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1500 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1501 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1503 // check NextLB and NextUB
1504 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1505 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1507 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1508 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1510 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1514 // schedule: dynamic chunk
1517 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
1518 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
1520 #pragma omp distribute parallel for simd schedule(dynamic, ch)
1521 for (int i = 0; i < n; ++i) {
1523 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
1524 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1525 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
1526 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1529 // 'parallel for' implementation using outer and inner loops and PrevEUB
1530 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1531 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1532 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1533 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1534 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1536 // initialize lb and ub to PrevLB and PrevUB
1537 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1538 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1539 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1540 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1541 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1542 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1543 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1544 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1545 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1546 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1547 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1548 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1549 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1550 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1552 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1553 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1554 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1555 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1557 // initialize omp.iv (IV = LB)
1558 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1559 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1560 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1561 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1563 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1564 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1565 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1566 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1567 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1569 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1570 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1573 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1575 // IV = IV + 1 and inner loop latch
1576 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1577 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1578 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1579 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1580 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1582 // check NextLB and NextUB
1583 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1584 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1586 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1587 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1589 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1593 return tmain<int>();
1598 // CHECK: define{{.+}} [[TMAIN]]()
1600 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1601 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
1603 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1604 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
1606 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1607 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
1609 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1610 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
1612 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1613 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
1615 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1616 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
1618 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1619 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
1621 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]](
1622 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
1624 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
1625 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1626 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1627 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1628 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1630 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1632 // check EUB for distribute
1633 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1634 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1635 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1636 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1637 // CHECK-DAG: [[EUB_TRUE]]:
1638 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1639 // CHECK: br label %[[EUB_END:.+]]
1640 // CHECK-DAG: [[EUB_FALSE]]:
1641 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1642 // CHECK: br label %[[EUB_END]]
1643 // CHECK-DAG: [[EUB_END]]:
1644 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1645 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1647 // initialize omp.iv
1648 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1649 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1650 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1652 // check exit condition
1653 // CHECK: [[OMP_JUMP_BACK]]:
1654 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1655 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1656 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1657 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1659 // check that PrevLB and PrevUB are passed to the 'for'
1660 // CHECK: [[DIST_BODY]]:
1661 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1662 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1663 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1664 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1665 // check that distlb and distub are properly passed to fork_call
1666 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1667 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1668 // CHECK: br label %[[DIST_INC:.+]]
1670 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1671 // CHECK: [[DIST_INC]]:
1672 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1673 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1674 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1675 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1676 // CHECK: br label %[[OMP_JUMP_BACK]]
1678 // CHECK-DAG: call void @__kmpc_for_static_fini(
1681 // implementation of 'parallel for'
1682 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1684 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1685 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1686 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1688 // initialize lb and ub to PrevLB and PrevUB
1689 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1690 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1691 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1692 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1693 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1694 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1695 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1696 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1697 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1698 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1699 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1701 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1702 // In this case we use EUB
1703 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1704 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1705 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1706 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1707 // CHECK: [[PF_EUB_TRUE]]:
1708 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1709 // CHECK: br label %[[PF_EUB_END:.+]]
1710 // CHECK-DAG: [[PF_EUB_FALSE]]:
1711 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1712 // CHECK: br label %[[PF_EUB_END]]
1713 // CHECK-DAG: [[PF_EUB_END]]:
1714 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1715 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1717 // initialize omp.iv
1718 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1719 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1720 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1722 // check exit condition
1723 // CHECK: [[OMP_PF_JUMP_BACK]]:
1724 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1725 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1726 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1727 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1729 // check that PrevLB and PrevUB are passed to the 'for'
1730 // CHECK: [[PF_BODY]]:
1731 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1732 // CHECK: br label {{.+}}
1734 // check stride 1 for 'for' in 'distribute parallel for simd'
1735 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1736 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1737 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1738 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1740 // CHECK-DAG: call void @__kmpc_for_static_fini(
1743 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1744 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1746 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1747 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1748 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1749 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1750 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1752 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1754 // check EUB for distribute
1755 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1756 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1757 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1758 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1759 // CHECK-DAG: [[EUB_TRUE]]:
1760 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1761 // CHECK: br label %[[EUB_END:.+]]
1762 // CHECK-DAG: [[EUB_FALSE]]:
1763 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1764 // CHECK: br label %[[EUB_END]]
1765 // CHECK-DAG: [[EUB_END]]:
1766 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1767 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1769 // initialize omp.iv
1770 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1771 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1772 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1774 // check exit condition
1775 // CHECK: [[OMP_JUMP_BACK]]:
1776 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1777 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1778 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1779 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1781 // check that PrevLB and PrevUB are passed to the 'for'
1782 // CHECK: [[DIST_BODY]]:
1783 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1784 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1785 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1786 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1787 // check that distlb and distub are properly passed to fork_call
1788 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1789 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1790 // CHECK: br label %[[DIST_INC:.+]]
1792 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1793 // CHECK: [[DIST_INC]]:
1794 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1795 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1796 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1797 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1798 // CHECK: br label %[[OMP_JUMP_BACK]]
1800 // CHECK-DAG: call void @__kmpc_for_static_fini(
1803 // implementation of 'parallel for'
1804 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1806 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1807 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1808 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1810 // initialize lb and ub to PrevLB and PrevUB
1811 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1812 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1813 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1814 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1815 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1816 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1817 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1818 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1819 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1820 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1821 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1823 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1824 // In this case we use EUB
1825 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1826 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1827 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1828 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1829 // CHECK: [[PF_EUB_TRUE]]:
1830 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1831 // CHECK: br label %[[PF_EUB_END:.+]]
1832 // CHECK-DAG: [[PF_EUB_FALSE]]:
1833 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1834 // CHECK: br label %[[PF_EUB_END]]
1835 // CHECK-DAG: [[PF_EUB_END]]:
1836 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1837 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1839 // initialize omp.iv
1840 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1841 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1842 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1844 // check exit condition
1845 // CHECK: [[OMP_PF_JUMP_BACK]]:
1846 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1847 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1848 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1849 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1851 // check that PrevLB and PrevUB are passed to the 'for'
1852 // CHECK: [[PF_BODY]]:
1853 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1854 // CHECK: br label {{.+}}
1856 // check stride 1 for 'for' in 'distribute parallel for simd'
1857 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1858 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1859 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1860 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1862 // CHECK-DAG: call void @__kmpc_for_static_fini(
1865 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1866 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1868 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1869 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1870 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1871 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1872 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1874 // unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
1875 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
1876 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
1878 // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
1879 // check EUB for distribute
1880 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1881 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1882 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1883 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1884 // CHECK-DAG: [[EUB_TRUE]]:
1885 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1886 // CHECK: br label %[[EUB_END:.+]]
1887 // CHECK-DAG: [[EUB_FALSE]]:
1888 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1889 // CHECK: br label %[[EUB_END]]
1890 // CHECK-DAG: [[EUB_END]]:
1891 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1892 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1894 // initialize omp.iv
1895 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1896 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1898 // check exit condition
1899 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1900 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1901 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1902 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
1904 // CHECK: [[DIST_OUTER_LOOP_BODY]]:
1905 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
1907 // CHECK: [[DIST_INNER_LOOP_HEADER]]:
1908 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
1909 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
1910 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
1911 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
1913 // check that PrevLB and PrevUB are passed to the 'for'
1914 // CHECK: [[DIST_INNER_LOOP_BODY]]:
1915 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1916 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1917 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1918 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1919 // check that distlb and distub are properly passed to fork_call
1920 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1921 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1922 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1925 // CHECK: [[DIST_INNER_LOOP_INC]]:
1926 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1927 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1928 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1929 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1930 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
1932 // CHECK: [[DIST_INNER_LOOP_END]]:
1933 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
1935 // CHECK: [[DIST_OUTER_LOOP_INC]]:
1936 // check NextLB and NextUB
1937 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1938 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1939 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1940 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1941 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1942 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1943 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1944 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1945 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
1948 // CHECK: [[DIST_OUTER_LOOP_END]]:
1949 // CHECK-DAG: call void @__kmpc_for_static_fini(
1952 // skip implementation of 'parallel for': using default scheduling and was tested above
1954 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
1955 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
1957 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
1958 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1959 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1960 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1961 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1963 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1964 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
1965 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1968 // 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
1969 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1971 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1972 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1973 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1975 // initialize lb and ub to PrevLB and PrevUB
1976 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1977 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1978 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1979 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1980 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1981 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1982 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1983 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1984 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1985 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1986 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1988 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1989 // In this case we use EUB
1990 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1991 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1992 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1993 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1994 // CHECK: [[PF_EUB_TRUE]]:
1995 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1996 // CHECK: br label %[[PF_EUB_END:.+]]
1997 // CHECK-DAG: [[PF_EUB_FALSE]]:
1998 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1999 // CHECK: br label %[[PF_EUB_END]]
2000 // CHECK-DAG: [[PF_EUB_END]]:
2001 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
2002 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
2004 // initialize omp.iv
2005 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2006 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2007 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
2009 // check exit condition
2010 // CHECK: [[OMP_PF_JUMP_BACK]]:
2011 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
2012 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
2013 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2014 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
2016 // check that PrevLB and PrevUB are passed to the 'for'
2017 // CHECK: [[PF_BODY]]:
2018 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2019 // CHECK: br label {{.+}}
2021 // check stride 1 for 'for' in 'distribute parallel for simd'
2022 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
2023 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
2024 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
2025 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
2027 // CHECK-DAG: call void @__kmpc_for_static_fini(
2030 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
2031 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
2033 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
2034 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2035 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
2036 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2039 // 'parallel for' implementation using outer and inner loops and PrevEUB
2040 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
2041 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2042 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2043 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2044 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2046 // initialize lb and ub to PrevLB and PrevUB
2047 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2048 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2049 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2050 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2051 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2052 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2053 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2054 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2055 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2056 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2057 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
2058 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2060 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
2061 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2062 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
2063 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
2064 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2065 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
2066 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
2067 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
2068 // CHECK: [[PF_EUB_TRUE]]:
2069 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2070 // CHECK: br label %[[PF_EUB_END:.+]]
2071 // CHECK-DAG: [[PF_EUB_FALSE]]:
2072 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
2073 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
2074 // CHECK: br label %[[PF_EUB_END]]
2075 // CHECK-DAG: [[PF_EUB_END]]:
2076 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
2077 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
2078 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
2079 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
2080 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
2082 // initialize omp.iv (IV = LB)
2083 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2084 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2086 // outer loop: while (IV < UB) {
2087 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2088 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2089 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2090 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2092 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2093 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
2095 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
2096 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2097 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2098 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2099 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2101 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2102 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2105 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2107 // IV = IV + 1 and inner loop latch
2108 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2109 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2110 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2111 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2112 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
2114 // check NextLB and NextUB
2115 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2116 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2118 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2119 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2120 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2121 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
2122 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
2123 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2124 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2125 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
2126 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
2127 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2129 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2130 // CHECK-DAG: call void @__kmpc_for_static_fini(
2133 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
2134 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
2136 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
2137 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2138 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
2139 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2142 // 'parallel for' implementation using outer and inner loops and PrevEUB
2143 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
2144 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2145 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2146 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2147 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2149 // initialize lb and ub to PrevLB and PrevUB
2150 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2151 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2152 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2153 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2154 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2155 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2156 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2157 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2158 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2159 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2160 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2161 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2162 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2163 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2165 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2166 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2167 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2168 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2170 // initialize omp.iv (IV = LB)
2171 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2172 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2173 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2174 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2176 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2177 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2178 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2179 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2180 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2182 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2183 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2186 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2188 // IV = IV + 1 and inner loop latch
2189 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2190 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2191 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2192 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2193 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2195 // check NextLB and NextUB
2196 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2197 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2199 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2200 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2202 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2205 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
2206 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
2208 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
2209 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2210 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
2211 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2214 // 'parallel for' implementation using outer and inner loops and PrevEUB
2215 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
2216 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2217 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2218 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2219 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2221 // initialize lb and ub to PrevLB and PrevUB
2222 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2223 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2224 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2225 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2226 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2227 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2228 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2229 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2230 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2231 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2232 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2233 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2234 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2235 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2237 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2238 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2239 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2240 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2242 // initialize omp.iv (IV = LB)
2243 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2244 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2245 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2246 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2248 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2249 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2250 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2251 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2252 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2254 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2255 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2258 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2260 // IV = IV + 1 and inner loop latch
2261 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2262 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2263 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2264 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2265 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2267 // check NextLB and NextUB
2268 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2269 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2271 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2272 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2274 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2277 // CHECK: !{!"llvm.loop.vectorize.enable", i1 true}