]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - test/OpenMP/distribute_parallel_for_simd_codegen.cpp
Vendor import of clang trunk r338150:
[FreeBSD/FreeBSD.git] / test / OpenMP / distribute_parallel_for_simd_codegen.cpp
1 // Test host code gen
2 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
4 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
5 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
7 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
8
9 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
10 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
11 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
12 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
13 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
14 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
15 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
16
17 // RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
18 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
19 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
20 // RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
21 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
22 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
23
24 // RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
25 // RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
26 // RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
27 // RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
28 // RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
29 // RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
30 // SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
31 // expected-no-diagnostics
32 #ifndef HEADER
33 #define HEADER
34
35
// tmain<T>: host-side driver that emits one 'target teams' region per
// schedule/dist_schedule combination of '#pragma omp distribute parallel for
// simd' (default, dist_schedule static / static+chunk, schedule static /
// static+chunk / dynamic / dynamic+chunk).  The emitted LLVM IR is matched by
// the CHECK/CHECK-32/CHECK-64 FileCheck prefixes elsewhere in this file, so
// the order and bodies of these loops must not be changed — outlined-function
// ordering in the IR depends on them.
// NOTE: a, b, c are deliberately left uninitialized — this is a codegen-only
// test (-emit-llvm piped to FileCheck); the code is never executed.
36 template <typename T>
37 T tmain() {
38   T *a, *b, *c;
39   int n = 10000;
40   int ch = 100;
41
42   // no schedule clauses
43   #pragma omp target
44   #pragma omp teams
45   #pragma omp distribute parallel for simd
46   for (int i = 0; i < n; ++i) {
47     a[i] = b[i] + c[i];
48   }
49
50   // dist_schedule: static no chunk
51   #pragma omp target
52   #pragma omp teams
53   #pragma omp distribute parallel for simd dist_schedule(static)
54   for (int i = 0; i < n; ++i) {
55     a[i] = b[i] + c[i];
56   }
57
58   // dist_schedule: static chunk
59   #pragma omp target
60   #pragma omp teams
61   #pragma omp distribute parallel for simd dist_schedule(static, ch)
62   for (int i = 0; i < n; ++i) {
63     a[i] = b[i] + c[i];
64   }
65
66   // schedule: static no chunk
67   #pragma omp target
68   #pragma omp teams
69   #pragma omp distribute parallel for simd schedule(static)
70   for (int i = 0; i < n; ++i) {
71     a[i] = b[i] + c[i];
72   }
73
74   // schedule: static chunk
75   #pragma omp target
76   #pragma omp teams
77   #pragma omp distribute parallel for simd schedule(static, ch)
78   for (int i = 0; i < n; ++i) {
79     a[i] = b[i] + c[i];
80   }
81
82   // schedule: dynamic no chunk
83   #pragma omp target
84   #pragma omp teams
85   #pragma omp distribute parallel for simd schedule(dynamic)
86   for (int i = 0; i < n; ++i) {
87     a[i] = b[i] + c[i];
88   }
89
90   // schedule: dynamic chunk
91   #pragma omp target
92   #pragma omp teams
93   #pragma omp distribute parallel for simd schedule(dynamic, ch)
94   for (int i = 0; i < n; ++i) {
95     a[i] = b[i] + c[i];
96   }
97
98   return T();
99 }
100
101 int main() {
102   double *a, *b, *c;
103   int n = 10000;
104   int ch = 100;
105
106 #ifdef LAMBDA
107   // LAMBDA-LABEL: @main
108   // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
109   [&]() {
110     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
111
112     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
113     // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](
114
115     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
116     // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](
117
118     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
119     // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](
120
121     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
122     // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](
123
124     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
125     // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](
126
127     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
128     // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](
129
130     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
131     // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](
132
133     // no schedule clauses
134     #pragma omp target
135     #pragma omp teams
136     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]](
137     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
138
139     #pragma omp distribute parallel for simd
140     for (int i = 0; i < n; ++i) {
141       a[i] = b[i] + c[i];
142       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]](
143       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
144       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
145       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
146       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
147
148       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
149
150       // check EUB for distribute
151       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
152       // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
153       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
154       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
155       // LAMBDA-DAG: [[EUB_TRUE]]:
156       // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
157       // LAMBDA: br label %[[EUB_END:.+]]
158       // LAMBDA-DAG: [[EUB_FALSE]]:
159       // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
160       // LAMBDA: br label %[[EUB_END]]
161       // LAMBDA-DAG: [[EUB_END]]:
162       // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
163       // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
164
165       // initialize omp.iv
166       // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
167       // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
168       // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
169
170       // check exit condition
171       // LAMBDA: [[OMP_JUMP_BACK]]:
172       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
173       // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
174       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
175       // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
176
177       // check that PrevLB and PrevUB are passed to the 'for'
178       // LAMBDA: [[DIST_BODY]]:
179       // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
180       // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
181       // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
182       // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
183       // check that distlb and distub are properly passed to fork_call
184       // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
185       // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
186       // LAMBDA: br label %[[DIST_INC:.+]]
187
188       // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
189       // LAMBDA: [[DIST_INC]]:
190       // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
191       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
192       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
193       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
194       // LAMBDA: br label %[[OMP_JUMP_BACK]]
195
196       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
197       // LAMBDA: ret
198
199       // implementation of 'parallel for'
200       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
201
202       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
203       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
204       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
205
206       // initialize lb and ub to PrevLB and PrevUB
207       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
208       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
209       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
210       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
211       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
212       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
213       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
214       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
215       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
216       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
217       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
218
219       // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
220       // In this case we use EUB
221       // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
222       // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
223       // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
224       // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
225       // LAMBDA: [[PF_EUB_TRUE]]:
226       // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
227       // LAMBDA: br label %[[PF_EUB_END:.+]]
228       // LAMBDA-DAG: [[PF_EUB_FALSE]]:
229       // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
230       // LAMBDA: br label %[[PF_EUB_END]]
231       // LAMBDA-DAG: [[PF_EUB_END]]:
232       // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
233       // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
234
235       // initialize omp.iv
236       // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
237       // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
238       // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
239
240       // check exit condition
241       // LAMBDA: [[OMP_PF_JUMP_BACK]]:
242       // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
243       // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
244       // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
245       // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
246
247       // check that PrevLB and PrevUB are passed to the 'for'
248       // LAMBDA: [[PF_BODY]]:
249       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
250       // LAMBDA: br label {{.+}}
251
252       // check stride 1 for 'for' in 'distribute parallel for simd'
253       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
254       // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
255       // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
256       // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
257
258       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
259       // LAMBDA: ret
260
261       [&]() {
262         a[i] = b[i] + c[i];
263       }();
264     }
265
266     // dist_schedule: static no chunk (same as default - no dist_schedule)
267     #pragma omp target
268     #pragma omp teams
269     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]](
270     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
271
272     #pragma omp distribute parallel for simd dist_schedule(static)
273     for (int i = 0; i < n; ++i) {
274       a[i] = b[i] + c[i];
275       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]](
276       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
277       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
278       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
279       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
280
281       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
282
283       // check EUB for distribute
284       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
285       // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
286       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
287       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
288       // LAMBDA-DAG: [[EUB_TRUE]]:
289       // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
290       // LAMBDA: br label %[[EUB_END:.+]]
291       // LAMBDA-DAG: [[EUB_FALSE]]:
292       // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
293       // LAMBDA: br label %[[EUB_END]]
294       // LAMBDA-DAG: [[EUB_END]]:
295       // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
296       // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
297
298       // initialize omp.iv
299       // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
300       // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
301       // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
302
303       // check exit condition
304       // LAMBDA: [[OMP_JUMP_BACK]]:
305       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
306       // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
307       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
308       // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
309
310       // check that PrevLB and PrevUB are passed to the 'for'
311       // LAMBDA: [[DIST_BODY]]:
312       // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
313       // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
314       // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
315       // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
316       // check that distlb and distub are properly passed to fork_call
317       // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
318       // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
319       // LAMBDA: br label %[[DIST_INC:.+]]
320
321       // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
322       // LAMBDA: [[DIST_INC]]:
323       // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
324       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
325       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
326       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
327       // LAMBDA: br label %[[OMP_JUMP_BACK]]
328
329       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
330       // LAMBDA: ret
331
332       // implementation of 'parallel for'
333       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
334
335       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
336       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
337       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
338
339       // initialize lb and ub to PrevLB and PrevUB
340       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
341       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
342       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
343       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
344       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
345       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
346       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
347       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
348       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
349       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
350       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
351
352       // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
353       // In this case we use EUB
354       // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
355       // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
356       // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
357       // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
358       // LAMBDA: [[PF_EUB_TRUE]]:
359       // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
360       // LAMBDA: br label %[[PF_EUB_END:.+]]
361       // LAMBDA-DAG: [[PF_EUB_FALSE]]:
362       // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
363       // LAMBDA: br label %[[PF_EUB_END]]
364       // LAMBDA-DAG: [[PF_EUB_END]]:
365       // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
366       // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
367
368       // initialize omp.iv
369       // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
370       // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
371       // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
372
373       // check exit condition
374       // LAMBDA: [[OMP_PF_JUMP_BACK]]:
375       // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
376       // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
377       // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
378       // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
379
380       // check that PrevLB and PrevUB are passed to the 'for'
381       // LAMBDA: [[PF_BODY]]:
382       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
383       // LAMBDA: br label {{.+}}
384
385       // check stride 1 for 'for' in 'distribute parallel for simd'
386       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
387       // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
388       // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
389       // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
390
391       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
392       // LAMBDA: ret
393       [&]() {
394         a[i] = b[i] + c[i];
395       }();
396     }
397
398     // dist_schedule: static chunk
399     #pragma omp target
400     #pragma omp teams
401     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]](
402     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
403
404     #pragma omp distribute parallel for simd dist_schedule(static, ch)
405     for (int i = 0; i < n; ++i) {
406       a[i] = b[i] + c[i];
407       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
408       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
409       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
410       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
411       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
412
413       // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
414       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
415       // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
416
417       // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
418       // check EUB for distribute
419       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
420       // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
421       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
422       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
423       // LAMBDA-DAG: [[EUB_TRUE]]:
424       // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
425       // LAMBDA: br label %[[EUB_END:.+]]
426       // LAMBDA-DAG: [[EUB_FALSE]]:
427       // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
428       // LAMBDA: br label %[[EUB_END]]
429       // LAMBDA-DAG: [[EUB_END]]:
430       // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
431       // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
432
433       // initialize omp.iv
434       // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
435       // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
436
437       // check exit condition
438       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
439       // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
440       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
441       // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
442
443       // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
444       // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
445
446       // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
447       // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
448       // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
449       // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
450       // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
451
452       // check that PrevLB and PrevUB are passed to the 'for'
453       // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
454       // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
455       // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
456       // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
457       // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
458       // check that distlb and distub are properly passed to fork_call
459       // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
460       // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
461       // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]]
462
463       // check DistInc
464       // LAMBDA: [[DIST_INNER_LOOP_INC]]:
465       // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
466       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
467       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
468       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
469       // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
470
471       // LAMBDA: [[DIST_INNER_LOOP_END]]:
472       // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
473
474       // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
475       // check NextLB and NextUB
476       // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
477       // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
478       // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
479       // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
480       // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
481       // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
482       // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
483       // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
484       // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
485
486       // outer loop exit
487       // LAMBDA: [[DIST_OUTER_LOOP_END]]:
488       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
489       // LAMBDA: ret
490
491       // skip implementation of 'parallel for': using default scheduling and was tested above
492       [&]() {
493         a[i] = b[i] + c[i];
494       }();
495     }
496
497     // schedule: static no chunk
498     #pragma omp target
499     #pragma omp teams
500     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]](
501     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
502
503     #pragma omp distribute parallel for simd schedule(static)
504     for (int i = 0; i < n; ++i) {
505       a[i] = b[i] + c[i];
506       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]](
507       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
508       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
509       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
510       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
511
512       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
513       // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
514       // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
515       // LAMBDA: ret
516
517       // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
518       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
519
520       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
521       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
522       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
523
524       // initialize lb and ub to PrevLB and PrevUB
525       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
526       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
527       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
528       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
529       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
530       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
531       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
532       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
533       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
534       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
535       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
536
537       // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
538       // In this case we use EUB
539       // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
540       // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
541       // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
542       // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
543       // LAMBDA: [[PF_EUB_TRUE]]:
544       // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
545       // LAMBDA: br label %[[PF_EUB_END:.+]]
546       // LAMBDA-DAG: [[PF_EUB_FALSE]]:
547       // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
548       // LAMBDA: br label %[[PF_EUB_END]]
549       // LAMBDA-DAG: [[PF_EUB_END]]:
550       // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
551       // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
552
553       // initialize omp.iv
554       // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
555       // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
556       // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
557
558       // check exit condition
559       // LAMBDA: [[OMP_PF_JUMP_BACK]]:
560       // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
561       // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
562       // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
563       // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
564
565       // check that PrevLB and PrevUB are passed to the 'for'
566       // LAMBDA: [[PF_BODY]]:
567       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
568       // LAMBDA: br label {{.+}}
569
570       // check stride 1 for 'for' in 'distribute parallel for simd'
571       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
572       // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
573       // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
574       // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
575
576       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
577       // LAMBDA: ret
578
579       [&]() {
580         a[i] = b[i] + c[i];
581       }();
582     }
583
584     // schedule: static chunk
585     #pragma omp target
586     #pragma omp teams
587     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]](
588     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
589
590     #pragma omp distribute parallel for simd schedule(static, ch)
591     for (int i = 0; i < n; ++i) {
592       a[i] = b[i] + c[i];
593       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]](
594       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
595       // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
596       // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
597       // LAMBDA: ret
598
599       // 'parallel for' implementation using outer and inner loops and PrevEUB
600       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
601       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
602       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
603       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
604       // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
605
606       // initialize lb and ub to PrevLB and PrevUB
607       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
608       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
609       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
610       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
611       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
612       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
613       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
614       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
615       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
616       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
617       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
618       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
619
620       // check PrevEUB (using PrevUB instead of NumIt as upper bound)
621       // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
622       // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
623       // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
624       // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
625       // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
626       // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
627       // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
628       // LAMBDA: [[PF_EUB_TRUE]]:
629       // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
630       // LAMBDA: br label %[[PF_EUB_END:.+]]
631       // LAMBDA-DAG: [[PF_EUB_FALSE]]:
632       // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
633       // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
634       // LAMBDA: br label %[[PF_EUB_END]]
635       // LAMBDA-DAG: [[PF_EUB_END]]:
636       // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
637       // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
638       // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
639       // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}}  [[OMP_PF_UB]],
640       // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
641
642       // initialize omp.iv (IV = LB)
643       // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
644       // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
645
646       // outer loop: while (IV < UB) {
647       // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
648       // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
649       // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
650       // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
651
652       // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
653       // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
654
655       // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]:
656       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
657       // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
658       // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
659       // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
660
661       // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
662       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
663       // skip body branch
664       // LAMBDA: br{{.+}}
665       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
666
667       // IV = IV + 1 and inner loop latch
668       // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
669       // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
670       // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
671       // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
672       // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]]
673
674       // check NextLB and NextUB
675       // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
676       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
677
678       // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
679       // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
680       // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
681       // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
682       // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
683       // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
684       // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
685       // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
686       // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
687       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
688
689       // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
690       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
691       // LAMBDA: ret
692       [&]() {
693         a[i] = b[i] + c[i];
694       }();
695     }
696
697     // schedule: dynamic no chunk
698     #pragma omp target
699     #pragma omp teams
700     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]](
701     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
702
703     #pragma omp distribute parallel for simd schedule(dynamic)
704     for (int i = 0; i < n; ++i) {
705       a[i] = b[i] + c[i];
706       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]](
707       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
708       // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
709       // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
710       // LAMBDA: ret
711
712       // 'parallel for' implementation using outer and inner loops and PrevEUB
713       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
714       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
715       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
716       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
717       // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
718
719       // initialize lb and ub to PrevLB and PrevUB
720       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
721       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
722       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
723       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
724       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
725       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
726       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
727       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
728       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
729       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
730       // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
731       // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
732       // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
733       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
734
735       // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
736       // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
737       // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
738       // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
739
740       // initialize omp.iv (IV = LB)
741       // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
742       // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
743       // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
744       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
745
746       // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
747       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
748       // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
749       // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
750       // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
751
752       // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
753       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
754       // skip body branch
755       // LAMBDA: br{{.+}}
756       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
757
758       // IV = IV + 1 and inner loop latch
759       // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
760       // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
761       // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
762       // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
763       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
764
765       // check NextLB and NextUB
766       // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
767       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
768
769       // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
770       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
771
772       // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
773       // LAMBDA: ret
774       [&]() {
775         a[i] = b[i] + c[i];
776       }();
777     }
778
779     // schedule: dynamic chunk
780     #pragma omp target
781     #pragma omp teams
782     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]](
783     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
784
785     #pragma omp distribute parallel for simd schedule(dynamic, ch)
786     for (int i = 0; i < n; ++i) {
787       a[i] = b[i] + c[i];
788       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]](
789       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
790       // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
791       // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
792       // LAMBDA: ret
793
794       // 'parallel for' implementation using outer and inner loops and PrevEUB
795       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
796       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
797       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
798       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
799       // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
800
801       // initialize lb and ub to PrevLB and PrevUB
802       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
803       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
804       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
805       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
806       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
807       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
808       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
809       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
810       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
811       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
812       // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
813       // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
814       // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
815       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
816
817       // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
818       // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
819       // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
820       // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
821
822       // initialize omp.iv (IV = LB)
823       // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
824       // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
825       // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
826       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
827
828       // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
829       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
830       // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
831       // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
832       // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
833
834       // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
835       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
836       // skip body branch
837       // LAMBDA: br{{.+}}
838       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
839
840       // IV = IV + 1 and inner loop latch
841       // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
842       // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
843       // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
844       // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
845       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
846
847       // check NextLB and NextUB
848       // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
849       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
850
851       // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
852       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
853
854       // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
855       // LAMBDA: ret
856       [&]() {
857         a[i] = b[i] + c[i];
858       }();
859     }
860   }();
861   return 0;
862 #else
863   // CHECK-LABEL: @main
864
865   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
866   // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
867
868   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
869   // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
870
871   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
872   // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
873
874   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
875   // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
876
877   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
878   // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
879
880   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
881   // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
882
883   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
884   // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
885
886   // CHECK: call{{.+}} [[TMAIN:@.+]]()
887
888   // no schedule clauses
889   #pragma omp target
890   #pragma omp teams
891   // CHECK: define internal void [[OFFLOADING_FUN_1]](
892   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
893
894   #pragma omp distribute parallel for simd
895   for (int i = 0; i < n; ++i) {
896     a[i] = b[i] + c[i];
897     // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
898     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
899     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
900     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
901     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
902
903     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
904
905     // check EUB for distribute
906     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
907     // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
908     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
909     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
910     // CHECK-DAG: [[EUB_TRUE]]:
911     // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
912     // CHECK: br label %[[EUB_END:.+]]
913     // CHECK-DAG: [[EUB_FALSE]]:
914     // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
915     // CHECK: br label %[[EUB_END]]
916     // CHECK-DAG: [[EUB_END]]:
917     // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
918     // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
919
920     // initialize omp.iv
921     // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
922     // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
923     // CHECK: br label %[[OMP_JUMP_BACK:.+]]
924
925     // check exit condition
926     // CHECK: [[OMP_JUMP_BACK]]:
927     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
928     // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
929     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
930     // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
931
932     // check that PrevLB and PrevUB are passed to the 'for'
933     // CHECK: [[DIST_BODY]]:
934     // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
935     // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
936     // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
937     // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
938     // check that distlb and distub are properly passed to fork_call
939     // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
940     // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
941     // CHECK: br label %[[DIST_INC:.+]]
942
943     // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
944     // CHECK: [[DIST_INC]]:
945     // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
946     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
947     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
948     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
949     // CHECK: br label %[[OMP_JUMP_BACK]]
950
951     // CHECK-DAG: call void @__kmpc_for_static_fini(
952     // CHECK: ret
953
954     // implementation of 'parallel for'
955     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
956
957     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
958     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
959     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
960
961     // initialize lb and ub to PrevLB and PrevUB
962     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
963     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
964     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
965     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
966     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
967     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
968     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
969     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
970     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
971     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
972     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
973
974     // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
975     // In this case we use EUB
976     // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
977     // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
978     // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
979     // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
980     // CHECK: [[PF_EUB_TRUE]]:
981     // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
982     // CHECK: br label %[[PF_EUB_END:.+]]
983     // CHECK-DAG: [[PF_EUB_FALSE]]:
984     // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
985     // CHECK: br label %[[PF_EUB_END]]
986     // CHECK-DAG: [[PF_EUB_END]]:
987     // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
988     // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
989
990     // initialize omp.iv
991     // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
992     // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
993     // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
994
995     // check exit condition
996     // CHECK: [[OMP_PF_JUMP_BACK]]:
997     // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
998     // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
999     // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1000     // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1001
1002     // check that PrevLB and PrevUB are passed to the 'for'
1003     // CHECK: [[PF_BODY]]:
1004     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1005     // CHECK: br label {{.+}}
1006
1007     // check stride 1 for 'for' in 'distribute parallel for simd'
1008     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1009     // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1010     // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1011     // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1012
1013     // CHECK-DAG: call void @__kmpc_for_static_fini(
1014     // CHECK: ret
1015   }
1016
1017   // dist_schedule: static no chunk
1018   #pragma omp target
1019   #pragma omp teams
1020   // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1021   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1022
1023   #pragma omp distribute parallel for simd dist_schedule(static)
1024   for (int i = 0; i < n; ++i) {
1025     a[i] = b[i] + c[i];
1026     // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1027     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1028     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1029     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1030     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1031
1032     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1033
1034     // check EUB for distribute
1035     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1036     // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1037     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1038     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1039     // CHECK-DAG: [[EUB_TRUE]]:
1040     // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1041     // CHECK: br label %[[EUB_END:.+]]
1042     // CHECK-DAG: [[EUB_FALSE]]:
1043     // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1044     // CHECK: br label %[[EUB_END]]
1045     // CHECK-DAG: [[EUB_END]]:
1046     // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1047     // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1048
1049     // initialize omp.iv
1050     // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1051     // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1052     // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1053
1054     // check exit condition
1055     // CHECK: [[OMP_JUMP_BACK]]:
1056     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1057     // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1058     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1059     // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1060
1061     // check that PrevLB and PrevUB are passed to the 'for'
1062     // CHECK: [[DIST_BODY]]:
1063     // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1064     // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1065     // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1066     // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1067     // check that distlb and distub are properly passed to fork_call
1068     // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1069     // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1070     // CHECK: br label %[[DIST_INC:.+]]
1071
1072     // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1073     // CHECK: [[DIST_INC]]:
1074     // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1075     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1076     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1077     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1078     // CHECK: br label %[[OMP_JUMP_BACK]]
1079
1080     // CHECK-DAG: call void @__kmpc_for_static_fini(
1081     // CHECK: ret
1082
1083     // implementation of 'parallel for'
1084     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1085
1086     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1087     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1088     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1089
1090     // initialize lb and ub to PrevLB and PrevUB
1091     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1092     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1093     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1094     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1095     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1096     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1097     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1098     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1099     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1100     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1101     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1102
1103     // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1104     // In this case we use EUB
1105     // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1106     // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1107     // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1108     // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1109     // CHECK: [[PF_EUB_TRUE]]:
1110     // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1111     // CHECK: br label %[[PF_EUB_END:.+]]
1112     // CHECK-DAG: [[PF_EUB_FALSE]]:
1113     // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1114     // CHECK: br label %[[PF_EUB_END]]
1115     // CHECK-DAG: [[PF_EUB_END]]:
1116     // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1117     // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
1118
1119     // initialize omp.iv
1120     // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1121     // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1122     // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1123
1124     // check exit condition
1125     // CHECK: [[OMP_PF_JUMP_BACK]]:
1126     // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1127     // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1128     // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1129     // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1130
1131     // check the loop body (the induction variable is loaded inside the body)
1132     // CHECK: [[PF_BODY]]:
1133     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1134     // CHECK: br label {{.+}}
1135
1136     // check stride 1 for 'for' in 'distribute parallel for simd'
1137     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1138     // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1139     // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1140     // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1141
1142     // CHECK-DAG: call void @__kmpc_for_static_fini(
1143     // CHECK: ret
1144   }
1145
1146   // dist_schedule: static chunk
1147   #pragma omp target
1148   #pragma omp teams
1149   // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1150   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1151
1152   #pragma omp distribute parallel for simd dist_schedule(static, ch)
1153   for (int i = 0; i < n; ++i) {
1154     a[i] = b[i] + c[i];
1155     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1156     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1157     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1158     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1159     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1160
1161     // unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
1162     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
1163     // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
1164
1165     // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
1166     // check EUB for distribute
1167     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1168     // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1169     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1170     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1171     // CHECK-DAG: [[EUB_TRUE]]:
1172     // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1173     // CHECK: br label %[[EUB_END:.+]]
1174     // CHECK-DAG: [[EUB_FALSE]]:
1175     // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1176     // CHECK: br label %[[EUB_END]]
1177     // CHECK-DAG: [[EUB_END]]:
1178     // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1179     // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1180
1181     // initialize omp.iv
1182     // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1183     // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1184
1185     // check exit condition
1186     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1187     // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1188     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1189     // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
1190
1191     // CHECK: [[DIST_OUTER_LOOP_BODY]]:
1192     // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
1193
1194     // CHECK: [[DIST_INNER_LOOP_HEADER]]:
1195     // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
1196     // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
1197     // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
1198     // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
1199
1200     // check that PrevLB and PrevUB are passed to the 'for'
1201     // CHECK: [[DIST_INNER_LOOP_BODY]]:
1202     // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1203     // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1204     // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1205     // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1206     // check that distlb and distub are properly passed to fork_call
1207     // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1208     // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1209     // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1210
1211     // check DistInc
1212     // CHECK: [[DIST_INNER_LOOP_INC]]:
1213     // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1214     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1215     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1216     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1217     // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
1218
1219     // CHECK: [[DIST_INNER_LOOP_END]]:
1220     // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
1221
1222     // CHECK: [[DIST_OUTER_LOOP_INC]]:
1223     // check NextLB and NextUB
1224     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1225     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1226     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1227     // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1228     // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1229     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1230     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1231     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1232     // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
1233
1234     // outer loop exit
1235     // CHECK: [[DIST_OUTER_LOOP_END]]:
1236     // CHECK-DAG: call void @__kmpc_for_static_fini(
1237     // CHECK: ret
1238
1239     // skip implementation of 'parallel for': using default scheduling and was tested above
1240   }
1241
1242   // schedule: static no chunk
1243   #pragma omp target
1244   #pragma omp teams
1245   // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
1246   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
1247
1248   #pragma omp distribute parallel for simd schedule(static)
1249   for (int i = 0; i < n; ++i) {
1250     a[i] = b[i] + c[i];
1251     // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
1252     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1253     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1254     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1255     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1256
1257     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1258     // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
1259     // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1260     // CHECK: ret
1261
1262     // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
1263     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1264
1265     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1266     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1267     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1268
1269     // initialize lb and ub to PrevLB and PrevUB
1270     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1271     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1272     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1273     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1274     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1275     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1276     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1277     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1278     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1279     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1280     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1281
1282     // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1283     // In this case we use EUB
1284     // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1285     // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1286     // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1287     // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1288     // CHECK: [[PF_EUB_TRUE]]:
1289     // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1290     // CHECK: br label %[[PF_EUB_END:.+]]
1291     // CHECK-DAG: [[PF_EUB_FALSE]]:
1292     // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1293     // CHECK: br label %[[PF_EUB_END]]
1294     // CHECK-DAG: [[PF_EUB_END]]:
1295     // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1296     // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
1297
1298     // initialize omp.iv
1299     // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1300     // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1301     // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1302
1303     // check exit condition
1304     // CHECK: [[OMP_PF_JUMP_BACK]]:
1305     // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1306     // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1307     // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1308     // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1309
1310     // check the loop body (the induction variable is loaded inside the body)
1311     // CHECK: [[PF_BODY]]:
1312     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1313     // CHECK: br label {{.+}}
1314
1315     // check stride 1 for 'for' in 'distribute parallel for simd'
1316     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1317     // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1318     // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1319     // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1320
1321     // CHECK-DAG: call void @__kmpc_for_static_fini(
1322     // CHECK: ret
1323   }
1324
1325   // schedule: static chunk
1326   #pragma omp target
1327   #pragma omp teams
1328   // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
1329   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
1330
1331   #pragma omp distribute parallel for simd schedule(static, ch)
1332   for (int i = 0; i < n; ++i) {
1333     a[i] = b[i] + c[i];
1334     // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
1335     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1336     // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
1337     // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1338     // CHECK: ret
1339
1340     // 'parallel for' implementation using outer and inner loops and PrevEUB
1341     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1342     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1343     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1344     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1345     // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1346
1347     // initialize lb and ub to PrevLB and PrevUB
1348     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1349     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1350     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1351     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1352     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1353     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1354     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1355     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1356     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1357     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1358     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1359     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1360
1361     // check PrevEUB (using PrevUB instead of NumIt as upper bound)
1362     // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1363     // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1364     // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
1365     // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1366     // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
1367     // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
1368     // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1369     // CHECK: [[PF_EUB_TRUE]]:
1370     // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1371     // CHECK: br label %[[PF_EUB_END:.+]]
1372     // CHECK-DAG: [[PF_EUB_FALSE]]:
1373     // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1374     // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
1375     // CHECK: br label %[[PF_EUB_END]]
1376     // CHECK-DAG: [[PF_EUB_END]]:
1377     // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
1378     // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
1379     // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
1380     // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}}  [[OMP_PF_UB]],
1381     // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
1382
1383     // initialize omp.iv (IV = LB)
1384     // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1385     // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1386
1387     // outer loop: while (IV < UB) {
1388     // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1389     // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1390     // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1391     // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1392
1393     // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1394     // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
1395
1396     // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
1397     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1398     // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1399     // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1400     // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1401
1402     // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1403     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1404     // skip body branch
1405     // CHECK: br{{.+}}
1406     // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1407
1408     // IV = IV + 1 and inner loop latch
1409     // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1410     // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1411     // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1412     // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1413     // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
1414
1415     // check NextLB and NextUB
1416     // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1417     // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1418
1419     // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1420     // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1421     // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1422     // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
1423     // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
1424     // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1425     // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1426     // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
1427     // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
1428     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1429
1430     // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1431     // CHECK-DAG: call void @__kmpc_for_static_fini(
1432     // CHECK: ret
1433   }
1434
1435   // schedule: dynamic no chunk
1436   #pragma omp target
1437   #pragma omp teams
1438   // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
1439   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
1440
1441   #pragma omp distribute parallel for simd schedule(dynamic)
1442   for (int i = 0; i < n; ++i) {
1443     a[i] = b[i] + c[i];
1444     // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
1445     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1446     // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
1447     // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1448     // CHECK: ret
1449
1450     // 'parallel for' implementation using outer and inner loops and PrevEUB
1451     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1452     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1453     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1454     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1455     // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1456
1457     // initialize lb and ub to PrevLB and PrevUB
1458     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1459     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1460     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1461     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1462     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1463     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1464     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1465     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1466     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1467     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1468     // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1469     // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1470     // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1471     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1472
1473     // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1474     // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1475     // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1476     // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1477
1478     // initialize omp.iv (IV = LB)
1479     // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1480     // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1481     // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1482     // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1483
1484     // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1485     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1486     // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1487     // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1488     // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1489
1490     // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1491     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1492     // skip body branch
1493     // CHECK: br{{.+}}
1494     // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1495
1496     // IV = IV + 1 and inner loop latch
1497     // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1498     // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1499     // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1500     // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1501     // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1502
1503     // check NextLB and NextUB
1504     // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1505     // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1506
1507     // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1508     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1509
1510     // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1511     // CHECK: ret
1512   }
1513
1514   // schedule: dynamic chunk
1515   #pragma omp target
1516   #pragma omp teams
1517   // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
1518   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
1519
1520   #pragma omp distribute parallel for simd schedule(dynamic, ch)
1521   for (int i = 0; i < n; ++i) {
1522     a[i] = b[i] + c[i];
1523     // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
1524     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1525     // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
1526     // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1527     // CHECK: ret
1528
1529     // 'parallel for' implementation using outer and inner loops and PrevEUB
1530     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1531     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1532     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1533     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1534     // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1535
1536     // initialize lb and ub to PrevLB and PrevUB
1537     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1538     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1539     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1540     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1541     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1542     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1543     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1544     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1545     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1546     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1547     // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1548     // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1549     // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1550     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1551
1552     // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1553     // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1554     // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1555     // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1556
1557     // initialize omp.iv (IV = LB)
1558     // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1559     // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1560     // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1561     // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1562
1563     // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1564     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1565     // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1566     // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1567     // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1568
1569     // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1570     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1571     // skip body branch
1572     // CHECK: br{{.+}}
1573     // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1574
1575     // IV = IV + 1 and inner loop latch
1576     // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1577     // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1578     // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1579     // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1580     // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1581
1582     // check NextLB and NextUB
1583     // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1584     // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1585
1586     // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1587     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1588
1589     // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1590     // CHECK: ret
1591   }
1592
1593   return tmain<int>();
1594 #endif
1595 }
1596
1597 // check code
1598 // CHECK: define{{.+}} [[TMAIN]]()
1599
1600 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1601 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
1602
1603 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1604 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
1605
1606 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1607 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
1608
1609 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1610 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
1611
1612 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1613 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
1614
1615 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1616 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
1617
1618 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1619 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
1620
1621 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]](
1622 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
1623
1624 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
1625 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1626 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1627 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1628 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1629
1630 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1631
1632 // check EUB for distribute
1633 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1634 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1635 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1636 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1637 // CHECK-DAG: [[EUB_TRUE]]:
1638 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1639 // CHECK: br label %[[EUB_END:.+]]
1640 // CHECK-DAG: [[EUB_FALSE]]:
1641 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1642 // CHECK: br label %[[EUB_END]]
1643 // CHECK-DAG: [[EUB_END]]:
1644 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1645 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1646
1647 // initialize omp.iv
1648 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1649 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1650 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1651
1652 // check exit condition
1653 // CHECK: [[OMP_JUMP_BACK]]:
1654 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1655 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1656 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1657 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1658
1659 // check that PrevLB and PrevUB are passed to the 'for'
1660 // CHECK: [[DIST_BODY]]:
1661 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1662 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1663 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1664 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1665 // check that distlb and distub are properly passed to fork_call
1666 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1667 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1668 // CHECK: br label %[[DIST_INC:.+]]
1669
1670 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1671 // CHECK: [[DIST_INC]]:
1672 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1673 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1674 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1675 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1676 // CHECK: br label %[[OMP_JUMP_BACK]]
1677
1678 // CHECK-DAG: call void @__kmpc_for_static_fini(
1679 // CHECK: ret
1680
1681 // implementation of 'parallel for'
1682 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1683
1684 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1685 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1686 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1687
1688 // initialize lb and ub to PrevLB and PrevUB
1689 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1690 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1691 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1692 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1693 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1694 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1695 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1696 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1697 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1698 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1699 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1700
1701 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1702 // In this case we use EUB
1703 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1704 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1705 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1706 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1707 // CHECK: [[PF_EUB_TRUE]]:
1708 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1709 // CHECK: br label %[[PF_EUB_END:.+]]
1710 // CHECK-DAG: [[PF_EUB_FALSE]]:
1711 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1712 // CHECK: br label %[[PF_EUB_END]]
1713 // CHECK-DAG: [[PF_EUB_END]]:
1714 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1715 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
1716
1717 // initialize omp.iv
1718 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1719 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1720 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1721
1722 // check exit condition
1723 // CHECK: [[OMP_PF_JUMP_BACK]]:
1724 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1725 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1726 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1727 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1728
1729 // check the loop body: the iteration variable is loaded
1730 // CHECK: [[PF_BODY]]:
1731 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1732 // CHECK: br label {{.+}}
1733
1734 // check stride 1 for 'for' in 'distribute parallel for simd'
1735 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1736 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1737 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1738 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1739
1740 // CHECK-DAG: call void @__kmpc_for_static_fini(
1741 // CHECK: ret
1742
1743 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1744 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1745
1746 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1747 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1748 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1749 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1750 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1751
1752 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1753
1754 // check EUB for distribute
1755 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1756 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1757 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1758 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1759 // CHECK-DAG: [[EUB_TRUE]]:
1760 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1761 // CHECK: br label %[[EUB_END:.+]]
1762 // CHECK-DAG: [[EUB_FALSE]]:
1763 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1764 // CHECK: br label %[[EUB_END]]
1765 // CHECK-DAG: [[EUB_END]]:
1766 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1767 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1768
1769 // initialize omp.iv
1770 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1771 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1772 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1773
1774 // check exit condition
1775 // CHECK: [[OMP_JUMP_BACK]]:
1776 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1777 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1778 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1779 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1780
1781 // check that PrevLB and PrevUB are passed to the 'for'
1782 // CHECK: [[DIST_BODY]]:
1783 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1784 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1785 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1786 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1787 // check that distlb and distub are properly passed to fork_call
1788 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1789 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1790 // CHECK: br label %[[DIST_INC:.+]]
1791
1792 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1793 // CHECK: [[DIST_INC]]:
1794 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1795 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1796 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1797 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1798 // CHECK: br label %[[OMP_JUMP_BACK]]
1799
1800 // CHECK-DAG: call void @__kmpc_for_static_fini(
1801 // CHECK: ret
1802
1803 // implementation of 'parallel for'
1804 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1805
1806 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1807 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1808 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1809
1810 // initialize lb and ub to PrevLB and PrevUB
1811 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1812 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1813 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1814 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1815 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1816 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1817 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1818 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1819 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1820 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1821 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1822
1823 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1824 // In this case we use EUB
1825 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1826 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1827 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1828 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1829 // CHECK: [[PF_EUB_TRUE]]:
1830 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1831 // CHECK: br label %[[PF_EUB_END:.+]]
1832 // CHECK-DAG: [[PF_EUB_FALSE]]:
1833 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1834 // CHECK: br label %[[PF_EUB_END]]
1835 // CHECK-DAG: [[PF_EUB_END]]:
1836 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1837 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
1838
1839 // initialize omp.iv
1840 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1841 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1842 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1843
1844 // check exit condition
1845 // CHECK: [[OMP_PF_JUMP_BACK]]:
1846 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1847 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1848 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1849 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1850
1851 // check the loop body: the iteration variable is loaded
1852 // CHECK: [[PF_BODY]]:
1853 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1854 // CHECK: br label {{.+}}
1855
1856 // check stride 1 for 'for' in 'distribute parallel for simd'
1857 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1858 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1859 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1860 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1861
1862 // CHECK-DAG: call void @__kmpc_for_static_fini(
1863 // CHECK: ret
1864
1865 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1866 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1867
1868 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1869 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1870 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1871 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1872 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1873
1874 // unlike the previous tests, in this one we have an outer and inner loop for 'distribute'
1875 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
1876 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
1877
1878 // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
1879 // check EUB for distribute
1880 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1881 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1882 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1883 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1884 // CHECK-DAG: [[EUB_TRUE]]:
1885 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1886 // CHECK: br label %[[EUB_END:.+]]
1887 // CHECK-DAG: [[EUB_FALSE]]:
1888 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1889 // CHECK: br label %[[EUB_END]]
1890 // CHECK-DAG: [[EUB_END]]:
1891 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1892 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1893
1894 // initialize omp.iv
1895 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1896 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1897
1898 // check exit condition
1899 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1900 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1901 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1902 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
1903
1904 // CHECK: [[DIST_OUTER_LOOP_BODY]]:
1905 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
1906
1907 // CHECK: [[DIST_INNER_LOOP_HEADER]]:
1908 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
1909 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
1910 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
1911 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
1912
1913 // check that PrevLB and PrevUB are passed to the 'for'
1914 // CHECK: [[DIST_INNER_LOOP_BODY]]:
1915 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1916 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1917 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1918 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1919 // check that distlb and distub are properly passed to fork_call
1920 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1921 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1922 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1923
1924 // check DistInc
1925 // CHECK: [[DIST_INNER_LOOP_INC]]:
1926 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1927 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1928 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1929 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1930 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
1931
1932 // CHECK: [[DIST_INNER_LOOP_END]]:
1933 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
1934
1935 // CHECK: [[DIST_OUTER_LOOP_INC]]:
1936 // check NextLB and NextUB
1937 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1938 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1939 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1940 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1941 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1942 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1943 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1944 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1945 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
1946
1947 // outer loop exit
1948 // CHECK: [[DIST_OUTER_LOOP_END]]:
1949 // CHECK-DAG: call void @__kmpc_for_static_fini(
1950 // CHECK: ret
1951
1952 // skip implementation of 'parallel for': using default scheduling and was tested above
1953
1954 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
1955 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
1956
1957 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
1958 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1959 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1960 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1961 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1962
1963 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1964 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
1965 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1966 // CHECK: ret
1967
1968 // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
1969 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1970
1971 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1972 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1973 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1974
1975 // initialize lb and ub to PrevLB and PrevUB
1976 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1977 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1978 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1979 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1980 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1981 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1982 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1983 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1984 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1985 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1986 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1987
1988 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1989 // In this case we use EUB
1990 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1991 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1992 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1993 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1994 // CHECK: [[PF_EUB_TRUE]]:
1995 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1996 // CHECK: br label %[[PF_EUB_END:.+]]
1997 // CHECK-DAG: [[PF_EUB_FALSE]]:
1998 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1999 // CHECK: br label %[[PF_EUB_END]]
2000 // CHECK-DAG: [[PF_EUB_END]]:
2001 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
2002 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
2003
2004 // initialize omp.iv
2005 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2006 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2007 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
2008
2009 // check exit condition
2010 // CHECK: [[OMP_PF_JUMP_BACK]]:
2011 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
2012 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
2013 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2014 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
2015
2016 // check the loop body: the iteration variable is loaded
2017 // CHECK: [[PF_BODY]]:
2018 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2019 // CHECK: br label {{.+}}
2020
2021 // check stride 1 for 'for' in 'distribute parallel for simd'
2022 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
2023 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
2024 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
2025 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
2026
2027 // CHECK-DAG: call void @__kmpc_for_static_fini(
2028 // CHECK: ret
2029
2030 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
2031 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
2032
2033 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
2034 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2035 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
2036 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2037 // CHECK: ret
2038
2039 // 'parallel for' implementation using outer and inner loops and PrevEUB
2040 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
2041 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2042 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2043 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2044 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2045
2046 // initialize lb and ub to PrevLB and PrevUB
2047 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2048 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2049 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2050 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2051 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2052 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2053 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2054 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2055 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2056 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2057 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
2058 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2059
2060 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
2061 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2062 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
2063 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
2064 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2065 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
2066 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
2067 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
2068 // CHECK: [[PF_EUB_TRUE]]:
2069 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2070 // CHECK: br label %[[PF_EUB_END:.+]]
2071 // CHECK-DAG: [[PF_EUB_FALSE]]:
2072 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
2073 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
2074 // CHECK: br label %[[PF_EUB_END]]
2075 // CHECK-DAG: [[PF_EUB_END]]:
2076 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
2077 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
2078 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
2079 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}}  [[OMP_PF_UB]],
2080 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
2081
2082 // initialize omp.iv (IV = LB)
2083 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2084 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2085
2086 // outer loop: while (IV < UB) {
2087 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2088 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2089 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2090 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2091
2092 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2093 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
2094
2095 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
2096 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2097 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2098 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2099 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2100
2101 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2102 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2103 // skip body branch
2104 // CHECK: br{{.+}}
2105 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2106
2107 // IV = IV + 1 and inner loop latch
2108 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2109 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
2110 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2111 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
2112 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
2113
2114 // check NextLB and NextUB
2115 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2116 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2117
2118 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2119 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2120 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2121 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
2122 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
2123 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2124 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2125 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
2126 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
2127 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2128
2129 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2130 // CHECK-DAG: call void @__kmpc_for_static_fini(
2131 // CHECK: ret
2132
2133 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
2134 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
2135
2136 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
2137 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2138 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
2139 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2140 // CHECK: ret
2141
2142 // 'parallel for' implementation using a dynamic (dispatch) schedule: chunks are obtained via __kmpc_dispatch_init/__kmpc_dispatch_next in an outer loop around the inner loop
2143 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
2144 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2145 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2146 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2147 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2148
2149 // initialize lb and ub to PrevLB and PrevUB
2150 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2151 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2152 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2153 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2154 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2155 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2156 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2157 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2158 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2159 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2160 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2161 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2162 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2163 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2164
2165 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2166 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2167 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2168 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2169
2170 // initialize omp.iv (IV = LB)
2171 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2172 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2173 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2174 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2175
2176 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2177 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2178 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2179 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2180 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2181
2182 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2183 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2184 // skip body branch
2185 // CHECK: br{{.+}}
2186 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2187
2188 // IV = IV + 1 and inner loop latch
2189 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2190 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2191 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2192 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2193 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2194
2195 // inner loop exit: with dynamic dispatch there is no NextLB/NextUB update;
2195 // the outer loop simply re-queries __kmpc_dispatch_next_4 for the next chunk
2196 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2197 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2198
2199 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2200 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2201
2202 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2203 // CHECK: ret
2204
2205 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
2206 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
2207
2208 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
2209 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2210 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
2211 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2212 // CHECK: ret
2213
2214 // 'parallel for' implementation using outer and inner loops and PrevEUB
2215 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
2216 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2217 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2218 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2219 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2220
2221 // initialize lb and ub to PrevLB and PrevUB
2222 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2223 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2224 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2225 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2226 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2227 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2228 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2229 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2230 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2231 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2232 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2233 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2234 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2235 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2236
2237 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2238 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2239 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2240 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2241
2242 // initialize omp.iv (IV = LB)
2243 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2244 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2245 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2246 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2247
2248 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2249 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2250 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2251 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2252 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2253
2254 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2255 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2256 // skip body branch
2257 // CHECK: br{{.+}}
2258 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2259
2260 // IV = IV + 1 and inner loop latch
2261 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2262 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2263 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2264 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2265 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2266
2267 // inner loop exit: with dynamic dispatch there is no NextLB/NextUB update;
2267 // the outer loop simply re-queries __kmpc_dispatch_next_4 for the next chunk
2268 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2269 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2270
2271 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2272 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2273
2274 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2275 // CHECK: ret
2276
2277 // CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
2278 #endif