1 Pull in r223170 from upstream llvm trunk (by Michael Zolotukhin):
3 Apply loop-rotate to several vectorizer tests.
5 Such loops shouldn't be vectorized due to the loops' form.
6 After applying loop-rotate (+simplifycfg) the tests again start to check
7 what they are intended to check.
9 Pull in r223171 from upstream llvm trunk (by Michael Zolotukhin):
11 PR21302. Vectorize only bottom-tested loops.
13 rdar://problem/18886083
15 This fixes a bug in the llvm vectorizer, which could sometimes cause
16 vectorized loops to perform an additional iteration, leading to possible
17 buffer overruns. Symptoms of this, which are usually segfaults, were
18 first noticed when building gcc ports, here:
20 https://lists.freebsd.org/pipermail/freebsd-ports/2014-September/095466.html
21 https://lists.freebsd.org/pipermail/freebsd-toolchain/2014-September/001211.html
23 Introduced here: http://svnweb.freebsd.org/changeset/base/275633
25 Index: lib/Transforms/Vectorize/LoopVectorize.cpp
26 ===================================================================
27 --- lib/Transforms/Vectorize/LoopVectorize.cpp
28 +++ lib/Transforms/Vectorize/LoopVectorize.cpp
29 @@ -2864,6 +2864,14 @@ bool LoopVectorizationLegality::canVectorize() {
30 if (!TheLoop->getExitingBlock())
33 + // We only handle bottom-tested loops, i.e. loop in which the condition is
34 + // checked at the end of each iteration. With that we can assume that all
35 + // instructions in the loop are executed the same number of times.
36 + if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
37 + DEBUG(dbgs() << "LV: loop control flow is not understood by vectorizer\n");
41 // We need to have a loop header.
42 DEBUG(dbgs() << "LV: Found a loop: " <<
43 TheLoop->getHeader()->getName() << '\n');
44 Index: test/Transforms/LoopVectorize/loop-form.ll
45 ===================================================================
46 --- test/Transforms/LoopVectorize/loop-form.ll
47 +++ test/Transforms/LoopVectorize/loop-form.ll
49 +; RUN: opt -S -loop-vectorize < %s | FileCheck %s
50 +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
52 +; Check that we vectorize only bottom-tested loops.
53 +; This is a reduced testcase from PR21302.
55 +; rdar://problem/18886083
57 +%struct.X = type { i32, i16 }
59 +; CHECK-NOT: vector.body
61 +define void @foo(i32 %n) {
66 + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
67 + %cmp = icmp slt i32 %i, %n
68 + br i1 %cmp, label %for.body, label %if.end
71 + %iprom = sext i32 %i to i64
72 + %b = getelementptr inbounds %struct.X* undef, i64 %iprom, i32 1
73 + store i16 0, i16* %b, align 4
74 + %inc = add nsw i32 %i, 1
80 Index: test/Transforms/LoopVectorize/runtime-check-address-space.ll
81 ===================================================================
82 --- test/Transforms/LoopVectorize/runtime-check-address-space.ll
83 +++ test/Transforms/LoopVectorize/runtime-check-address-space.ll
84 @@ -31,25 +31,23 @@ define void @foo(i32 addrspace(1)* %a, i32 addrspa
89 + %cmp1 = icmp slt i32 0, %n
90 + br i1 %cmp1, label %for.body, label %for.end
92 -for.cond: ; preds = %for.body, %entry
93 - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
94 - %cmp = icmp slt i32 %i.0, %n
95 - br i1 %cmp, label %for.body, label %for.end
97 -for.body: ; preds = %for.cond
98 - %idxprom = sext i32 %i.0 to i64
99 +for.body: ; preds = %entry, %for.body
100 + %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
101 + %idxprom = sext i32 %i.02 to i64
102 %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
103 %0 = load i32 addrspace(1)* %arrayidx, align 4
104 %mul = mul nsw i32 %0, 3
105 - %idxprom1 = sext i32 %i.0 to i64
106 + %idxprom1 = sext i32 %i.02 to i64
107 %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
108 store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
109 - %inc = add nsw i32 %i.0, 1
111 + %inc = add nsw i32 %i.02, 1
112 + %cmp = icmp slt i32 %inc, %n
113 + br i1 %cmp, label %for.body, label %for.end
115 -for.end: ; preds = %for.cond
116 +for.end: ; preds = %for.body, %entry
120 @@ -60,25 +58,23 @@ define void @bar0(i32* %a, i32 addrspace(1)* %b, i
125 + %cmp1 = icmp slt i32 0, %n
126 + br i1 %cmp1, label %for.body, label %for.end
128 -for.cond: ; preds = %for.body, %entry
129 - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
130 - %cmp = icmp slt i32 %i.0, %n
131 - br i1 %cmp, label %for.body, label %for.end
133 -for.body: ; preds = %for.cond
134 - %idxprom = sext i32 %i.0 to i64
135 +for.body: ; preds = %entry, %for.body
136 + %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
137 + %idxprom = sext i32 %i.02 to i64
138 %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
139 %0 = load i32 addrspace(1)* %arrayidx, align 4
140 %mul = mul nsw i32 %0, 3
141 - %idxprom1 = sext i32 %i.0 to i64
142 + %idxprom1 = sext i32 %i.02 to i64
143 %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
144 store i32 %mul, i32* %arrayidx2, align 4
145 - %inc = add nsw i32 %i.0, 1
147 + %inc = add nsw i32 %i.02, 1
148 + %cmp = icmp slt i32 %inc, %n
149 + br i1 %cmp, label %for.body, label %for.end
151 -for.end: ; preds = %for.cond
152 +for.end: ; preds = %for.body, %entry
156 @@ -89,25 +85,23 @@ define void @bar1(i32 addrspace(1)* %a, i32* %b, i
161 + %cmp1 = icmp slt i32 0, %n
162 + br i1 %cmp1, label %for.body, label %for.end
164 -for.cond: ; preds = %for.body, %entry
165 - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
166 - %cmp = icmp slt i32 %i.0, %n
167 - br i1 %cmp, label %for.body, label %for.end
169 -for.body: ; preds = %for.cond
170 - %idxprom = sext i32 %i.0 to i64
171 +for.body: ; preds = %entry, %for.body
172 + %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
173 + %idxprom = sext i32 %i.02 to i64
174 %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
175 %0 = load i32* %arrayidx, align 4
176 %mul = mul nsw i32 %0, 3
177 - %idxprom1 = sext i32 %i.0 to i64
178 + %idxprom1 = sext i32 %i.02 to i64
179 %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
180 store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
181 - %inc = add nsw i32 %i.0, 1
183 + %inc = add nsw i32 %i.02, 1
184 + %cmp = icmp slt i32 %inc, %n
185 + br i1 %cmp, label %for.body, label %for.end
187 -for.end: ; preds = %for.cond
188 +for.end: ; preds = %for.body, %entry
192 @@ -119,25 +113,23 @@ define void @bar2(i32* noalias %a, i32 addrspace(1
197 + %cmp1 = icmp slt i32 0, %n
198 + br i1 %cmp1, label %for.body, label %for.end
200 -for.cond: ; preds = %for.body, %entry
201 - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
202 - %cmp = icmp slt i32 %i.0, %n
203 - br i1 %cmp, label %for.body, label %for.end
205 -for.body: ; preds = %for.cond
206 - %idxprom = sext i32 %i.0 to i64
207 +for.body: ; preds = %entry, %for.body
208 + %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
209 + %idxprom = sext i32 %i.02 to i64
210 %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
211 %0 = load i32 addrspace(1)* %arrayidx, align 4
212 %mul = mul nsw i32 %0, 3
213 - %idxprom1 = sext i32 %i.0 to i64
214 + %idxprom1 = sext i32 %i.02 to i64
215 %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
216 store i32 %mul, i32* %arrayidx2, align 4
217 - %inc = add nsw i32 %i.0, 1
219 + %inc = add nsw i32 %i.02, 1
220 + %cmp = icmp slt i32 %inc, %n
221 + br i1 %cmp, label %for.body, label %for.end
223 -for.end: ; preds = %for.cond
224 +for.end: ; preds = %for.body, %entry
228 @@ -149,25 +141,23 @@ define void @arst0(i32* %b, i32 %n) #0 {
233 + %cmp1 = icmp slt i32 0, %n
234 + br i1 %cmp1, label %for.body, label %for.end
236 -for.cond: ; preds = %for.body, %entry
237 - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
238 - %cmp = icmp slt i32 %i.0, %n
239 - br i1 %cmp, label %for.body, label %for.end
241 -for.body: ; preds = %for.cond
242 - %idxprom = sext i32 %i.0 to i64
243 +for.body: ; preds = %entry, %for.body
244 + %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
245 + %idxprom = sext i32 %i.02 to i64
246 %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
247 %0 = load i32* %arrayidx, align 4
248 %mul = mul nsw i32 %0, 3
249 - %idxprom1 = sext i32 %i.0 to i64
250 + %idxprom1 = sext i32 %i.02 to i64
251 %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
252 store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
253 - %inc = add nsw i32 %i.0, 1
255 + %inc = add nsw i32 %i.02, 1
256 + %cmp = icmp slt i32 %inc, %n
257 + br i1 %cmp, label %for.body, label %for.end
259 -for.end: ; preds = %for.cond
260 +for.end: ; preds = %for.body, %entry
264 @@ -180,25 +170,23 @@ define void @arst1(i32* %b, i32 %n) #0 {
269 + %cmp1 = icmp slt i32 0, %n
270 + br i1 %cmp1, label %for.body, label %for.end
272 -for.cond: ; preds = %for.body, %entry
273 - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
274 - %cmp = icmp slt i32 %i.0, %n
275 - br i1 %cmp, label %for.body, label %for.end
277 -for.body: ; preds = %for.cond
278 - %idxprom = sext i32 %i.0 to i64
279 +for.body: ; preds = %entry, %for.body
280 + %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
281 + %idxprom = sext i32 %i.02 to i64
282 %arrayidx = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
283 %0 = load i32 addrspace(1)* %arrayidx, align 4
284 %mul = mul nsw i32 %0, 3
285 - %idxprom1 = sext i32 %i.0 to i64
286 + %idxprom1 = sext i32 %i.02 to i64
287 %arrayidx2 = getelementptr inbounds i32* %b, i64 %idxprom1
288 store i32 %mul, i32* %arrayidx2, align 4
289 - %inc = add nsw i32 %i.0, 1
291 + %inc = add nsw i32 %i.02, 1
292 + %cmp = icmp slt i32 %inc, %n
293 + br i1 %cmp, label %for.body, label %for.end
295 -for.end: ; preds = %for.cond
296 +for.end: ; preds = %for.body, %entry
300 @@ -210,25 +198,23 @@ define void @aoeu(i32 %n) #0 {
305 + %cmp1 = icmp slt i32 0, %n
306 + br i1 %cmp1, label %for.body, label %for.end
308 -for.cond: ; preds = %for.body, %entry
309 - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
310 - %cmp = icmp slt i32 %i.0, %n
311 - br i1 %cmp, label %for.body, label %for.end
313 -for.body: ; preds = %for.cond
314 - %idxprom = sext i32 %i.0 to i64
315 +for.body: ; preds = %entry, %for.body
316 + %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
317 + %idxprom = sext i32 %i.02 to i64
318 %arrayidx = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
319 %0 = load i32 addrspace(2)* %arrayidx, align 4
320 %mul = mul nsw i32 %0, 3
321 - %idxprom1 = sext i32 %i.0 to i64
322 + %idxprom1 = sext i32 %i.02 to i64
323 %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
324 store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
325 - %inc = add nsw i32 %i.0, 1
327 + %inc = add nsw i32 %i.02, 1
328 + %cmp = icmp slt i32 %inc, %n
329 + br i1 %cmp, label %for.body, label %for.end
331 -for.end: ; preds = %for.cond
332 +for.end: ; preds = %for.body, %entry
336 Index: test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
337 ===================================================================
338 --- test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
339 +++ test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
340 @@ -8,26 +8,24 @@ define void @add_ints_1_1_1(i32 addrspace(1)* %a,
341 ; CHECK-LABEL: @add_ints_1_1_1(
349 -for.cond: ; preds = %for.body, %entry
350 - %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
351 - %cmp = icmp ult i64 %i.0, 200
352 - br i1 %cmp, label %for.body, label %for.end
354 -for.body: ; preds = %for.cond
355 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
356 +for.body: ; preds = %entry, %for.body
357 + %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
358 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
359 %0 = load i32 addrspace(1)* %arrayidx, align 4
360 - %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
361 + %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01
362 %1 = load i32 addrspace(1)* %arrayidx1, align 4
363 %add = add nsw i32 %0, %1
364 - %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
365 + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01
366 store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
367 - %inc = add i64 %i.0, 1
369 + %inc = add i64 %i.01, 1
370 + %cmp = icmp ult i64 %inc, 200
371 + br i1 %cmp, label %for.body, label %for.end
373 -for.end: ; preds = %for.cond
374 +for.end: ; preds = %for.body
378 @@ -35,26 +33,24 @@ define void @add_ints_as_1_0_0(i32 addrspace(1)* %
379 ; CHECK-LABEL: @add_ints_as_1_0_0(
380 ; CHECK-NOT: <4 x i32>
387 -for.cond: ; preds = %for.body, %entry
388 - %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
389 - %cmp = icmp ult i64 %i.0, 200
390 - br i1 %cmp, label %for.body, label %for.end
392 -for.body: ; preds = %for.cond
393 - %arrayidx = getelementptr inbounds i32* %b, i64 %i.0
394 +for.body: ; preds = %entry, %for.body
395 + %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
396 + %arrayidx = getelementptr inbounds i32* %b, i64 %i.01
397 %0 = load i32* %arrayidx, align 4
398 - %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
399 + %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01
400 %1 = load i32* %arrayidx1, align 4
401 %add = add nsw i32 %0, %1
402 - %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
403 + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01
404 store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
405 - %inc = add i64 %i.0, 1
407 + %inc = add i64 %i.01, 1
408 + %cmp = icmp ult i64 %inc, 200
409 + br i1 %cmp, label %for.body, label %for.end
411 -for.end: ; preds = %for.cond
412 +for.end: ; preds = %for.body
416 @@ -62,26 +58,24 @@ define void @add_ints_as_0_1_0(i32* %a, i32 addrsp
417 ; CHECK-LABEL: @add_ints_as_0_1_0(
418 ; CHECK-NOT: <4 x i32>
425 -for.cond: ; preds = %for.body, %entry
426 - %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
427 - %cmp = icmp ult i64 %i.0, 200
428 - br i1 %cmp, label %for.body, label %for.end
430 -for.body: ; preds = %for.cond
431 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
432 +for.body: ; preds = %entry, %for.body
433 + %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
434 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
435 %0 = load i32 addrspace(1)* %arrayidx, align 4
436 - %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
437 + %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01
438 %1 = load i32* %arrayidx1, align 4
439 %add = add nsw i32 %0, %1
440 - %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
441 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
442 store i32 %add, i32* %arrayidx2, align 4
443 - %inc = add i64 %i.0, 1
445 + %inc = add i64 %i.01, 1
446 + %cmp = icmp ult i64 %inc, 200
447 + br i1 %cmp, label %for.body, label %for.end
449 -for.end: ; preds = %for.cond
450 +for.end: ; preds = %for.body
454 @@ -89,26 +83,24 @@ define void @add_ints_as_0_1_1(i32* %a, i32 addrsp
455 ; CHECK-LABEL: @add_ints_as_0_1_1(
456 ; CHECK-NOT: <4 x i32>
463 -for.cond: ; preds = %for.body, %entry
464 - %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
465 - %cmp = icmp ult i64 %i.0, 200
466 - br i1 %cmp, label %for.body, label %for.end
468 -for.body: ; preds = %for.cond
469 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
470 +for.body: ; preds = %entry, %for.body
471 + %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
472 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
473 %0 = load i32 addrspace(1)* %arrayidx, align 4
474 - %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
475 + %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01
476 %1 = load i32 addrspace(1)* %arrayidx1, align 4
477 %add = add nsw i32 %0, %1
478 - %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
479 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
480 store i32 %add, i32* %arrayidx2, align 4
481 - %inc = add i64 %i.0, 1
483 + %inc = add i64 %i.01, 1
484 + %cmp = icmp ult i64 %inc, 200
485 + br i1 %cmp, label %for.body, label %for.end
487 -for.end: ; preds = %for.cond
488 +for.end: ; preds = %for.body
492 @@ -116,26 +108,24 @@ define void @add_ints_as_0_1_2(i32* %a, i32 addrsp
493 ; CHECK-LABEL: @add_ints_as_0_1_2(
494 ; CHECK-NOT: <4 x i32>
501 -for.cond: ; preds = %for.body, %entry
502 - %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
503 - %cmp = icmp ult i64 %i.0, 200
504 - br i1 %cmp, label %for.body, label %for.end
506 -for.body: ; preds = %for.cond
507 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
508 +for.body: ; preds = %entry, %for.body
509 + %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
510 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
511 %0 = load i32 addrspace(1)* %arrayidx, align 4
512 - %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.0
513 + %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.01
514 %1 = load i32 addrspace(2)* %arrayidx1, align 4
515 %add = add nsw i32 %0, %1
516 - %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
517 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
518 store i32 %add, i32* %arrayidx2, align 4
519 - %inc = add i64 %i.0, 1
521 + %inc = add i64 %i.01, 1
522 + %cmp = icmp ult i64 %inc, 200
523 + br i1 %cmp, label %for.body, label %for.end
525 -for.end: ; preds = %for.cond
526 +for.end: ; preds = %for.body