1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
38 #include "tree-chrec.h"
39 #include "tree-data-ref.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
59 static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
60 static tree vect_get_loop_niters (struct loop *, tree *);
61 static bool vect_analyze_data_ref_dependence
62 (struct data_dependence_relation *, loop_vec_info);
63 static bool vect_compute_data_ref_alignment (struct data_reference *);
64 static bool vect_analyze_data_ref_access (struct data_reference *);
65 static bool vect_can_advance_ivs_p (loop_vec_info);
66 static void vect_update_misalignment_for_peel
67 (struct data_reference *, struct data_reference *, int npeel);
70 /* Function vect_determine_vectorization_factor
72 Determine the vectorization factor (VF). VF is the number of data elements
73 that are operated upon in parallel in a single iteration of the vectorized
74 loop. For example, when vectorizing a loop that operates on 4byte elements,
75 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
76 elements can fit in a single vector register.
78 We currently support vectorization of loops in which all types operated upon
79 are of the same size. Therefore this function currently sets VF according to
80 the size of the types operated upon, and fails if there are multiple sizes
83 VF is also the factor by which the loop iterations are strip-mined, e.g.:
90 for (i=0; i<N; i+=VF){
91 a[i:VF] = b[i:VF] + c[i:VF];
/* Compute the loop-wide vectorization factor (VF): walk every statement in
   every basic block of the loop, derive a vector type for each relevant
   statement, and require all statements to agree on the same number of
   vector elements (nunits), which becomes the VF.  Fails (and reports via
   vect_dump) on vector statements already present in the loop, unsupported
   scalar data-types, or mixed element counts.
   NOTE(review): this extract is missing source lines (the embedded
   numbering skips values), so declarations of `i', `nunits', `scalar_type'
   and `vectype', several braces, `continue'/`return' statements and the
   function's return type are not visible here.  Code is kept byte-identical;
   only comments were added.  */
96 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
98 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
99 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
100 int nbbs = loop->num_nodes;
101 block_stmt_iterator si;
102 unsigned int vectorization_factor = 0;
106 if (vect_print_dump_info (REPORT_DETAILS))
107 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
/* Iterate over all the loop's basic blocks and their statements.  */
109 for (i = 0; i < nbbs; i++)
111 basic_block bb = bbs[i];
113 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
115 tree stmt = bsi_stmt (si);
117 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
120 if (vect_print_dump_info (REPORT_DETAILS))
122 fprintf (vect_dump, "==> examining statement: ");
123 print_generic_expr (vect_dump, stmt, TDF_SLIM);
126 gcc_assert (stmt_info);
127 /* skip stmts which do not need to be vectorized. */
128 if (!STMT_VINFO_RELEVANT_P (stmt_info)
129 && !STMT_VINFO_LIVE_P (stmt_info))
131 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "skip.");
/* A statement that already has vector type means the loop was
   (partially) vectorized before; we do not handle that.  */
136 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
138 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
140 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
141 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* Reuse a previously recorded vector type if available; otherwise
   derive the scalar type from the data reference, the LHS of a
   MODIFY_EXPR, or the statement itself, and look up its vectype.  */
146 if (STMT_VINFO_VECTYPE (stmt_info))
148 vectype = STMT_VINFO_VECTYPE (stmt_info);
149 scalar_type = TREE_TYPE (vectype);
153 if (STMT_VINFO_DATA_REF (stmt_info))
/* NOTE(review): the `scalar_type =' assignment line preceding this
   expression is one of the lines missing from the extract.  */
155 TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
156 else if (TREE_CODE (stmt) == MODIFY_EXPR)
157 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
159 scalar_type = TREE_TYPE (stmt);
161 if (vect_print_dump_info (REPORT_DETAILS))
163 fprintf (vect_dump, "get vectype for scalar type: ");
164 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
167 vectype = get_vectype_for_scalar_type (scalar_type);
170 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
173 "not vectorized: unsupported data-type ");
174 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
178 STMT_VINFO_VECTYPE (stmt_info) = vectype;
181 if (vect_print_dump_info (REPORT_DETAILS))
183 fprintf (vect_dump, "vectype: ");
184 print_generic_expr (vect_dump, vectype, TDF_SLIM);
187 nunits = TYPE_VECTOR_SUBPARTS (vectype);
188 if (vect_print_dump_info (REPORT_DETAILS))
189 fprintf (vect_dump, "nunits = %d", nunits);
/* All statements must share one element count (no mixed types yet).  */
191 if (vectorization_factor)
193 /* FORNOW: don't allow mixed units.
194 This restriction will be relaxed in the future. */
195 if (nunits != vectorization_factor)
197 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
198 fprintf (vect_dump, "not vectorized: mixed data-types");
203 vectorization_factor = nunits;
/* Sanity check: VF elements of this scalar type must exactly fill
   one SIMD word on the target.  */
205 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
206 * vectorization_factor == UNITS_PER_SIMD_WORD);
210 /* TODO: Analyze cost. Decide if worth while to vectorize. */
/* A factor of 0 or 1 means no vectorization opportunity was found.  */
212 if (vectorization_factor <= 1)
214 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
215 fprintf (vect_dump, "not vectorized: unsupported data-type");
218 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
224 /* Function vect_analyze_operations.
226 Scan the loop stmts and make sure they are all vectorizable. */
/* Verify that every relevant or live statement in the loop can be
   vectorized (via the vectorizable_* predicates), then perform global
   feasibility checks: a known iteration count must not be smaller than
   the VF, and when an epilogue loop is needed (unknown or non-divisible
   niters, or peeling for alignment) the induction variables must be
   advanceable and the loop duplicable.
   NOTE(review): this extract is missing source lines (the embedded
   numbering skips values) -- declarations of `i', `phi', `ok', braces,
   `else' arms and `return' statements are not visible.  Code is kept
   byte-identical; only comments were added.  */
229 vect_analyze_operations (loop_vec_info loop_vinfo)
231 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
232 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
233 int nbbs = loop->num_nodes;
234 block_stmt_iterator si;
235 unsigned int vectorization_factor = 0;
239 stmt_vec_info stmt_info;
240 bool need_to_vectorize = false;
242 if (vect_print_dump_info (REPORT_DETAILS))
243 fprintf (vect_dump, "=== vect_analyze_operations ===");
/* The VF must have been computed by vect_determine_vectorization_factor.  */
245 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
246 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
248 for (i = 0; i < nbbs; i++)
250 basic_block bb = bbs[i];
/* First pass: examine the PHI nodes of this block.  */
252 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
254 stmt_info = vinfo_for_stmt (phi);
255 if (vect_print_dump_info (REPORT_DETAILS))
257 fprintf (vect_dump, "examining phi: ");
258 print_generic_expr (vect_dump, phi, TDF_SLIM);
261 gcc_assert (stmt_info);
/* A PHI whose value is used after the loop is not supported yet.  */
263 if (STMT_VINFO_LIVE_P (stmt_info))
265 /* FORNOW: not yet supported. */
266 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
267 fprintf (vect_dump, "not vectorized: value used after loop.");
271 if (STMT_VINFO_RELEVANT_P (stmt_info))
273 /* Most likely a reduction-like computation that is used
275 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
276 fprintf (vect_dump, "not vectorized: unsupported pattern.");
/* Second pass: examine the ordinary statements of this block.  */
281 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
283 tree stmt = bsi_stmt (si);
284 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
286 if (vect_print_dump_info (REPORT_DETAILS))
288 fprintf (vect_dump, "==> examining statement: ");
289 print_generic_expr (vect_dump, stmt, TDF_SLIM);
292 gcc_assert (stmt_info);
294 /* skip stmts which do not need to be vectorized.
295 this is expected to include:
296 - the COND_EXPR which is the loop exit condition
297 - any LABEL_EXPRs in the loop
298 - computations that are used only for array indexing or loop
301 if (!STMT_VINFO_RELEVANT_P (stmt_info)
302 && !STMT_VINFO_LIVE_P (stmt_info))
304 if (vect_print_dump_info (REPORT_DETAILS))
305 fprintf (vect_dump, "irrelevant.");
309 if (STMT_VINFO_RELEVANT_P (stmt_info))
/* The vectype must already be set and the stmt must still be scalar.  */
311 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
312 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
/* A relevant stmt is supported if any of these predicates accepts it.  */
314 ok = (vectorizable_operation (stmt, NULL, NULL)
315 || vectorizable_assignment (stmt, NULL, NULL)
316 || vectorizable_load (stmt, NULL, NULL)
317 || vectorizable_store (stmt, NULL, NULL)
318 || vectorizable_condition (stmt, NULL, NULL));
322 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
325 "not vectorized: relevant stmt not supported: ");
326 print_generic_expr (vect_dump, stmt, TDF_SLIM);
330 need_to_vectorize = true;
/* Live statements are handled as reductions or live operations.  */
333 if (STMT_VINFO_LIVE_P (stmt_info))
335 ok = vectorizable_reduction (stmt, NULL, NULL);
338 need_to_vectorize = true;
340 ok = vectorizable_live_operation (stmt, NULL, NULL);
344 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
347 "not vectorized: live stmt not supported: ");
348 print_generic_expr (vect_dump, stmt, TDF_SLIM);
356 /* TODO: Analyze cost. Decide if worth while to vectorize. */
358 /* All operations in the loop are either irrelevant (deal with loop
359 control, or dead), or only used outside the loop and can be moved
360 out of the loop (e.g. invariants, inductions). The loop can be
361 optimized away by scalar optimizations. We're better off not
362 touching this loop. */
363 if (!need_to_vectorize)
365 if (vect_print_dump_info (REPORT_DETAILS))
367 "All the computation can be taken out of the loop.");
368 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
370 "not vectorized: redundant loop. no profit to vectorize.");
374 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
375 && vect_print_dump_info (REPORT_DETAILS))
377 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
378 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* Fewer iterations than the VF means not even one vector iteration.  */
380 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
381 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
384 fprintf (vect_dump, "not vectorized: iteration count too small.");
/* An epilogue (scalar remainder) loop is required when the trip count
   is unknown, not a multiple of the VF, or peeling for alignment is
   planned; verify we can build it.  */
388 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
389 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
390 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
392 if (vect_print_dump_info (REPORT_DETAILS))
393 fprintf (vect_dump, "epilog loop required.");
394 if (!vect_can_advance_ivs_p (loop_vinfo))
396 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
398 "not vectorized: can't create epilog loop 1.");
401 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
405 "not vectorized: can't create epilog loop 2.");
414 /* Function exist_non_indexing_operands_for_use_p
416 USE is one of the uses attached to STMT. Check if USE is
417 used in STMT for anything other than indexing an array. */
/* Predicate: does USE serve any purpose in STMT other than array indexing?
   If STMT has no data reference, every operand (including USE) is a
   non-indexing use.  Otherwise only the stored value of an SSA-name store
   can be a non-indexing use.
   NOTE(review): the extract is missing source lines (embedded numbering
   skips) -- the `operand' declaration, return statements and closing
   braces are not visible.  Code kept byte-identical; comments only.  */
420 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
431 /* STMT has a data_ref. FORNOW this means that its of one of
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
/* A store has a non-SSA LHS; only `var = ARRAY_REF' (an SSA LHS) can
   carry a non-indexing use -- the stored value on the RHS.  */
444 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
447 operand = TREE_OPERAND (stmt, 1);
449 if (TREE_CODE (operand) != SSA_NAME)
459 /* Function vect_analyze_scalar_cycles.
461 Examine the cross iteration def-use cycles of scalar variables, by
462 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
463 following: invariant, induction, reduction, unknown.
465 Some forms of scalar cycles are not yet supported.
467 Example1: reduction: (unsupported yet)
473 Example2: induction: (unsupported yet)
479 Note: the following loop *is* vectorizable:
485 even though it has a def-use cycle caused by the induction variable i:
487 loop: i_2 = PHI (i_0, i_1)
492 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
493 it does not need to be vectorized because it is only used for array
494 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
495 loop2 on the other hand is relevant (it is being written to memory).
/* Classify each scalar def-use cycle of the loop by examining the PHI
   nodes in the loop header: virtual PHIs are skipped, simple IV
   evolutions are marked vect_induction_def, simple reductions are marked
   vect_reduction_def (on both the PHI and the reduction stmt), and
   anything else stays vect_unknown_def_type.
   NOTE(review): the extract is missing source lines (embedded numbering
   skips) -- declarations of `phi', `dummy', `reduc_stmt', several braces
   and `continue' statements are not visible.  Code kept byte-identical;
   comments only.  */
499 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
502 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
503 basic_block bb = loop->header;
506 if (vect_print_dump_info (REPORT_DETAILS))
507 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
509 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
511 tree access_fn = NULL;
512 tree def = PHI_RESULT (phi);
513 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
516 if (vect_print_dump_info (REPORT_DETAILS))
518 fprintf (vect_dump, "Analyze phi: ");
519 print_generic_expr (vect_dump, phi, TDF_SLIM);
522 /* Skip virtual phi's. The data dependences that are associated with
523 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
525 if (!is_gimple_reg (SSA_NAME_VAR (def)))
527 if (vect_print_dump_info (REPORT_DETAILS))
528 fprintf (vect_dump, "virtual phi. skip.");
/* Start pessimistic; refined below if the cycle is recognized.  */
532 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
534 /* Analyze the evolution function. */
536 access_fn = analyze_scalar_evolution (loop, def);
541 if (vect_print_dump_info (REPORT_DETAILS))
543 fprintf (vect_dump, "Access function of PHI: ");
544 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* A simple induction: linear evolution in this loop.  */
547 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
549 if (vect_print_dump_info (REPORT_DETAILS))
550 fprintf (vect_dump, "Detected induction.");
551 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
555 /* TODO: handle invariant phis */
/* Not an induction: try to recognize a simple reduction.  */
557 reduc_stmt = vect_is_simple_reduction (loop, phi);
560 if (vect_print_dump_info (REPORT_DETAILS))
561 fprintf (vect_dump, "Detected reduction.");
562 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
/* Mark the reduction statement itself as well (RHS of the assignment
   is on a missing line in this extract).  */
563 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
567 if (vect_print_dump_info (REPORT_DETAILS))
568 fprintf (vect_dump, "Unknown def-use cycle pattern.");
576 /* Function vect_analyze_data_ref_dependence.
578 Return TRUE if there (might) exist a dependence between a memory-reference
579 DRA and a memory-reference DRB. */
/* Decide whether the data dependence relation DDR between DRA and DRB
   prevents vectorization with the current VF.  Known-independent pairs
   are fine; an unknown dependence fails; each distance vector is checked:
   distance 0 mod VF additionally records the two refs as having the same
   alignment, |distance| >= VF is harmless, anything else fails.
   NOTE(review): the extract is missing source lines (embedded numbering
   skips) -- the `i' declaration, return statements, braces and `continue's
   are not visible.  Code kept byte-identical; comments only.  */
582 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
583 loop_vec_info loop_vinfo)
586 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
587 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
588 struct data_reference *dra = DDR_A (ddr);
589 struct data_reference *drb = DDR_B (ddr);
590 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
591 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
592 lambda_vector dist_v;
593 unsigned int loop_depth;
/* Proven independent: no constraint on vectorization.  */
595 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
/* Dependence could not be analyzed: must assume the worst.  */
598 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
600 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
603 "not vectorized: can't determine dependence between ");
604 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
605 fprintf (vect_dump, " and ");
606 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* Dependent, but no usable distance vectors were computed.  */
611 if (DDR_NUM_DIST_VECTS (ddr) == 0)
613 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
615 fprintf (vect_dump, "not vectorized: bad dist vector for ");
616 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
617 fprintf (vect_dump, " and ");
618 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* Examine the dependence distance in this loop's dimension for every
   distance vector of the relation.  */
623 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
624 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
626 int dist = dist_v[loop_depth];
628 if (vect_print_dump_info (REPORT_DR_DETAILS))
629 fprintf (vect_dump, "dependence distance = %d.", dist);
631 /* Same loop iteration. */
632 if (dist % vectorization_factor == 0)
634 /* Two references with distance zero have the same alignment. */
635 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
636 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
637 if (vect_print_dump_info (REPORT_ALIGNMENT))
638 fprintf (vect_dump, "accesses have the same alignment.");
639 if (vect_print_dump_info (REPORT_DR_DETAILS))
641 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
642 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
643 fprintf (vect_dump, " and ");
644 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* The dependence cannot be violated within one vector iteration.  */
649 if (abs (dist) >= vectorization_factor)
651 /* Dependence distance does not create dependence, as far as vectorization
652 is concerned, in this case. */
653 if (vect_print_dump_info (REPORT_DR_DETAILS))
654 fprintf (vect_dump, "dependence distance >= VF.");
/* 0 < |dist| < VF: a real conflict within a vector iteration.  */
658 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
661 "not vectorized: possible dependence between data-refs ");
662 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
663 fprintf (vect_dump, " and ");
664 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
674 /* Function vect_analyze_data_ref_dependences.
676 Examine all the data references in the loop, and make sure there do not
677 exist any data dependences between them. */
/* Check every data dependence relation of the loop; the loop is
   vectorizable w.r.t. dependences only if no relation is flagged by
   vect_analyze_data_ref_dependence.
   NOTE(review): the extract is missing source lines (embedded numbering
   skips) -- the `i' declaration, return statements and braces are not
   visible.  Code kept byte-identical; comments only.  */
680 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
683 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
684 struct data_dependence_relation *ddr;
686 if (vect_print_dump_info (REPORT_DETAILS))
687 fprintf (vect_dump, "=== vect_analyze_dependences ===");
689 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
690 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
697 /* Function vect_compute_data_ref_alignment
699 Compute the misalignment of the data reference DR.
702 1. If during the misalignment computation it is found that the data reference
703 cannot be vectorized then false is returned.
704 2. DR_MISALIGNMENT (DR) is defined.
706 FOR NOW: No analysis is actually performed. Misalignment is calculated
707 only for trivial cases. TODO. */
/* Compute DR_MISALIGNMENT for data reference DR relative to the alignment
   of its statement's vector type.  Starts at -1 (unknown); when the base
   is (or can be forced to be) sufficiently aligned, sets the misalignment
   to the offset modulo the vectype alignment.  May force DECL_ALIGN of a
   decl base -- the only IR change made during analysis.
   NOTE(review): the extract is missing source lines (embedded numbering
   skips) -- declarations of `vectype', `misalign', `base_aligned', return
   statements, braces and parts of the base-alignment condition are not
   visible.  Code kept byte-identical; comments only.  */
710 vect_compute_data_ref_alignment (struct data_reference *dr)
712 tree stmt = DR_STMT (dr);
713 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
714 tree ref = DR_REF (dr);
716 tree base, base_addr;
719 tree aligned_to, alignment;
721 if (vect_print_dump_info (REPORT_DETAILS))
722 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
724 /* Initialize misalignment to unknown. */
725 DR_MISALIGNMENT (dr) = -1;
727 misalign = DR_OFFSET_MISALIGNMENT (dr);
728 aligned_to = DR_ALIGNED_TO (dr);
729 base_addr = DR_BASE_ADDRESS (dr);
730 base = build_fold_indirect_ref (base_addr);
731 vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Required alignment, in bytes, of the vector type.  */
732 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
/* The known alignment guarantee is weaker than what the vectype needs:
   leave the misalignment unknown.  */
734 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
737 if (vect_print_dump_info (REPORT_DETAILS))
739 fprintf (vect_dump, "Unknown alignment for access: ");
740 print_generic_expr (vect_dump, base, TDF_SLIM);
/* Determine whether the base object/pointer is already aligned enough
   (decl alignment, or pointed-to type alignment for SSA-name bases).  */
746 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
748 || (TREE_CODE (base_addr) == SSA_NAME
749 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
750 TREE_TYPE (base_addr)))),
754 base_aligned = false;
758 /* Do not change the alignment of global variables if
759 flag_section_anchors is enabled. */
760 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
761 || (TREE_STATIC (base) && flag_section_anchors))
763 if (vect_print_dump_info (REPORT_DETAILS))
765 fprintf (vect_dump, "can't force alignment of ref: ");
766 print_generic_expr (vect_dump, ref, TDF_SLIM);
771 /* Force the alignment of the decl.
772 NOTE: This is the only change to the code we make during
773 the analysis phase, before deciding to vectorize the loop. */
774 if (vect_print_dump_info (REPORT_DETAILS))
775 fprintf (vect_dump, "force alignment");
776 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
777 DECL_USER_ALIGN (base) = 1;
780 /* At this point we assume that the base is aligned. */
781 gcc_assert (base_aligned
782 || (TREE_CODE (base) == VAR_DECL
783 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
785 /* Modulo alignment. */
786 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
788 if (!host_integerp (misalign, 1))
790 /* Negative or overflowed misalignment value. */
791 if (vect_print_dump_info (REPORT_DETAILS))
792 fprintf (vect_dump, "unexpected misalign value");
/* Record the now-known misalignment, in bytes.  */
796 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
798 if (vect_print_dump_info (REPORT_DETAILS))
800 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
801 print_generic_expr (vect_dump, ref, TDF_SLIM);
808 /* Function vect_compute_data_refs_alignment
810 Compute the misalignment of data references in the loop.
811 Return FALSE if a data reference is found that cannot be vectorized. */
/* Compute the misalignment of every data reference in the loop; fails as
   soon as one reference cannot be handled by
   vect_compute_data_ref_alignment.
   NOTE(review): the extract is missing source lines (embedded numbering
   skips) -- the `i' declaration, return statements and braces are not
   visible.  Code kept byte-identical; comments only.  */
814 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
816 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
817 struct data_reference *dr;
820 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
821 if (!vect_compute_data_ref_alignment (dr))
828 /* Function vect_update_misalignment_for_peel
830 DR - the data reference whose misalignment is to be adjusted.
831 DR_PEEL - the data reference whose misalignment is being made
832 zero in the vector loop by the peel.
833 NPEEL - the number of iterations in the peel loop if the misalignment
834 of DR_PEEL is known at compile time. */
/* Adjust DR_MISALIGNMENT of DR after peeling NPEEL iterations to align
   DR_PEEL: refs with the same misalignment (or recorded as same-aligned)
   as DR_PEEL become aligned (0); if both misalignments are known at
   compile time, DR's misalignment is advanced by NPEEL element sizes
   modulo the SIMD word size; otherwise it becomes unknown (-1).
   NOTE(review): the extract is missing source lines (embedded numbering
   skips) -- declarations of `i', `drsize', `same_align_drs' assignment
   start, `return' statements and braces are not visible.  Code kept
   byte-identical; comments only.  */
837 vect_update_misalignment_for_peel (struct data_reference *dr,
838 struct data_reference *dr_peel, int npeel)
842 VEC(dr_p,heap) *same_align_drs;
843 struct data_reference *current_dr;
/* Same known misalignment as the peel ref: aligned after the peel.  */
845 if (known_alignment_for_access_p (dr)
846 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
848 DR_MISALIGNMENT (dr) = 0;
852 /* It can be assumed that the data refs with the same alignment as dr_peel
853 are aligned in the vector loop. */
855 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
856 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
858 if (current_dr != dr)
860 gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
861 DR_MISALIGNMENT (dr) = 0;
/* Both misalignments known: advance DR's misalignment by the peeled
   iterations, staying within one SIMD word.  */
865 if (known_alignment_for_access_p (dr)
866 && known_alignment_for_access_p (dr_peel))
868 drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
869 DR_MISALIGNMENT (dr) += npeel * drsize;
870 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
/* Otherwise the misalignment after peeling is unknown.  */
874 DR_MISALIGNMENT (dr) = -1;
878 /* Function vect_verify_datarefs_alignment
880 Return TRUE if all data references in the loop can be
881 handled with respect to alignment. */
/* Verify that every data reference in the loop has supportable alignment
   (vect_supportable_dr_alignment != 0); reports unaligned-but-supported
   accesses under REPORT_ALIGNMENT.
   NOTE(review): the extract is missing source lines (embedded numbering
   skips) -- the `i' declaration, the load/store branch of the diagnostic,
   return statements and braces are not visible.  Code kept byte-identical;
   comments only.  */
884 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
886 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
887 struct data_reference *dr;
888 enum dr_alignment_support supportable_dr_alignment;
891 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
893 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
894 if (!supportable_dr_alignment)
896 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
/* NOTE(review): the DR_IS_READ test selecting between these two
   messages sits on lines missing from the extract.  */
900 "not vectorized: unsupported unaligned load.");
903 "not vectorized: unsupported unaligned store.");
907 if (supportable_dr_alignment != dr_aligned
908 && vect_print_dump_info (REPORT_ALIGNMENT))
909 fprintf (vect_dump, "Vectorizing an unaligned access.");
915 /* Function vector_alignment_reachable_p
917 Return true if vector alignment for DR is reachable by peeling
918 a few loop iterations. Return false otherwise. */
/* Return true if DR can be brought to vector alignment by peeling a few
   loop iterations: a known misalignment must be a multiple of the element
   size; an unknown misalignment defers to the target hook
   targetm.vectorize.vector_alignment_reachable (considering packed refs).
   NOTE(review): the extract is missing source lines (embedded numbering
   skips) -- return statements, braces and part of the packed-reference
   guard are not visible.  Code kept byte-identical; comments only.  */
921 vector_alignment_reachable_p (struct data_reference *dr)
923 tree stmt = DR_STMT (dr);
924 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
925 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
927 /* If misalignment is known at the compile time then allow peeling
928 only if natural alignment is reachable through peeling. */
929 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
931 HOST_WIDE_INT elmsize =
932 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
933 if (vect_print_dump_info (REPORT_DETAILS))
935 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
936 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
/* Peeling advances the address in whole elements, so it can never fix
   a misalignment that is not a multiple of the element size.  */
938 if (DR_MISALIGNMENT (dr) % elmsize)
940 if (vect_print_dump_info (REPORT_DETAILS))
941 fprintf (vect_dump, "data size does not divide the misalignment.\n");
/* Unknown misalignment: ask the target whether alignment could still
   be reached, taking packed references into account.  */
946 if (!known_alignment_for_access_p (dr))
948 tree type = (TREE_TYPE (DR_REF (dr)));
949 tree ba = DR_BASE_OBJECT (dr);
950 bool is_packed = false;
953 is_packed = contains_packed_reference (ba);
955 if (vect_print_dump_info (REPORT_DETAILS))
956 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
957 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
966 /* Function vect_enhance_data_refs_alignment
968 This pass will use loop versioning and loop peeling in order to enhance
969 the alignment of data references in the loop.
971 FOR NOW: we assume that whatever versioning/peeling takes place, only the
972 original loop is to be vectorized; Any other loops that are created by
973 the transformations performed in this pass - are not supposed to be
974 vectorized. This restriction will be relaxed.
976 This pass will require a cost model to guide it whether to apply peeling
977 or versioning or a combination of the two. For example, the scheme that
978 intel uses when given a loop with several memory accesses, is as follows:
979 choose one memory access ('p') which alignment you want to force by doing
980 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
981 other accesses are not necessarily aligned, or (2) use loop versioning to
982 generate one loop in which all accesses are aligned, and another loop in
983 which only 'p' is necessarily aligned.
985 ("Automatic Intra-Register Vectorization for the Intel Architecture",
986 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
987 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
989 Devising a cost model is the most critical aspect of this work. It will
990 guide us on which access to peel for, whether to use loop versioning, how
991 many versions to create, etc. The cost model will probably consist of
992 generic considerations as well as target specific considerations (on
993 powerpc for example, misaligned stores are more painful than misaligned
996 Here are the general steps involved in alignment enhancements:
998 -- original loop, before alignment analysis:
1000 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1001 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1004 -- After vect_compute_data_refs_alignment:
1005 for (i=0; i<N; i++){
1006 x = q[i]; # DR_MISALIGNMENT(q) = 3
1007 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1010 -- Possibility 1: we do loop versioning:
1012 for (i=0; i<N; i++){ # loop 1A
1013 x = q[i]; # DR_MISALIGNMENT(q) = 3
1014 p[i] = y; # DR_MISALIGNMENT(p) = 0
1018 for (i=0; i<N; i++){ # loop 1B
1019 x = q[i]; # DR_MISALIGNMENT(q) = 3
1020 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1024 -- Possibility 2: we do loop peeling:
1025 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1029 for (i = 3; i < N; i++){ # loop 2A
1030 x = q[i]; # DR_MISALIGNMENT(q) = 0
1031 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1034 -- Possibility 3: combination of loop peeling and versioning:
1035 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1040 for (i = 3; i<N; i++){ # loop 3A
1041 x = q[i]; # DR_MISALIGNMENT(q) = 0
1042 p[i] = y; # DR_MISALIGNMENT(p) = 0
1046 for (i = 3; i<N; i++){ # loop 3B
1047 x = q[i]; # DR_MISALIGNMENT(q) = 0
1048 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1052 These loops are later passed to loop_transform to be vectorized. The
1053 vectorizer will use the alignment information to guide the transformation
1054 (whether to generate regular loads/stores, or with special handling for
1058 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1060 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1061 enum dr_alignment_support supportable_dr_alignment;
1062 struct data_reference *dr0 = NULL;
1063 struct data_reference *dr;
1065 bool do_peeling = false;
1066 bool do_versioning = false;
1069 /* While cost model enhancements are expected in the future, the high level
1070 view of the code at this time is as follows:
1072 A) If there is a misaligned write then see if peeling to align this write
1073 can make all data references satisfy vect_supportable_dr_alignment.
1074 If so, update data structures as needed and return true. Note that
1075 at this time vect_supportable_dr_alignment is known to return false
1076 for a a misaligned write.
1078 B) If peeling wasn't possible and there is a data reference with an
1079 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1080 then see if loop versioning checks can be used to make all data
1081 references satisfy vect_supportable_dr_alignment. If so, update
1082 data structures as needed and return true.
1084 C) If neither peeling nor versioning were successful then return false if
1085 any data reference does not satisfy vect_supportable_dr_alignment.
1087 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1089 Note, Possibility 3 above (which is peeling and versioning together) is not
1090 being done at this time. */
1092 /* (1) Peeling to force alignment. */
1094 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1096 + How many accesses will become aligned due to the peeling
1097 - How many accesses will become unaligned due to the peeling,
1098 and the cost of misaligned accesses.
1099 - The cost of peeling (the extra runtime checks, the increase
1102 The scheme we use FORNOW: peel to force the alignment of the first
1103 misaligned store in the loop.
1104 Rationale: misaligned stores are not yet supported.
1106 TODO: Use a cost model. */
1108 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1109 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1111 do_peeling = vector_alignment_reachable_p (dr);
1114 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1115 fprintf (vect_dump, "vector alignment may not be reachable");
1119 /* Often peeling for alignment will require peeling for loop-bound, which in
1120 turn requires that we know how to adjust the loop ivs after the loop. */
1121 if (!vect_can_advance_ivs_p (loop_vinfo))
1129 if (known_alignment_for_access_p (dr0))
1131 /* Since it's known at compile time, compute the number of iterations
1132 in the peeled loop (the peeling factor) for use in updating
1133 DR_MISALIGNMENT values. The peeling factor is the vectorization
1134 factor minus the misalignment as an element count. */
1135 mis = DR_MISALIGNMENT (dr0);
1136 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1137 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1140 /* Ensure that all data refs can be vectorized after the peel. */
1141 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1143 int save_misalignment;
1148 save_misalignment = DR_MISALIGNMENT (dr);
1149 vect_update_misalignment_for_peel (dr, dr0, npeel);
1150 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1151 DR_MISALIGNMENT (dr) = save_misalignment;
1153 if (!supportable_dr_alignment)
1162 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1163 If the misalignment of DR_i is identical to that of dr0 then set
1164 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1165 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1166 by the peeling factor times the element size of DR_i (MOD the
1167 vectorization factor times the size). Otherwise, the
1168 misalignment of DR_i must be set to unknown. */
1169 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1171 vect_update_misalignment_for_peel (dr, dr0, npeel);
1173 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1174 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1175 DR_MISALIGNMENT (dr0) = 0;
1176 if (vect_print_dump_info (REPORT_ALIGNMENT))
1177 fprintf (vect_dump, "Alignment of access forced using peeling.");
1179 if (vect_print_dump_info (REPORT_DETAILS))
1180 fprintf (vect_dump, "Peeling for alignment will be applied.");
1182 stat = vect_verify_datarefs_alignment (loop_vinfo);
1189 /* (2) Versioning to force alignment. */
1191 /* Try versioning if:
1192 1) flag_tree_vect_loop_version is TRUE
1193 2) optimize_size is FALSE
1194 3) there is at least one unsupported misaligned data ref with an unknown
1196 4) all misaligned data refs with a known misalignment are supported, and
1197 5) the number of runtime alignment checks is within reason. */
1199 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1203 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1205 if (aligned_access_p (dr))
1208 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1210 if (!supportable_dr_alignment)
1216 if (known_alignment_for_access_p (dr)
1217 || VEC_length (tree,
1218 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1219 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1221 do_versioning = false;
1225 stmt = DR_STMT (dr);
1226 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1227 gcc_assert (vectype);
1229 /* The rightmost bits of an aligned address must be zeros.
1230 Construct the mask needed for this test. For example,
1231 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1232 mask must be 15 = 0xf. */
1233 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1235 /* FORNOW: use the same mask to test all potentially unaligned
1236 references in the loop. The vectorizer currently supports
1237 a single vector size, see the reference to
1238 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1239 vectorization factor is computed. */
1240 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1241 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1242 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1243 VEC_safe_push (tree, heap,
1244 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1249 /* Versioning requires at least one misaligned data reference. */
1250 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1251 do_versioning = false;
1252 else if (!do_versioning)
1253 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1258 VEC(tree,heap) *may_misalign_stmts
1259 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1262 /* It can now be assumed that the data references in the statements
1263 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1264 of the loop being vectorized. */
1265 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1267 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1268 dr = STMT_VINFO_DATA_REF (stmt_info);
1269 DR_MISALIGNMENT (dr) = 0;
1270 if (vect_print_dump_info (REPORT_ALIGNMENT))
1271 fprintf (vect_dump, "Alignment of access forced using versioning.");
1274 if (vect_print_dump_info (REPORT_DETAILS))
1275 fprintf (vect_dump, "Versioning for alignment will be applied.");
1277 /* Peeling and versioning can't be done together at this time. */
1278 gcc_assert (! (do_peeling && do_versioning));
1280 stat = vect_verify_datarefs_alignment (loop_vinfo);
1285 /* This point is reached if neither peeling nor versioning is being done. */
1286 gcc_assert (! (do_peeling || do_versioning));
1288 stat = vect_verify_datarefs_alignment (loop_vinfo);
1293 /* Function vect_analyze_data_refs_alignment
1295 Analyze the alignment of the data-references in the loop.
1296 Return FALSE if a data reference is found that cannot be vectorized. */
1299 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1301 if (vect_print_dump_info (REPORT_DETAILS))
1302 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
   /* Compute DR_MISALIGNMENT for every data reference in the loop; a failure
      here means the alignment of some access could not be determined.  */
1304 if (!vect_compute_data_refs_alignment (loop_vinfo))
1306 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1308 "not vectorized: can't calculate alignment for data ref.");
1316 /* Function vect_analyze_data_ref_access.
1318 Analyze the access pattern of the data-reference DR. For now, a data access
1319 has to be consecutive to be considered vectorizable. */
1322 vect_analyze_data_ref_access (struct data_reference *dr)
1324 tree step = DR_STEP (dr);
1325 tree scalar_type = TREE_TYPE (DR_REF (dr));
   /* "Consecutive" means the per-iteration step equals the size of one
      scalar element.  A missing step (not a compile-time constant we can
      compare) also fails the test.  */
1327 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1329 if (vect_print_dump_info (REPORT_DETAILS))
1330 fprintf (vect_dump, "not consecutive access");
1337 /* Function vect_analyze_data_ref_accesses.
1339 Analyze the access pattern of all the data references in the loop.
1341 FORNOW: the only access pattern that is considered vectorizable is a
1342 simple step 1 (consecutive) access.
1344 FORNOW: handle only arrays and pointer accesses. */
1347 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1350 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1351 struct data_reference *dr;
1353 if (vect_print_dump_info (REPORT_DETAILS))
1354 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
   /* Reject the whole loop as soon as a single data-ref has a
      non-consecutive access pattern.  */
1356 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1357 if (!vect_analyze_data_ref_access (dr))
1359 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1360 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1368 /* Function vect_analyze_data_refs.
1370 Find all the data references in the loop.
1372 The general structure of the analysis of data refs in the vectorizer is as
1374 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1375 find and analyze all data-refs in the loop and their dependences.
1376 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1377 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1378 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1383 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1385 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1387 VEC (data_reference_p, heap) *datarefs;
1388 struct data_reference *dr;
1391 if (vect_print_dump_info (REPORT_DETAILS))
1392 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
   /* Delegate the discovery and analysis of all data references (and their
      dependences) to the generic data-ref machinery; results are recorded
      directly in the loop_vec_info.  */
1394 compute_data_dependences_for_loop (loop, false,
1395 &LOOP_VINFO_DATAREFS (loop_vinfo),
1396 &LOOP_VINFO_DDRS (loop_vinfo));
1398 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1399 from stmt_vec_info struct to DR and vectype. */
1400 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1402 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1405 stmt_vec_info stmt_info;
   /* A data-ref the generic analysis could not even represent.  */
1407 if (!dr || !DR_REF (dr))
1409 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1410 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1414 /* Update DR field in stmt_vec_info struct. */
1415 stmt = DR_STMT (dr);
1416 stmt_info = vinfo_for_stmt (stmt);
   /* FORNOW: a statement may carry at most one data reference.  */
1418 if (STMT_VINFO_DATA_REF (stmt_info))
1420 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1423 "not vectorized: more than one data ref in stmt: ");
1424 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1428 STMT_VINFO_DATA_REF (stmt_info) = dr;
1430 /* Check that analysis of the data-ref succeeded. */
1431 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1436 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1437 print_generic_expr (vect_dump, stmt, TDF_SLIM);
   /* Without a memory tag the aliasing information needed for
      vectorization is unavailable.  */
1441 if (!DR_MEMTAG (dr))
1443 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1445 fprintf (vect_dump, "not vectorized: no memory tag for ");
1446 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1451 /* Set vectype for STMT. */
1452 scalar_type = TREE_TYPE (DR_REF (dr));
1453 STMT_VINFO_VECTYPE (stmt_info) =
1454 get_vectype_for_scalar_type (scalar_type);
   /* No vector mode exists for this scalar type on the target.  */
1455 if (!STMT_VINFO_VECTYPE (stmt_info))
1457 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1460 "not vectorized: no vectype for stmt: ");
1461 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1462 fprintf (vect_dump, " scalar_type: ");
1463 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1473 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1475 /* Function vect_mark_relevant.
1477 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1480 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1481 bool relevant_p, bool live_p)
1483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   /* Remember the current flags so we can detect whether this call
      actually changed anything (and so whether STMT must be requeued).  */
1484 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1485 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1487 if (vect_print_dump_info (REPORT_DETAILS))
1488 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
1490 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1494 /* This is the last stmt in a sequence that was detected as a
1495 pattern that can potentially be vectorized. Don't mark the stmt
1496 as relevant/live because it's not going to be vectorized.
1497 Instead mark the pattern-stmt that replaces it. */
1498 if (vect_print_dump_info (REPORT_DETAILS))
1499 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
1500 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1501 stmt_info = vinfo_for_stmt (pattern_stmt);
1502 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
1503 save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1504 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1505 stmt = pattern_stmt;
   /* Flags only ever accumulate: once relevant/live, always relevant/live.  */
1508 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1509 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
1511 if (TREE_CODE (stmt) == PHI_NODE)
1512 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1513 or live will fail vectorization later on. */
   /* Nothing changed -- STMT was already processed with these flags, so
      there is no need to push it onto the worklist again.  */
1516 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1517 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
1519 if (vect_print_dump_info (REPORT_DETAILS))
1520 fprintf (vect_dump, "already marked relevant/live.");
1524 VEC_safe_push (tree, heap, *worklist, stmt);
1528 /* Function vect_stmt_relevant_p.
1530 Return true if STMT in loop that is represented by LOOP_VINFO is
1531 "relevant for vectorization".
1533 A stmt is considered "relevant for vectorization" if:
1534 - it has uses outside the loop.
1535 - it has vdefs (it alters memory).
1536 - control stmts in the loop (except for the exit condition).
   Output: *RELEVANT_P and *LIVE_P are set to reflect why STMT matters
   (relevant = must be vectorized; live = its value is used outside the loop).
1538 CHECKME: what other side effects would the vectorizer allow? */
1541 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
1542 bool *relevant_p, bool *live_p)
1544 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1545 ssa_op_iter op_iter;
1546 imm_use_iterator imm_iter;
1547 use_operand_p use_p;
1548 def_operand_p def_p;
1550 *relevant_p = false;
1553 /* cond stmt other than loop exit cond. */
1554 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1557 /* changing memory. */
1558 if (TREE_CODE (stmt) != PHI_NODE)
1559 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
1561 if (vect_print_dump_info (REPORT_DETAILS))
1562 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1566 /* uses outside the loop. */
   /* Walk every SSA def of STMT and scan its immediate uses for one that
      sits in a basic block outside the loop.  */
1567 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
1569 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
1571 basic_block bb = bb_for_stmt (USE_STMT (use_p));
1572 if (!flow_bb_inside_loop_p (loop, bb))
1574 if (vect_print_dump_info (REPORT_DETAILS))
1575 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1577 /* We expect all such uses to be in the loop exit phis
1578 (because of loop closed form) */
1579 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
1580 gcc_assert (bb == loop->single_exit->dest);
1587 return (*live_p || *relevant_p);
1591 /* Function vect_mark_stmts_to_be_vectorized.
1593 Not all stmts in the loop need to be vectorized. For example:
1602 Stmt 1 and 3 do not need to be vectorized, because loop control and
1603 addressing of vectorized data-refs are handled differently.
1605 This pass detects such stmts. */
1608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1610 VEC(tree,heap) *worklist;
1611 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1612 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1613 unsigned int nbbs = loop->num_nodes;
1614 block_stmt_iterator si;
1619 stmt_vec_info stmt_vinfo;
1622 bool relevant_p, live_p;
1624 enum vect_def_type dt;
1626 if (vect_print_dump_info (REPORT_DETAILS))
1627 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1629 worklist = VEC_alloc (tree, heap, 64);
1631 /* 1. Init worklist. */
   /* Seed the worklist with every phi and stmt that is relevant/live by
      itself (per vect_stmt_relevant_p); relevance is then propagated
      backwards through SSA uses in step 2.  */
1634 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1636 if (vect_print_dump_info (REPORT_DETAILS))
1638 fprintf (vect_dump, "init: phi relevant? ");
1639 print_generic_expr (vect_dump, phi, TDF_SLIM);
1642 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1643 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
1646 for (i = 0; i < nbbs; i++)
1649 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1651 stmt = bsi_stmt (si);
1653 if (vect_print_dump_info (REPORT_DETAILS))
1655 fprintf (vect_dump, "init: stmt relevant? ");
1656 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1659 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1660 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
1665 /* 2. Process_worklist */
1667 while (VEC_length (tree, worklist) > 0)
1669 stmt = VEC_pop (tree, worklist);
1671 if (vect_print_dump_info (REPORT_DETAILS))
1673 fprintf (vect_dump, "worklist: examine stmt: ");
1674 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1677 /* Examine the USEs of STMT. For each ssa-name USE that is defined
1678 in the loop, mark the stmt that defines it (DEF_STMT) as
1679 relevant/irrelevant and live/dead according to the liveness and
1680 relevance properties of STMT.
   Phi nodes are never pushed onto the worklist (see vect_mark_relevant).  */
1683 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1685 ann = stmt_ann (stmt);
1686 stmt_vinfo = vinfo_for_stmt (stmt);
1688 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1689 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1691 /* Generally, the liveness and relevance properties of STMT are
1692 propagated to the DEF_STMTs of its USEs:
1693 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1694 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1699 If USE is used only for address computations (e.g. array indexing),
1700 which does not need to be directly vectorized, then the
1701 liveness/relevance of the respective DEF_STMT is left unchanged.
1704 If STMT has been identified as defining a reduction variable, then
1707 The last use of STMT is the reduction-variable, which is defined
1708 by a loop-header-phi. We don't want to mark the phi as live or
1709 relevant (because it does not need to be vectorized, it is handled
1710 as part of the vectorization of the reduction), so in this case we
1711 skip the call to vect_mark_relevant.
1713 The rest of the uses of STMT are defined in the loop body. For
1714 the def_stmt of these uses we want to set liveness/relevance
1716 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1717 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1718 because even though STMT is classified as live (since it defines a
1719 value that is used across loop iterations) and irrelevant (since it
1720 is not used inside the loop), it will be vectorized, and therefore
1721 the corresponding DEF_STMTs need to be marked as relevant.
1725 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1727 gcc_assert (!relevant_p && live_p);
1732 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1734 /* case 1: we are only interested in uses that need to be vectorized.
1735 Uses that are used for address computation are not considered
1738 if (!exist_non_indexing_operands_for_use_p (use, stmt))
   /* An unsupported use makes the whole loop unvectorizable.  */
1741 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1743 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1744 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1745 VEC_free (tree, heap, worklist);
1749 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1752 if (vect_print_dump_info (REPORT_DETAILS))
1754 fprintf (vect_dump, "worklist: examine use %d: ", i);
1755 print_generic_expr (vect_dump, use, TDF_SLIM);
   /* Defs from outside the loop need no vectorization; skip them.  */
1758 bb = bb_for_stmt (def_stmt);
1759 if (!flow_bb_inside_loop_p (loop, bb))
1762 /* case 2.1: the reduction-use does not mark the defining-phi
1764 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1765 && TREE_CODE (def_stmt) == PHI_NODE)
1768 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
1770 } /* while worklist */
1772 VEC_free (tree, heap, worklist);
1777 /* Function vect_can_advance_ivs_p
1779 In case the number of iterations that LOOP iterates is unknown at compile
1780 time, an epilog loop will be generated, and the loop induction variables
1781 (IVs) will be "advanced" to the value they are supposed to take just before
1782 the epilog loop. Here we check that the access function of the loop IVs
1783 and the expression that represents the loop bound are simple enough.
1784 These restrictions will be relaxed in the future. */
1787 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1789 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1790 basic_block bb = loop->header;
1793 /* Analyze phi functions of the loop header. */
1795 if (vect_print_dump_info (REPORT_DETAILS))
1796 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1798 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1800 tree access_fn = NULL;
1801 tree evolution_part;
1803 if (vect_print_dump_info (REPORT_DETAILS))
1805 fprintf (vect_dump, "Analyze phi: ");
1806 print_generic_expr (vect_dump, phi, TDF_SLIM);
1809 /* Skip virtual phi's. The data dependences that are associated with
1810 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1812 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1814 if (vect_print_dump_info (REPORT_DETAILS))
1815 fprintf (vect_dump, "virtual phi. skip.");
1819 /* Skip reduction phis. */
   /* Reductions are advanced as part of the reduction epilogue, not here.  */
1821 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1823 if (vect_print_dump_info (REPORT_DETAILS))
1824 fprintf (vect_dump, "reduc phi. skip.");
1828 /* Analyze the evolution function. */
1830 access_fn = instantiate_parameters
1831 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1835 if (vect_print_dump_info (REPORT_DETAILS))
1836 fprintf (vect_dump, "No Access function.");
1840 if (vect_print_dump_info (REPORT_DETAILS))
1842 fprintf (vect_dump, "Access function of PHI: ");
1843 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
   /* Extract the per-iteration step of this IV within LOOP.  */
1846 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1848 if (evolution_part == NULL_TREE)
1850 if (vect_print_dump_info (REPORT_DETAILS))
1851 fprintf (vect_dump, "No evolution.");
1855 /* FORNOW: We do not transform initial conditions of IVs
1856 whose evolution functions are a polynomial of degree >= 2. */
1858 if (tree_is_chrec (evolution_part))
1866 /* Function vect_get_loop_niters.
1868 Determine how many iterations the loop is executed.
1869 If an expression that represents the number of iterations
1870 can be constructed, place it in NUMBER_OF_ITERATIONS.
1871 Return the loop exit condition. */
1874 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1878 if (vect_print_dump_info (REPORT_DETAILS))
1879 fprintf (vect_dump, "=== get_loop_niters ===");
1881 niters = number_of_iterations_in_loop (loop);
   /* Record the iteration-count expression only when the scalar-evolution
      analysis produced a usable answer; chrec_dont_know means it gave up.  */
1883 if (niters != NULL_TREE
1884 && niters != chrec_dont_know)
1886 *number_of_iterations = niters;
1888 if (vect_print_dump_info (REPORT_DETAILS))
1890 fprintf (vect_dump, "==> get_loop_niters:" );
1891 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1895 return get_loop_exit_condition (loop);
1899 /* Function vect_analyze_loop_form.
1901 Verify the following restrictions (some may be relaxed in the future):
1902 - it's an inner-most loop
1903 - number of BBs = 2 (which are the loop header and the latch)
1904 - the loop has a pre-header
1905 - the loop has a single entry and exit
1906 - the loop exit condition is simple enough, and the number of iterations
1907 can be analyzed (a countable loop). */
1909 static loop_vec_info
1910 vect_analyze_loop_form (struct loop *loop)
1912 loop_vec_info loop_vinfo;
1914 tree number_of_iterations = NULL;
1916 if (vect_print_dump_info (REPORT_DETAILS))
1917 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1921 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1922 fprintf (vect_dump, "not vectorized: nested loop.");
   /* CFG restrictions: single exit, exactly header+latch, and exactly two
      incoming edges to the header (preheader + latch back edge).  */
1926 if (!loop->single_exit
1927 || loop->num_nodes != 2
1928 || EDGE_COUNT (loop->header->preds) != 2)
1930 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1932 if (!loop->single_exit)
1933 fprintf (vect_dump, "not vectorized: multiple exits.");
1934 else if (loop->num_nodes != 2)
1935 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1936 else if (EDGE_COUNT (loop->header->preds) != 2)
1937 fprintf (vect_dump, "not vectorized: too many incoming edges.");
1943 /* We assume that the loop exit condition is at the end of the loop. i.e,
1944 that the loop is represented as a do-while (with a proper if-guard
1945 before the loop if needed), where the loop header contains all the
1946 executable statements, and the latch is empty. */
1947 if (!empty_block_p (loop->latch)
1948 || phi_nodes (loop->latch))
1950 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1951 fprintf (vect_dump, "not vectorized: unexpected loop form.");
1955 /* Make sure there exists a single-predecessor exit bb: */
1956 if (!single_pred_p (loop->single_exit->dest))
1958 edge e = loop->single_exit;
   /* A normal exit edge can be split to create the needed block; an
      abnormal one cannot, so the loop is rejected.  */
1959 if (!(e->flags & EDGE_ABNORMAL))
1961 split_loop_exit_edge (e);
1962 if (vect_print_dump_info (REPORT_DETAILS))
1963 fprintf (vect_dump, "split exit edge.");
1967 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1968 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1973 if (empty_block_p (loop->header))
1975 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1976 fprintf (vect_dump, "not vectorized: empty loop.");
   /* Countability: retrieve the exit condition and an expression for the
      iteration count.  */
1980 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1983 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1984 fprintf (vect_dump, "not vectorized: complicated exit condition.");
1988 if (!number_of_iterations)
1990 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1992 "not vectorized: number of iterations cannot be computed.");
1996 if (chrec_contains_undetermined (number_of_iterations))
1998 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1999 fprintf (vect_dump, "Infinite number of iterations.");
   /* All form checks passed: create the loop_vec_info and record what was
      learned about the iteration count and exit condition.  */
2003 loop_vinfo = new_loop_vec_info (loop);
2004 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2006 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2008 if (vect_print_dump_info (REPORT_DETAILS))
2010 fprintf (vect_dump, "Symbolic number of iterations is ");
2011 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2015 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2017 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2018 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2022 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2028 /* Function vect_analyze_loop.
2030 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2031 for it. The different analyses will record information in the
2032 loop_vec_info struct. */
2034 vect_analyze_loop (struct loop *loop)
2037 loop_vec_info loop_vinfo;
2039 if (vect_print_dump_info (REPORT_DETAILS))
2040 fprintf (vect_dump, "===== analyze_loop_nest =====");
2042 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2044 loop_vinfo = vect_analyze_loop_form (loop);
2047 if (vect_print_dump_info (REPORT_DETAILS))
2048 fprintf (vect_dump, "bad loop form.");
2052 /* Find all data references in the loop (which correspond to vdefs/vuses)
2053 and analyze their evolution in the loop.
2055 FORNOW: Handle only simple, array references, which
2056 alignment can be forced, and aligned pointer-references. */
2058 ok = vect_analyze_data_refs (loop_vinfo);
2061 if (vect_print_dump_info (REPORT_DETAILS))
2062 fprintf (vect_dump, "bad data references.");
2063 destroy_loop_vec_info (loop_vinfo);
2067 /* Classify all cross-iteration scalar data-flow cycles.
2068 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2070 vect_analyze_scalar_cycles (loop_vinfo);
2072 vect_pattern_recog (loop_vinfo);
2074 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2076 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2079 if (vect_print_dump_info (REPORT_DETAILS))
2080 fprintf (vect_dump, "unexpected pattern.");
2081 destroy_loop_vec_info (loop_vinfo);
2085 /* Analyze the alignment of the data-refs in the loop.
2086 Fail if a data reference is found that cannot be vectorized. */
2088 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2091 if (vect_print_dump_info (REPORT_DETAILS))
2092 fprintf (vect_dump, "bad data alignment.");
2093 destroy_loop_vec_info (loop_vinfo);
2097 ok = vect_determine_vectorization_factor (loop_vinfo);
2100 if (vect_print_dump_info (REPORT_DETAILS))
2101 fprintf (vect_dump, "can't determine vectorization factor.");
2102 destroy_loop_vec_info (loop_vinfo);
2106 /* Analyze data dependences between the data-refs in the loop.
2107 FORNOW: fail at the first data dependence that we encounter. */
2109 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2112 if (vect_print_dump_info (REPORT_DETAILS))
2113 fprintf (vect_dump, "bad data dependence.");
2114 destroy_loop_vec_info (loop_vinfo);
2118 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2119 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2121 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2124 if (vect_print_dump_info (REPORT_DETAILS))
2125 fprintf (vect_dump, "bad data access.");
2126 destroy_loop_vec_info (loop_vinfo);
2130 /* This pass will decide on using loop versioning and/or loop peeling in
2131 order to enhance the alignment of data references in the loop. */
2133 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2136 if (vect_print_dump_info (REPORT_DETAILS))
2137 fprintf (vect_dump, "bad data alignment.");
2138 destroy_loop_vec_info (loop_vinfo);
2142 /* Scan all the operations in the loop and make sure they are
2145 ok = vect_analyze_operations (loop_vinfo);
2148 if (vect_print_dump_info (REPORT_DETAILS))
2149 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2150 destroy_loop_vec_info (loop_vinfo);
2154 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;