1 /* Reassociation for trees.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-inline.h"
32 #include "tree-flow.h"
33 #include "tree-gimple.h"
34 #include "tree-dump.h"
36 #include "tree-iterator.h"
37 #include "tree-pass.h"
38 #include "alloc-pool.h"
40 #include "langhooks.h"
42 /* This is a simple global reassociation pass. It is, in part, based
43 on the LLVM pass of the same name (They do some things more/less
44 than we do, in different orders, etc).
46 It consists of five steps:
48 1. Breaking up subtract operations into addition + negate, where
49 it would promote the reassociation of adds.
51 2. Left linearization of the expression trees, so that (A+B)+(C+D)
52 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
53 During linearization, we place the operands of the binary
54 expressions into a vector of operand_entry_t
56 3. Optimization of the operand lists, eliminating things like a +
59 4. Rewrite the expression trees we linearized and optimized so
60 they are in proper rank order.
62 5. Repropagate negates, as nothing else will clean it up ATM.
64 A bit of theory on #4, since nobody seems to write anything down
65 about why it makes sense to do it the way they do it:
67 We could do this much nicer theoretically, but don't (for reasons
68 explained after how to do it theoretically nice :P).
70 In order to promote the most redundancy elimination, you want
71 binary expressions whose operands are the same rank (or
72 preferably, the same value) exposed to the redundancy eliminator,
73 for possible elimination.
75 So the way to do this if we really cared, is to build the new op
76 tree from the leaves to the roots, merging as you go, and putting the
77 new op on the end of the worklist, until you are left with one
78 thing on the worklist.
80 IE if you have to rewrite the following set of operands (listed with
81 rank in parentheses), with opcode PLUS_EXPR:
83 a (1), b (1), c (1), d (2), e (2)
86 We start with our merge worklist empty, and the ops list with all of
89 You want to first merge all leaves of the same rank, as much as
92 So first build a binary op of
94 mergetmp = a + b, and put "mergetmp" on the merge worklist.
96 Because there is no three operand form of PLUS_EXPR, c is not going to
97 be exposed to redundancy elimination as a rank 1 operand.
99 So you might as well throw it on the merge worklist (you could also
100 consider it to now be a rank two operand, and merge it with d and e,
101 but in this case, you then have evicted e from a binary op. So at
102 least in this situation, you can't win.)
104 Then build a binary op of d + e
107 and put mergetmp2 on the merge worklist.
109 so merge worklist = {mergetmp, c, mergetmp2}
111 Continue building binary ops of these operations until you have only
112 one operation left on the worklist.
117 mergetmp3 = mergetmp + c
119 worklist = {mergetmp2, mergetmp3}
121 mergetmp4 = mergetmp2 + mergetmp3
123 worklist = {mergetmp4}
125 because we have one operation left, we can now just set the original
126 statement equal to the result of that operation.
128 This will at least expose a + b and d + e to redundancy elimination
129 as binary operations.
131 For extra points, you can reuse the old statements to build the
132 mergetmps, since you shouldn't run out.
134 So why don't we do this?
136 Because it's expensive, and rarely will help. Most trees we are
137 reassociating have 3 or less ops. If they have 2 ops, they already
138 will be written into a nice single binary op. If you have 3 ops, a
139 single simple check suffices to tell you whether the first two are of the
140 same rank. If so, you know to order it
143 newstmt = mergetmp + op3
147 newstmt = mergetmp + op1
149 If all three are of the same rank, you can't expose them all in a
150 single binary operator anyway, so the above is *still* the best you
153 Thus, this is what we do. When we have three ops left, we check to see
154 what order to put them in, and call it a day. As a nod to vector sum
155 reduction, we check if any of ops are a really a phi node that is a
156 destructive update for the associating op, and keep the destructive
157 update together for vector sum reduction recognition. */
164 int constants_eliminated;
169 /* Operator, rank pair. */
170 typedef struct operand_entry
176 static alloc_pool operand_entry_pool;
179 /* Starting rank number for a given basic block, so that we can rank
180 operations using unmovable instructions in that BB based on the bb
182 static unsigned int *bb_rank;
184 /* Operand->rank hashtable. */
185 static htab_t operand_rank;
188 /* Look up the operand rank structure for expression E. */
190 static operand_entry_t
191 find_operand_rank (tree e)
194 struct operand_entry vrd;
197 slot = htab_find_slot (operand_rank, &vrd, NO_INSERT);
200 return ((operand_entry_t) *slot);
203 /* Insert {E,RANK} into the operand rank hashtable. */
206 insert_operand_rank (tree e, unsigned int rank)
209 operand_entry_t new_pair = pool_alloc (operand_entry_pool);
212 new_pair->rank = rank;
213 slot = htab_find_slot (operand_rank, new_pair, INSERT);
214 gcc_assert (*slot == NULL);
218 /* Return the hash value for a operand rank structure */
221 operand_entry_hash (const void *p)
223 const operand_entry_t vr = (operand_entry_t) p;
224 return iterative_hash_expr (vr->op, 0);
227 /* Return true if two operand rank structures are equal. */
230 operand_entry_eq (const void *p1, const void *p2)
232 const operand_entry_t vr1 = (operand_entry_t) p1;
233 const operand_entry_t vr2 = (operand_entry_t) p2;
234 return vr1->op == vr2->op;
237 /* Given an expression E, return the rank of the expression. */
244 /* Constants have rank 0. */
245 if (is_gimple_min_invariant (e))
248 /* SSA_NAME's have the rank of the expression they are the result
250 For globals and uninitialized values, the rank is 0.
251 For function arguments, use the pre-setup rank.
252 For PHI nodes, stores, asm statements, etc, we use the rank of
254 For simple operations, the rank is the maximum rank of any of
255 its operands, or the bb_rank, whichever is less.
256 I make no claims that this is optimal, however, it gives good
259 if (TREE_CODE (e) == SSA_NAME)
263 unsigned int rank, maxrank;
266 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
267 && e == default_def (SSA_NAME_VAR (e)))
268 return find_operand_rank (e)->rank;
270 stmt = SSA_NAME_DEF_STMT (e);
271 if (bb_for_stmt (stmt) == NULL)
274 if (TREE_CODE (stmt) != MODIFY_EXPR
275 || !ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
276 return bb_rank[bb_for_stmt (stmt)->index];
278 /* If we already have a rank for this expression, use that. */
279 vr = find_operand_rank (e);
283 /* Otherwise, find the maximum rank for the operands, or the bb
284 rank, whichever is less. */
286 maxrank = bb_rank[bb_for_stmt(stmt)->index];
287 rhs = TREE_OPERAND (stmt, 1);
288 if (TREE_CODE_LENGTH (TREE_CODE (rhs)) == 0)
289 rank = MAX (rank, get_rank (rhs));
293 i < TREE_CODE_LENGTH (TREE_CODE (rhs))
294 && TREE_OPERAND (rhs, i)
297 rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
300 if (dump_file && (dump_flags & TDF_DETAILS))
302 fprintf (dump_file, "Rank for ");
303 print_generic_expr (dump_file, e, 0);
304 fprintf (dump_file, " is %d\n", (rank + 1));
307 /* Note the rank in the hashtable so we don't recompute it. */
308 insert_operand_rank (e, (rank + 1));
312 /* Globals, etc, are rank 0 */
316 DEF_VEC_P(operand_entry_t);
317 DEF_VEC_ALLOC_P(operand_entry_t, heap);
319 /* We want integer ones to end up last no matter what, since they are
320 the ones we can do the most with. */
321 #define INTEGER_CONST_TYPE 1 << 3
322 #define FLOAT_CONST_TYPE 1 << 2
323 #define OTHER_CONST_TYPE 1 << 1
325 /* Classify an invariant tree into integer, float, or other, so that
326 we can sort them to be near other constants of the same type. */
328 constant_type (tree t)
330 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
331 return INTEGER_CONST_TYPE;
332 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
333 return FLOAT_CONST_TYPE;
335 return OTHER_CONST_TYPE;
338 /* qsort comparison function to sort operand entries PA and PB by rank
339 so that the sorted array is ordered by rank in decreasing order. */
341 sort_by_operand_rank (const void *pa, const void *pb)
343 const operand_entry_t oea = *(const operand_entry_t *)pa;
344 const operand_entry_t oeb = *(const operand_entry_t *)pb;
346 /* It's nicer for optimize_expression if constants that are likely
347 to fold when added/multiplied//whatever are put next to each
348 other. Since all constants have rank 0, order them by type. */
349 if (oeb->rank == 0 && oea->rank == 0)
350 return constant_type (oeb->op) - constant_type (oea->op);
352 /* Lastly, make sure the versions that are the same go next to each
353 other. We use SSA_NAME_VERSION because it's stable. */
354 if ((oeb->rank - oea->rank == 0)
355 && TREE_CODE (oea->op) == SSA_NAME
356 && TREE_CODE (oeb->op) == SSA_NAME)
357 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
359 return oeb->rank - oea->rank;
362 /* Add an operand entry to *OPS for the tree operand OP. */
365 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
367 operand_entry_t oe = pool_alloc (operand_entry_pool);
370 oe->rank = get_rank (op);
371 VEC_safe_push (operand_entry_t, heap, *ops, oe);
374 /* Return true if STMT is reassociable operation containing a binary
375 operation with tree code CODE. */
378 is_reassociable_op (tree stmt, enum tree_code code)
380 if (!IS_EMPTY_STMT (stmt)
381 && TREE_CODE (stmt) == MODIFY_EXPR
382 && TREE_CODE (TREE_OPERAND (stmt, 1)) == code
383 && has_single_use (TREE_OPERAND (stmt, 0)))
389 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
390 operand of the negate operation. Otherwise, return NULL. */
393 get_unary_op (tree name, enum tree_code opcode)
395 tree stmt = SSA_NAME_DEF_STMT (name);
398 if (TREE_CODE (stmt) != MODIFY_EXPR)
401 rhs = TREE_OPERAND (stmt, 1);
402 if (TREE_CODE (rhs) == opcode)
403 return TREE_OPERAND (rhs, 0);
407 /* If CURR and LAST are a pair of ops that OPCODE allows us to
408 eliminate through equivalences, do so, remove them from OPS, and
409 return true. Otherwise, return false. */
412 eliminate_duplicate_pair (enum tree_code opcode,
413 VEC (operand_entry_t, heap) **ops,
416 operand_entry_t curr,
417 operand_entry_t last)
420 /* If we have two of the same op, and the opcode is & |, min, or max,
421 we can eliminate one of them.
422 If we have two of the same op, and the opcode is ^, we can
423 eliminate both of them. */
425 if (last && last->op == curr->op)
433 if (dump_file && (dump_flags & TDF_DETAILS))
435 fprintf (dump_file, "Equivalence: ");
436 print_generic_expr (dump_file, curr->op, 0);
437 fprintf (dump_file, " [&|minmax] ");
438 print_generic_expr (dump_file, last->op, 0);
439 fprintf (dump_file, " -> ");
440 print_generic_stmt (dump_file, last->op, 0);
443 VEC_ordered_remove (operand_entry_t, *ops, i);
444 reassociate_stats.ops_eliminated ++;
449 if (dump_file && (dump_flags & TDF_DETAILS))
451 fprintf (dump_file, "Equivalence: ");
452 print_generic_expr (dump_file, curr->op, 0);
453 fprintf (dump_file, " ^ ");
454 print_generic_expr (dump_file, last->op, 0);
455 fprintf (dump_file, " -> nothing\n");
458 reassociate_stats.ops_eliminated += 2;
460 if (VEC_length (operand_entry_t, *ops) == 2)
462 VEC_free (operand_entry_t, heap, *ops);
464 add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op),
470 VEC_ordered_remove (operand_entry_t, *ops, i-1);
471 VEC_ordered_remove (operand_entry_t, *ops, i-1);
483 /* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression,
484 look in OPS for a corresponding positive operation to cancel it
485 out. If we find one, remove the other from OPS, replace
486 OPS[CURRINDEX] with 0, and return true. Otherwise, return
490 eliminate_plus_minus_pair (enum tree_code opcode,
491 VEC (operand_entry_t, heap) **ops,
492 unsigned int currindex,
493 operand_entry_t curr)
499 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
502 negateop = get_unary_op (curr->op, NEGATE_EXPR);
503 if (negateop == NULL_TREE)
506 /* Any non-negated version will have a rank that is one less than
507 the current rank. So once we hit those ranks, if we don't find
510 for (i = currindex + 1;
511 VEC_iterate (operand_entry_t, *ops, i, oe)
512 && oe->rank >= curr->rank - 1 ;
515 if (oe->op == negateop)
518 if (dump_file && (dump_flags & TDF_DETAILS))
520 fprintf (dump_file, "Equivalence: ");
521 print_generic_expr (dump_file, negateop, 0);
522 fprintf (dump_file, " + -");
523 print_generic_expr (dump_file, oe->op, 0);
524 fprintf (dump_file, " -> 0\n");
527 VEC_ordered_remove (operand_entry_t, *ops, i);
528 add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op),
530 VEC_ordered_remove (operand_entry_t, *ops, currindex);
531 reassociate_stats.ops_eliminated ++;
540 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
541 bitwise not expression, look in OPS for a corresponding operand to
542 cancel it out. If we find one, remove the other from OPS, replace
543 OPS[CURRINDEX] with 0, and return true. Otherwise, return
547 eliminate_not_pairs (enum tree_code opcode,
548 VEC (operand_entry_t, heap) **ops,
549 unsigned int currindex,
550 operand_entry_t curr)
556 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
557 || TREE_CODE (curr->op) != SSA_NAME)
560 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
561 if (notop == NULL_TREE)
564 /* Any non-not version will have a rank that is one less than
565 the current rank. So once we hit those ranks, if we don't find
568 for (i = currindex + 1;
569 VEC_iterate (operand_entry_t, *ops, i, oe)
570 && oe->rank >= curr->rank - 1;
575 if (dump_file && (dump_flags & TDF_DETAILS))
577 fprintf (dump_file, "Equivalence: ");
578 print_generic_expr (dump_file, notop, 0);
579 if (opcode == BIT_AND_EXPR)
580 fprintf (dump_file, " & ~");
581 else if (opcode == BIT_IOR_EXPR)
582 fprintf (dump_file, " | ~");
583 print_generic_expr (dump_file, oe->op, 0);
584 if (opcode == BIT_AND_EXPR)
585 fprintf (dump_file, " -> 0\n");
586 else if (opcode == BIT_IOR_EXPR)
587 fprintf (dump_file, " -> -1\n");
590 if (opcode == BIT_AND_EXPR)
591 oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node);
592 else if (opcode == BIT_IOR_EXPR)
593 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
594 TYPE_PRECISION (TREE_TYPE (oe->op)));
596 reassociate_stats.ops_eliminated
597 += VEC_length (operand_entry_t, *ops) - 1;
598 VEC_free (operand_entry_t, heap, *ops);
600 VEC_safe_push (operand_entry_t, heap, *ops, oe);
608 /* Use constant value that may be present in OPS to try to eliminate
609 operands. Note that this function is only really used when we've
610 eliminated ops for other reasons, or merged constants. Across
611 single statements, fold already does all of this, plus more. There
612 is little point in duplicating logic, so I've only included the
613 identities that I could ever construct testcases to trigger. */
616 eliminate_using_constants (enum tree_code opcode,
617 VEC(operand_entry_t, heap) **ops)
619 operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
621 if (oelast->rank == 0 && INTEGRAL_TYPE_P (TREE_TYPE (oelast->op)))
626 if (integer_zerop (oelast->op))
628 if (VEC_length (operand_entry_t, *ops) != 1)
630 if (dump_file && (dump_flags & TDF_DETAILS))
631 fprintf (dump_file, "Found & 0, removing all other ops\n");
633 reassociate_stats.ops_eliminated
634 += VEC_length (operand_entry_t, *ops) - 1;
636 VEC_free (operand_entry_t, heap, *ops);
638 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
642 else if (integer_all_onesp (oelast->op))
644 if (VEC_length (operand_entry_t, *ops) != 1)
646 if (dump_file && (dump_flags & TDF_DETAILS))
647 fprintf (dump_file, "Found & -1, removing\n");
648 VEC_pop (operand_entry_t, *ops);
649 reassociate_stats.ops_eliminated++;
654 if (integer_all_onesp (oelast->op))
656 if (VEC_length (operand_entry_t, *ops) != 1)
658 if (dump_file && (dump_flags & TDF_DETAILS))
659 fprintf (dump_file, "Found | -1, removing all other ops\n");
661 reassociate_stats.ops_eliminated
662 += VEC_length (operand_entry_t, *ops) - 1;
664 VEC_free (operand_entry_t, heap, *ops);
666 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
670 else if (integer_zerop (oelast->op))
672 if (VEC_length (operand_entry_t, *ops) != 1)
674 if (dump_file && (dump_flags & TDF_DETAILS))
675 fprintf (dump_file, "Found | 0, removing\n");
676 VEC_pop (operand_entry_t, *ops);
677 reassociate_stats.ops_eliminated++;
682 if (integer_zerop (oelast->op))
684 if (VEC_length (operand_entry_t, *ops) != 1)
686 if (dump_file && (dump_flags & TDF_DETAILS))
687 fprintf (dump_file, "Found * 0, removing all other ops\n");
689 reassociate_stats.ops_eliminated
690 += VEC_length (operand_entry_t, *ops) - 1;
691 VEC_free (operand_entry_t, heap, *ops);
693 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
697 else if (integer_onep (oelast->op))
699 if (VEC_length (operand_entry_t, *ops) != 1)
701 if (dump_file && (dump_flags & TDF_DETAILS))
702 fprintf (dump_file, "Found * 1, removing\n");
703 VEC_pop (operand_entry_t, *ops);
704 reassociate_stats.ops_eliminated++;
712 if (integer_zerop (oelast->op))
714 if (VEC_length (operand_entry_t, *ops) != 1)
716 if (dump_file && (dump_flags & TDF_DETAILS))
717 fprintf (dump_file, "Found [|^+] 0, removing\n");
718 VEC_pop (operand_entry_t, *ops);
719 reassociate_stats.ops_eliminated++;
730 /* Perform various identities and other optimizations on the list of
731 operand entries, stored in OPS. The tree code for the binary
732 operation between all the operands is OPCODE. */
735 optimize_ops_list (enum tree_code opcode,
736 VEC (operand_entry_t, heap) **ops)
738 unsigned int length = VEC_length (operand_entry_t, *ops);
741 operand_entry_t oelast = NULL;
742 bool iterate = false;
747 oelast = VEC_last (operand_entry_t, *ops);
749 /* If the last two are constants, pop the constants off, merge them
750 and try the next two. */
751 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
753 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
756 && is_gimple_min_invariant (oelm1->op)
757 && lang_hooks.types_compatible_p (TREE_TYPE (oelm1->op),
758 TREE_TYPE (oelast->op)))
760 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
761 oelm1->op, oelast->op);
763 if (folded && is_gimple_min_invariant (folded))
765 if (dump_file && (dump_flags & TDF_DETAILS))
766 fprintf (dump_file, "Merging constants\n");
768 VEC_pop (operand_entry_t, *ops);
769 VEC_pop (operand_entry_t, *ops);
771 add_to_ops_vec (ops, folded);
772 reassociate_stats.constants_eliminated++;
774 optimize_ops_list (opcode, ops);
780 eliminate_using_constants (opcode, ops);
783 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
787 if (eliminate_not_pairs (opcode, ops, i, oe))
789 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
790 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)))
802 length = VEC_length (operand_entry_t, *ops);
803 oelast = VEC_last (operand_entry_t, *ops);
806 optimize_ops_list (opcode, ops);
809 /* Return true if OPERAND is defined by a PHI node which uses the LHS
810 of STMT in it's operands. This is also known as a "destructive
811 update" operation. */
814 is_phi_for_stmt (tree stmt, tree operand)
817 tree lhs = TREE_OPERAND (stmt, 0);
821 if (TREE_CODE (operand) != SSA_NAME)
824 def_stmt = SSA_NAME_DEF_STMT (operand);
825 if (TREE_CODE (def_stmt) != PHI_NODE)
828 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
829 if (lhs == USE_FROM_PTR (arg_p))
834 /* Recursively rewrite our linearized statements so that the operators
835 match those in OPS[OPINDEX], putting the computation in rank
839 rewrite_expr_tree (tree stmt, unsigned int opindex,
840 VEC(operand_entry_t, heap) * ops)
842 tree rhs = TREE_OPERAND (stmt, 1);
845 /* If we have three operands left, then we want to make sure the one
846 that gets the double binary op are the ones with the same rank.
848 The alternative we try is to see if this is a destructive
849 update style statement, which is like:
852 In that case, we want to use the destructive update form to
853 expose the possible vectorizer sum reduction opportunity.
854 In that case, the third operand will be the phi node.
856 We could, of course, try to be better as noted above, and do a
857 lot of work to try to find these opportunities in >3 operand
858 cases, but it is unlikely to be worth it. */
859 if (opindex + 3 == VEC_length (operand_entry_t, ops))
861 operand_entry_t oe1, oe2, oe3;
863 oe1 = VEC_index (operand_entry_t, ops, opindex);
864 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
865 oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
867 if ((oe1->rank == oe2->rank
868 && oe2->rank != oe3->rank)
869 || (is_phi_for_stmt (stmt, oe3->op)
870 && !is_phi_for_stmt (stmt, oe1->op)
871 && !is_phi_for_stmt (stmt, oe2->op)))
873 struct operand_entry temp = *oe3;
875 oe3->rank = oe1->rank;
877 oe1->rank= temp.rank;
881 /* The final recursion case for this function is that you have
882 exactly two operations left.
883 If we had one exactly one op in the entire list to start with, we
884 would have never called this function, and the tail recursion
885 rewrites them one at a time. */
886 if (opindex + 2 == VEC_length (operand_entry_t, ops))
888 operand_entry_t oe1, oe2;
890 oe1 = VEC_index (operand_entry_t, ops, opindex);
891 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
893 if (TREE_OPERAND (rhs, 0) != oe1->op
894 || TREE_OPERAND (rhs, 1) != oe2->op)
897 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Transforming ");
900 print_generic_expr (dump_file, rhs, 0);
903 TREE_OPERAND (rhs, 0) = oe1->op;
904 TREE_OPERAND (rhs, 1) = oe2->op;
907 if (dump_file && (dump_flags & TDF_DETAILS))
909 fprintf (dump_file, " into ");
910 print_generic_stmt (dump_file, rhs, 0);
917 /* If we hit here, we should have 3 or more ops left. */
918 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
920 /* Rewrite the next operator. */
921 oe = VEC_index (operand_entry_t, ops, opindex);
923 if (oe->op != TREE_OPERAND (rhs, 1))
926 if (dump_file && (dump_flags & TDF_DETAILS))
928 fprintf (dump_file, "Transforming ");
929 print_generic_expr (dump_file, rhs, 0);
932 TREE_OPERAND (rhs, 1) = oe->op;
935 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, " into ");
938 print_generic_stmt (dump_file, rhs, 0);
941 /* Recurse on the LHS of the binary operator, which is guaranteed to
942 be the non-leaf side. */
943 rewrite_expr_tree (SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0)),
947 /* Transform STMT, which is really (A +B) + (C + D) into the left
948 linear form, ((A+B)+C)+D.
949 Recurse on D if necessary. */
952 linearize_expr (tree stmt)
954 block_stmt_iterator bsinow, bsirhs;
955 tree rhs = TREE_OPERAND (stmt, 1);
956 enum tree_code rhscode = TREE_CODE (rhs);
957 tree binrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1));
958 tree binlhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0));
959 tree newbinrhs = NULL_TREE;
961 gcc_assert (is_reassociable_op (binlhs, TREE_CODE (rhs))
962 && is_reassociable_op (binrhs, TREE_CODE (rhs)));
964 bsinow = bsi_for_stmt (stmt);
965 bsirhs = bsi_for_stmt (binrhs);
966 bsi_move_before (&bsirhs, &bsinow);
968 TREE_OPERAND (rhs, 1) = TREE_OPERAND (TREE_OPERAND (binrhs, 1), 0);
969 if (TREE_CODE (TREE_OPERAND (rhs, 1)) == SSA_NAME)
970 newbinrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1));
971 TREE_OPERAND (TREE_OPERAND (binrhs, 1), 0) = TREE_OPERAND (binlhs, 0);
972 TREE_OPERAND (rhs, 0) = TREE_OPERAND (binrhs, 0);
974 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Linearized: ");
977 print_generic_stmt (dump_file, rhs, 0);
980 reassociate_stats.linearized++;
981 update_stmt (binrhs);
982 update_stmt (binlhs);
984 TREE_VISITED (binrhs) = 1;
985 TREE_VISITED (binlhs) = 1;
986 TREE_VISITED (stmt) = 1;
988 /* Tail recurse on the new rhs if it still needs reassociation. */
989 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode))
990 linearize_expr (stmt);
994 /* If LHS has a single immediate use that is a MODIFY_EXPR, return
995 it. Otherwise, return NULL. */
998 get_single_immediate_use (tree lhs)
1000 use_operand_p immuse;
1003 if (TREE_CODE (lhs) == SSA_NAME
1004 && single_imm_use (lhs, &immuse, &immusestmt))
1006 if (TREE_CODE (immusestmt) == RETURN_EXPR)
1007 immusestmt = TREE_OPERAND (immusestmt, 0);
1008 if (TREE_CODE (immusestmt) == MODIFY_EXPR)
1013 static VEC(tree, heap) *broken_up_subtracts;
1016 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
1017 representing the negated value. Insertions of any necessary
1018 instructions go before BSI.
1019 This function is recursive in that, if you hand it "a_5" as the
1020 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
1021 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
1024 negate_value (tree tonegate, block_stmt_iterator *bsi)
1026 tree negatedef = tonegate;
1027 tree resultofnegate;
1029 if (TREE_CODE (tonegate) == SSA_NAME)
1030 negatedef = SSA_NAME_DEF_STMT (tonegate);
1032 /* If we are trying to negate a name, defined by an add, negate the
1033 add operands instead. */
1034 if (TREE_CODE (tonegate) == SSA_NAME
1035 && TREE_CODE (negatedef) == MODIFY_EXPR
1036 && TREE_CODE (TREE_OPERAND (negatedef, 0)) == SSA_NAME
1037 && has_single_use (TREE_OPERAND (negatedef, 0))
1038 && TREE_CODE (TREE_OPERAND (negatedef, 1)) == PLUS_EXPR)
1040 block_stmt_iterator bsi;
1041 tree binop = TREE_OPERAND (negatedef, 1);
1043 bsi = bsi_for_stmt (negatedef);
1044 TREE_OPERAND (binop, 0) = negate_value (TREE_OPERAND (binop, 0),
1046 bsi = bsi_for_stmt (negatedef);
1047 TREE_OPERAND (binop, 1) = negate_value (TREE_OPERAND (binop, 1),
1049 update_stmt (negatedef);
1050 return TREE_OPERAND (negatedef, 0);
1053 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
1054 resultofnegate = force_gimple_operand_bsi (bsi, tonegate, true,
1056 VEC_safe_push (tree, heap, broken_up_subtracts, resultofnegate);
1057 return resultofnegate;
1061 /* Return true if we should break up the subtract in STMT into an add
1062 with negate. This is true when we the subtract operands are really
1063 adds, or the subtract itself is used in an add expression. In
1064 either case, breaking up the subtract into an add with negate
1065 exposes the adds to reassociation. */
1068 should_break_up_subtract (tree stmt)
1071 tree lhs = TREE_OPERAND (stmt, 0);
1072 tree rhs = TREE_OPERAND (stmt, 1);
1073 tree binlhs = TREE_OPERAND (rhs, 0);
1074 tree binrhs = TREE_OPERAND (rhs, 1);
1077 if (TREE_CODE (binlhs) == SSA_NAME
1078 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR))
1081 if (TREE_CODE (binrhs) == SSA_NAME
1082 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR))
1085 if (TREE_CODE (lhs) == SSA_NAME
1086 && (immusestmt = get_single_immediate_use (lhs))
1087 && TREE_CODE (TREE_OPERAND (immusestmt, 1)) == PLUS_EXPR)
1093 /* Transform STMT from A - B into A + -B. */
1096 break_up_subtract (tree stmt, block_stmt_iterator *bsi)
1098 tree rhs = TREE_OPERAND (stmt, 1);
1100 if (dump_file && (dump_flags & TDF_DETAILS))
1102 fprintf (dump_file, "Breaking up subtract ");
1103 print_generic_stmt (dump_file, stmt, 0);
1106 TREE_SET_CODE (TREE_OPERAND (stmt, 1), PLUS_EXPR);
1107 TREE_OPERAND (rhs, 1) = negate_value (TREE_OPERAND (rhs, 1), bsi);
1112 /* Recursively linearize a binary expression that is the RHS of STMT.
1113 Place the operands of the expression tree in the vector named OPS. */
1116 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, tree stmt)
1118 block_stmt_iterator bsinow, bsilhs;
1119 tree rhs = TREE_OPERAND (stmt, 1);
1120 tree binrhs = TREE_OPERAND (rhs, 1);
1121 tree binlhs = TREE_OPERAND (rhs, 0);
1122 tree binlhsdef, binrhsdef;
1123 bool binlhsisreassoc = false;
1124 bool binrhsisreassoc = false;
1125 enum tree_code rhscode = TREE_CODE (rhs);
1127 TREE_VISITED (stmt) = 1;
1129 if (TREE_CODE (binlhs) == SSA_NAME)
1131 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
1132 binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode);
1135 if (TREE_CODE (binrhs) == SSA_NAME)
1137 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
1138 binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode);
1141 /* If the LHS is not reassociable, but the RHS is, we need to swap
1142 them. If neither is reassociable, there is nothing we can do, so
1143 just put them in the ops vector. If the LHS is reassociable,
1144 linearize it. If both are reassociable, then linearize the RHS
1147 if (!binlhsisreassoc)
1151 if (!binrhsisreassoc)
1153 add_to_ops_vec (ops, binrhs);
1154 add_to_ops_vec (ops, binlhs);
1158 if (dump_file && (dump_flags & TDF_DETAILS))
1160 fprintf (dump_file, "swapping operands of ");
1161 print_generic_expr (dump_file, stmt, 0);
1164 swap_tree_operands (stmt, &TREE_OPERAND (rhs, 0),
1165 &TREE_OPERAND (rhs, 1));
1168 if (dump_file && (dump_flags & TDF_DETAILS))
1170 fprintf (dump_file, " is now ");
1171 print_generic_stmt (dump_file, stmt, 0);
1174 /* We want to make it so the lhs is always the reassociative op,
1180 else if (binrhsisreassoc)
1182 linearize_expr (stmt);
1183 gcc_assert (rhs == TREE_OPERAND (stmt, 1));
1184 binlhs = TREE_OPERAND (rhs, 0);
1185 binrhs = TREE_OPERAND (rhs, 1);
1188 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
1189 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), rhscode));
1190 bsinow = bsi_for_stmt (stmt);
1191 bsilhs = bsi_for_stmt (SSA_NAME_DEF_STMT (binlhs));
1192 bsi_move_before (&bsilhs, &bsinow);
1193 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs));
1194 add_to_ops_vec (ops, binrhs);
1197 /* Repropagate the negates back into subtracts, since no other pass
1198 currently does it. */
1201 repropagate_negates (void)
1206 for (i = 0; VEC_iterate (tree, broken_up_subtracts, i, negate); i++)
1208 tree user = get_single_immediate_use (negate);
1210 /* The negate operand can be either operand of a PLUS_EXPR
1211 (it can be the LHS if the RHS is a constant for example).
1213 Force the negate operand to the RHS of the PLUS_EXPR, then
1214 transform the PLUS_EXPR into a MINUS_EXPR. */
1216 && TREE_CODE (user) == MODIFY_EXPR
1217 && TREE_CODE (TREE_OPERAND (user, 1)) == PLUS_EXPR)
1219 tree rhs = TREE_OPERAND (user, 1);
1221 /* If the negated operand appears on the LHS of the
1222 PLUS_EXPR, exchange the operands of the PLUS_EXPR
1223 to force the negated operand to the RHS of the PLUS_EXPR. */
1224 if (TREE_OPERAND (TREE_OPERAND (user, 1), 0) == negate)
1226 tree temp = TREE_OPERAND (rhs, 0);
1227 TREE_OPERAND (rhs, 0) = TREE_OPERAND (rhs, 1);
1228 TREE_OPERAND (rhs, 1) = temp;
1231 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
1232 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
1233 if (TREE_OPERAND (TREE_OPERAND (user, 1), 1) == negate)
1235 TREE_SET_CODE (rhs, MINUS_EXPR);
1236 TREE_OPERAND (rhs, 1) = get_unary_op (negate, NEGATE_EXPR);
1243 /* Break up subtract operations in block BB.
1245 We do this top down because we don't know whether the subtract is
1246 part of a possible chain of reassociation except at the top.
1255 we want to break up k = t - q, but we won't until we've transformed q
1256 = b - r, which won't be broken up until we transform b = c - d. */
1259 break_up_subtract_bb (basic_block bb)
1261 block_stmt_iterator bsi;
1264 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1266 tree stmt = bsi_stmt (bsi);
1268 if (TREE_CODE (stmt) == MODIFY_EXPR)
1270 tree lhs = TREE_OPERAND (stmt, 0);
1271 tree rhs = TREE_OPERAND (stmt, 1);
1273 TREE_VISITED (stmt) = 0;
1274 /* If unsafe math optimizations we can do reassociation for
1275 non-integral types. */
1276 if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1277 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
1278 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs))
1279 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs))
1280 || !flag_unsafe_math_optimizations))
1283 /* Check for a subtract used only in an addition. If this
1284 is the case, transform it into add of a negate for better
1285 reassociation. IE transform C = A-B into C = A + -B if C
1286 is only used in an addition. */
1287 if (TREE_CODE (rhs) == MINUS_EXPR)
1288 if (should_break_up_subtract (stmt))
1289 break_up_subtract (stmt, &bsi);
1292 for (son = first_dom_son (CDI_DOMINATORS, bb);
1294 son = next_dom_son (CDI_DOMINATORS, son))
1295 break_up_subtract_bb (son);
1298 /* Reassociate expressions in basic block BB and its post-dominator as
1302 reassociate_bb (basic_block bb)
1304 block_stmt_iterator bsi;
1307 for (bsi = bsi_last (bb); !bsi_end_p (bsi); bsi_prev (&bsi))
1309 tree stmt = bsi_stmt (bsi);
1311 if (TREE_CODE (stmt) == MODIFY_EXPR)
1313 tree lhs = TREE_OPERAND (stmt, 0);
1314 tree rhs = TREE_OPERAND (stmt, 1);
1316 /* If this was part of an already processed tree, we don't
1317 need to touch it again. */
1318 if (TREE_VISITED (stmt))
1321 /* If unsafe math optimizations we can do reassociation for
1322 non-integral types. */
1323 if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1324 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
1325 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs))
1326 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs))
1327 || !flag_unsafe_math_optimizations))
1330 if (associative_tree_code (TREE_CODE (rhs)))
1332 VEC(operand_entry_t, heap) *ops = NULL;
1334 /* There may be no immediate uses left by the time we
1335 get here because we may have eliminated them all. */
1336 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
1339 TREE_VISITED (stmt) = 1;
1340 linearize_expr_tree (&ops, stmt);
1341 qsort (VEC_address (operand_entry_t, ops),
1342 VEC_length (operand_entry_t, ops),
1343 sizeof (operand_entry_t),
1344 sort_by_operand_rank);
1345 optimize_ops_list (TREE_CODE (rhs), &ops);
1347 if (VEC_length (operand_entry_t, ops) == 1)
1349 if (dump_file && (dump_flags & TDF_DETAILS))
1351 fprintf (dump_file, "Transforming ");
1352 print_generic_expr (dump_file, rhs, 0);
1354 TREE_OPERAND (stmt, 1) = VEC_last (operand_entry_t, ops)->op;
1357 if (dump_file && (dump_flags & TDF_DETAILS))
1359 fprintf (dump_file, " into ");
1360 print_generic_stmt (dump_file,
1361 TREE_OPERAND (stmt, 1), 0);
1366 rewrite_expr_tree (stmt, 0, ops);
1369 VEC_free (operand_entry_t, heap, ops);
1373 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
1375 son = next_dom_son (CDI_POST_DOMINATORS, son))
1376 reassociate_bb (son);
1379 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
1380 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
1382 /* Dump the operand entry vector OPS to FILE. */
1385 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
1390 for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
1392 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
1393 print_generic_stmt (file, oe->op, 0);
1397 /* Dump the operand entry vector OPS to STDERR. */
1400 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
1402 dump_ops_vector (stderr, ops);
1408 break_up_subtract_bb (ENTRY_BLOCK_PTR);
1409 reassociate_bb (EXIT_BLOCK_PTR);
1412 /* Initialize the reassociation pass. */
1418 unsigned int rank = 2;
1420 int *bbs = XNEWVEC (int, last_basic_block + 1);
1422 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
1424 operand_entry_pool = create_alloc_pool ("operand entry pool",
1425 sizeof (struct operand_entry), 30);
1427 /* Reverse RPO (Reverse Post Order) will give us something where
1428 deeper loops come later. */
1429 pre_and_rev_post_order_compute (NULL, bbs, false);
1430 bb_rank = XCNEWVEC (unsigned int, last_basic_block + 1);
1432 operand_rank = htab_create (511, operand_entry_hash,
1433 operand_entry_eq, 0);
1435 /* Give each argument a distinct rank. */
1436 for (param = DECL_ARGUMENTS (current_function_decl);
1438 param = TREE_CHAIN (param))
1440 if (default_def (param) != NULL)
1442 tree def = default_def (param);
1443 insert_operand_rank (def, ++rank);
1447 /* Give the chain decl a distinct rank. */
1448 if (cfun->static_chain_decl != NULL)
1450 tree def = default_def (cfun->static_chain_decl);
1452 insert_operand_rank (def, ++rank);
1455 /* Set up rank for each BB */
1456 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
1457 bb_rank[bbs[i]] = ++rank << 16;
1460 calculate_dominance_info (CDI_DOMINATORS);
1461 calculate_dominance_info (CDI_POST_DOMINATORS);
1462 broken_up_subtracts = NULL;
1465 /* Cleanup after the reassociation pass, and print stats if
1472 if (dump_file && (dump_flags & TDF_STATS))
1474 fprintf (dump_file, "Reassociation stats:\n");
1475 fprintf (dump_file, "Linearized: %d\n",
1476 reassociate_stats.linearized);
1477 fprintf (dump_file, "Constants eliminated: %d\n",
1478 reassociate_stats.constants_eliminated);
1479 fprintf (dump_file, "Ops eliminated: %d\n",
1480 reassociate_stats.ops_eliminated);
1481 fprintf (dump_file, "Statements rewritten: %d\n",
1482 reassociate_stats.rewritten);
1484 htab_delete (operand_rank);
1486 free_alloc_pool (operand_entry_pool);
1488 VEC_free (tree, heap, broken_up_subtracts);
1489 free_dominance_info (CDI_POST_DOMINATORS);
1492 /* Gate and execute functions for Reassociation. */
1495 execute_reassoc (void)
1500 repropagate_negates ();
1506 struct tree_opt_pass pass_reassoc =
1508 "reassoc", /* name */
1510 execute_reassoc, /* execute */
1513 0, /* static_pass_number */
1514 TV_TREE_REASSOC, /* tv_id */
1515 PROP_cfg | PROP_ssa | PROP_alias, /* properties_required */
1516 0, /* properties_provided */
1517 0, /* properties_destroyed */
1518 0, /* todo_flags_start */
1519 TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */