/* Instruction scheduling pass.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
/* This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region", and the second runs only
   basic block scheduling.

   Interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  The identification of motion type
   and the check for validity of speculative motions requires
   construction and analysis of the function's control flow graph.

   The main entry point for this pass is schedule_insns(), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting, the
   control flow graph is constructed, regions are computed (after
   reload, each region is of one block), (2) region level: control
   flow graph attributes required for interblock scheduling are
   computed (dominators, reachability, etc.), data dependences and
   priorities are computed, and (3) block level: insns in the block
   are actually scheduled.  */
#include "config.h"
#include "system.h"
#include "toplev.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
#include "recog.h"
#include "cfglayout.h"
#include "sched-int.h"
/* Define when we want to count REG_DEAD notes before and after scheduling
   for sanity checking.  We can't do that when conditional execution is used,
   as REG_DEAD notes exist only for unconditional deaths.  */

#if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
#define CHECK_DEAD_NOTES 1
#else
#define CHECK_DEAD_NOTES 0
#endif
#ifdef INSN_SCHEDULING
/* Some accessor macros for h_i_d members only used within this file.  */
#define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
#define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
#define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)

/* Upper bounds on the number of blocks and insns a region may contain;
   regions exceeding them are rejected by too_large below.  */
#define MAX_RGN_BLOCKS 10
#define MAX_RGN_INSNS 100
/* nr_inter/spec counts interblock/speculative motion for the function.  */
static int nr_inter, nr_spec;
/* Control flow graph edges are kept in circular lists.  */
typedef struct
  {
    int from_block;
    int to_block;
    int next_in;
    int next_out;
  }
haifa_edge;
static haifa_edge *edge_table;
#define NEXT_IN(edge) (edge_table[edge].next_in)
#define NEXT_OUT(edge) (edge_table[edge].next_out)
#define FROM_BLOCK(edge) (edge_table[edge].from_block)
#define TO_BLOCK(edge) (edge_table[edge].to_block)
/* Number of edges in the control flow graph.  (In fact, larger than
   that by 1, since edge 0 is unused.)  */
static int nr_edges;
/* Circular list of incoming/outgoing edges of a block.  */
static int *in_edges;
static int *out_edges;

#define IN_EDGES(block) (in_edges[block])
#define OUT_EDGES(block) (out_edges[block])
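/* A minimal sketch (illustration only, not code from this pass) of how
   these circular lists are walked; new_edge below uses the same pattern
   to detect duplicate edges.  Edge 0 is unused, so a zero edge index
   means the block has no edges.  Here "b" is any block index and
   "visit" is a stand-in for whatever processing the caller does:

     int e, fst;
     fst = e = OUT_EDGES (b);
     while (e)
       {
         visit (TO_BLOCK (e));
         e = NEXT_OUT (e);
         if (e == fst)
           break;
       }
*/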
static int is_cfg_nonregular PARAMS ((void));
static int build_control_flow PARAMS ((struct edge_list *));
static void new_edge PARAMS ((int, int));
/* A region is the main entity for interblock scheduling: insns
   are allowed to move between blocks in the same region, along
   control flow graph edges, in the 'up' direction.  */
typedef struct
  {
    int rgn_nr_blocks;  /* Number of blocks in region.  */
    int rgn_blocks;     /* Index in rgn_bb_table of the region's first block.  */
  }
region;
/* Number of regions in the procedure.  */
static int nr_regions;

/* Table of region descriptions.  */
static region *rgn_table;

/* Array of lists of regions' blocks.  */
static int *rgn_bb_table;
/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
static int *block_to_bb;
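/* For example (illustrative numbers only), a three-block region laid
   out in rgn_bb_table as {5, 7, 2} gives BLOCK_TO_BB (5) == 0,
   BLOCK_TO_BB (7) == 1 and BLOCK_TO_BB (2) == 2; block 2 being
   reachable from block 5 forces its bb number to be the larger one.  */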
/* The number of the region containing a block.  */
static int *containing_rgn;

#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
#define BLOCK_TO_BB(block) (block_to_bb[block])
#define CONTAINING_RGN(block) (containing_rgn[block])
void debug_regions PARAMS ((void));
static void find_single_block_region PARAMS ((void));
static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
static int too_large PARAMS ((int, int *, int *));

extern void debug_live PARAMS ((int, int));
/* Blocks of the current region being scheduled.  */
static int current_nr_blocks;
static int current_blocks;

/* The mapping from bb to block.  */
#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
typedef struct
  {
    int *first_member;  /* Pointer to the list start in bitlst_table.  */
    int nr_members;     /* The number of members of the bit list.  */
  }
bitlst;

static int bitlst_table_last;
static int bitlst_table_size;
static int *bitlst_table;

static void extract_bitlst PARAMS ((sbitmap, bitlst *));
/* Target info declarations.

   The block currently being scheduled is referred to as the "target" block,
   while other blocks in the region from which insns can be moved to the
   target are called "source" blocks.  The candidate structure holds info
   about such sources: are they valid?  Speculative?  Etc.  */
typedef bitlst bblst;
typedef struct
  {
    char is_valid;
    char is_speculative;
    int src_prob;
    bblst split_bbs;
    bblst update_bbs;
  }
candidate;

static candidate *candidate_table;
/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the update blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static int *bblst_table, bblst_size, bblst_last;
#define IS_VALID(src) ( candidate_table[src].is_valid )
#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
#define SRC_PROB(src) ( candidate_table[src].src_prob )

/* The bb being currently scheduled.  */
static int target_bb;
typedef bitlst edgelst;

/* Target info functions.  */
static void split_edges PARAMS ((int, int, edgelst *));
static void compute_trg_info PARAMS ((int));
void debug_candidate PARAMS ((int));
void debug_candidates PARAMS ((int));
/* Dominators array: dom[i] contains the sbitmap of dominators of
   bb i in the region.  */
static sbitmap *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)

/* Is bb_src dominated by bb_trg?  */
#define IS_DOMINATED(bb_src, bb_trg) \
( TEST_BIT (dom[bb_src], bb_trg) )
/* Probability: Prob[i] is a float in [0, 1] which is the probability
   of bb i relative to the region entry.  */
static float *prob;

/* The probability of bb_src, relative to bb_trg.  Note that while
   'prob[bb]' is a float in [0, 1], this macro returns an integer
   in [0, 100].  */
#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
                                                      prob[bb_trg])))
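/* For example, with prob[bb_src] == 0.4 and prob[bb_trg] == 0.8,
   GET_SRC_PROB (bb_src, bb_trg) evaluates to 50: the source block is
   executed in half of the cases in which the target block is.  */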
/* Bit-set of edges, where bit i stands for edge i.  */
typedef sbitmap edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static int *rgn_edges;

/* Mapping from each edge in the graph to its number in the rgn.  */
static int *edge_to_bit;
#define EDGE_TO_BIT(edge) (edge_to_bit[edge])
/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  This is actually
   the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;
static void compute_dom_prob_ps PARAMS ((int));

#define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
/* Parameters affecting the decision of rank_for_schedule().
   ??? Nope.  But MIN_PROBABILITY is used in compute_trg_info.  */
#define MIN_DIFF_PRIORITY 2
#define MIN_PROBABILITY 40
#define MIN_PROB_DIFF 10
/* Speculative scheduling functions.  */
static int check_live_1 PARAMS ((int, rtx));
static void update_live_1 PARAMS ((int, rtx));
static int check_live PARAMS ((rtx, int));
static void update_live PARAMS ((rtx, int));
static void set_spec_fed PARAMS ((rtx));
static int is_pfree PARAMS ((rtx, int, int));
static int find_conditional_protection PARAMS ((rtx, int));
static int is_conditionally_protected PARAMS ((rtx, int, int));
static int may_trap_exp PARAMS ((rtx, int));
static int haifa_classify_insn PARAMS ((rtx));
static int is_prisky PARAMS ((rtx, int, int));
static int is_exception_free PARAMS ((rtx, int, int));

static void add_branch_dependences PARAMS ((rtx, rtx));
static void compute_block_backward_dependences PARAMS ((int));
void debug_dependencies PARAMS ((void));

static void init_regions PARAMS ((void));
static void schedule_region PARAMS ((int));
static rtx concat_INSN_LIST PARAMS ((rtx, rtx));
static void concat_insn_mem_list PARAMS ((rtx, rtx, rtx *, rtx *));
static void propagate_deps PARAMS ((int, struct deps *));
static void free_pending_lists PARAMS ((void));
/* Functions for construction of the control flow graph.  */

/* Return 1 if control flow graph should not be constructed, 0 otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, or if we
   have nonlocal gotos.  */

static int
is_cfg_nonregular ()
{
  int b;
  rtx insn;
  RTX_CODE code;
  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If this function has a computed jump, then we consider the cfg
     not well structured.  */
  if (current_function_has_computed_jump)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that flow.c
     computes an accurate cfg for EH.  */
  if (exception_handler_labels)
    return 1;
  /* If we have non-jumping insns which refer to labels, then we consider
     the cfg not well structured.  */
  /* Check for labels referred to other than by jumps.  */
  for (b = 0; b < n_basic_blocks; b++)
    for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
      {
        code = GET_CODE (insn);
        if (GET_RTX_CLASS (code) == 'i' && code != JUMP_INSN)
          {
            rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);

            if (note
                && ! (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
                      && find_reg_note (NEXT_INSN (insn), REG_LABEL,
                                        XEXP (note, 0))))
              return 1;
          }

        if (insn == BLOCK_END (b))
          break;
      }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}
/* Build the control flow graph and set nr_edges.

   Instead of trying to build a cfg ourselves, we rely on flow to
   do it for us.  Stamp out useless code (and bug) duplication.

   Return nonzero if an irregularity in the cfg is found which would
   prevent cross block scheduling.  */

static int
build_control_flow (edge_list)
     struct edge_list *edge_list;
{
  int i, unreachable, num_edges;

  /* This already accounts for entry/exit edges.  */
  num_edges = NUM_EDGES (edge_list);
  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block b = BASIC_BLOCK (i);

      if (b->pred == NULL
          || (b->pred->src == b
              && b->pred->pred_next == NULL))
        unreachable = 1;
    }
  /* ??? We can kill these soon.  */
  in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));

  nr_edges = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR
          && e->src != ENTRY_BLOCK_PTR)
        new_edge (e->src->index, e->dest->index);
    }

  /* Increment by 1, since edge 0 is unused.  */
  nr_edges++;

  return unreachable;
}
/* Record an edge in the control flow graph from SOURCE to TARGET.

   In theory, this is redundant with the s_succs computed above, but
   we have not converted all of haifa to use information from the
   integer lists.  */

static void
new_edge (source, target)
     int source, target;
{
  int e, next_edge;
  int curr_edge, fst_edge;

  /* Check for duplicates.  */
  fst_edge = curr_edge = OUT_EDGES (source);
  while (curr_edge)
    {
      if (FROM_BLOCK (curr_edge) == source
          && TO_BLOCK (curr_edge) == target)
        return;

      curr_edge = NEXT_OUT (curr_edge);

      if (fst_edge == curr_edge)
        break;
    }

  e = ++nr_edges;

  FROM_BLOCK (e) = source;
  TO_BLOCK (e) = target;

  if (OUT_EDGES (source))
    {
      next_edge = NEXT_OUT (OUT_EDGES (source));
      NEXT_OUT (OUT_EDGES (source)) = e;
      NEXT_OUT (e) = next_edge;
    }
  else
    {
      OUT_EDGES (source) = e;
      NEXT_OUT (e) = e;
    }

  if (IN_EDGES (target))
    {
      next_edge = NEXT_IN (IN_EDGES (target));
      NEXT_IN (IN_EDGES (target)) = e;
      NEXT_IN (e) = next_edge;
    }
  else
    {
      IN_EDGES (target) = e;
      NEXT_IN (e) = e;
    }
}
/* Translate a bit-set SET to a list BL of the bit-set members.  */

static void
extract_bitlst (set, bl)
     sbitmap set;
     bitlst *bl;
{
  int i;

  /* bblst table space is reused in each call to extract_bitlst.  */
  bitlst_table_last = 0;
  bl->first_member = &bitlst_table[bitlst_table_last];
  bl->nr_members = 0;

  /* Iterate over each word in the bitset.  */
  EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
  {
    bitlst_table[bitlst_table_last++] = i;
    (bl->nr_members)++;
  });
}
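/* Illustrative use (this mirrors how split_edges and compute_trg_info
   below consume it); "some_edge_set" and "process" are stand-ins, not
   names from this file:

     edgelst el;
     extract_bitlst (some_edge_set, &el);
     for (j = 0; j < el.nr_members; j++)
       process (el.first_member[j]);
*/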
/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

void
debug_regions ()
{
  int rgn, bb;

  fprintf (sched_dump, "\n;;   ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
               rgn_table[rgn].rgn_nr_blocks);
      fprintf (sched_dump, ";;\tbb/block: ");

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
        {
          current_blocks = RGN_BLOCKS (rgn);

          if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
            abort ();

          fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
        }

      fprintf (sched_dump, "\n\n");
    }
}
/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region ()
{
  int i;

  for (i = 0; i < n_basic_blocks; i++)
    {
      rgn_bb_table[i] = i;
      RGN_NR_BLOCKS (i) = 1;
      RGN_BLOCKS (i) = i;
      CONTAINING_RGN (i) = i;
      BLOCK_TO_BB (i) = 0;
    }
  nr_regions = n_basic_blocks;
}
/* Update number of blocks and the estimate for number of insns
   in the region.  Return 1 if the region is "too large" for interblock
   scheduling (compile time considerations), otherwise return 0.  */

static int
too_large (block, num_bbs, num_insns)
     int block, *num_bbs, *num_insns;
{
  (*num_bbs)++;
  (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
                   INSN_LUID (BLOCK_HEAD (block)));
  if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
    return 1;
  else
    return 0;
}
/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the innermost
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)                 \
{                                                       \
  if (max_hdr[blk] == -1)                               \
    max_hdr[blk] = hdr;                                 \
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])          \
    RESET_BIT (inner, hdr);                             \
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])          \
    {                                                   \
      RESET_BIT (inner, max_hdr[blk]);                  \
      max_hdr[blk] = hdr;                               \
    }                                                   \
}
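/* For instance, if blk is already recorded with max_hdr[blk] == h1 and a
   second candidate header h2 arrives: when dfs_nr[h1] > dfs_nr[h2], h1
   was discovered later in the DFS and so heads the deeper loop, hence
   the enclosing header h2 is marked non-inner; in the opposite case
   h1's loop is the enclosing one, so h1 is marked non-inner and h2
   becomes blk's header.  */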
/* Find regions for interblock scheduling.

   A region for scheduling can be:

   * A loop-free procedure, or

   * A reducible inner loop, or

   * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick) variables:

   * nr_regions
   * rgn_table
   * rgn_bb_table
   * block_to_bb
   * containing_rgn

   We use dominator relationships to avoid making regions out of non-reducible
   loops.

   This procedure needs to be converted to work on pred/succ lists instead
   of edge tables.  That would simplify it somewhat.  */
static void
find_rgns (edge_list, dom)
     struct edge_list *edge_list;
     sbitmap *dom;
{
  int *max_hdr, *dfs_nr, *stack, *degree;
  char no_loops = 1;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0, current_edge = out_edges[0];
  int num_bbs, num_insns, unreachable;
  int too_large_failure;

  /* Note if an edge has been passed.  */
  sbitmap passed;

  /* Note if a block is a natural loop header.  */
  sbitmap header;

  /* Note if a block is a natural inner loop header.  */
  sbitmap inner;

  /* Note if a block is in the block queue.  */
  sbitmap in_queue;

  /* Note if a block is in the block stack.  */
  sbitmap in_stack;

  int num_edges = NUM_EDGES (edge_list);
  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively, these will
     be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
  dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
  stack = (int *) xmalloc (nr_edges * sizeof (int));

  inner = sbitmap_alloc (n_basic_blocks);
  sbitmap_ones (inner);

  header = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (header);

  passed = sbitmap_alloc (nr_edges);
  sbitmap_zero (passed);

  in_queue = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_queue);

  in_stack = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_stack);

  for (i = 0; i < n_basic_blocks; i++)
    max_hdr[i] = -1;
  /* DFS traversal to find inner loops in the cfg.  */

  sp = -1;
  while (1)
    {
      if (current_edge == 0 || TEST_BIT (passed, current_edge))
        {
          /* We have reached a leaf node or a node that was already
             processed.  Pop edges off the stack until we find
             an edge that has not yet been processed.  */
          while (sp >= 0
                 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
            {
              /* Pop entry off the stack.  */
              current_edge = stack[sp--];
              node = FROM_BLOCK (current_edge);
              child = TO_BLOCK (current_edge);
              RESET_BIT (in_stack, child);
              if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
                UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
              current_edge = NEXT_OUT (current_edge);
            }

          /* See if we have finished the DFS tree traversal.  */
          if (sp < 0 && TEST_BIT (passed, current_edge))
            break;

          /* Nope, continue the traversal with the popped node.  */
          continue;
        }

      /* Process a node.  */
      node = FROM_BLOCK (current_edge);
      child = TO_BLOCK (current_edge);
      SET_BIT (in_stack, node);
      dfs_nr[node] = ++count;

      /* If the successor is in the stack, then we've found a loop.
         Mark the loop, if it is not a natural loop, then it will
         be rejected during the second traversal.  */
      if (TEST_BIT (in_stack, child))
        {
          no_loops = 0;
          SET_BIT (header, child);
          UPDATE_LOOP_RELATIONS (node, child);
          SET_BIT (passed, current_edge);
          current_edge = NEXT_OUT (current_edge);
          continue;
        }

      /* If the child was already visited, then there is no need to visit
         it again.  Just update the loop relationships and restart
         with a new edge.  */
      if (dfs_nr[child])
        {
          if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
            UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
          SET_BIT (passed, current_edge);
          current_edge = NEXT_OUT (current_edge);
          continue;
        }

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_BIT (passed, current_edge);
      current_edge = OUT_EDGES (child);

      /* This is temporary until haifa is converted to use rth's new
         cfg routines which have true entry/exit blocks and the
         appropriate edges from/to those blocks.

         Generally we update dfs_nr for a node when we process its
         out edge.  However, if the node has no out edge then we will
         not set dfs_nr for that node.  This can confuse the scheduler
         into thinking that we have unreachable blocks, which in turn
         disables cross block scheduling.

         So, if we have a node with no out edges, go ahead and mark it
         as reachable now.  */
      if (current_edge == 0)
        dfs_nr[child] = ++count;
    }
  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    if (dfs_nr[i] == 0)
      {
        unreachable = 1;
        break;
      }
  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  for (i = 0; i < n_basic_blocks; i++)
    degree[i] = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR)
        degree[e->dest->index]++;
    }
  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      int *queue;

      if (no_loops)
        SET_BIT (header, 0);

      /* Second traversal: find reducible inner loops and topologically sort
         blocks of each region.  */

      queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
      /* Find blocks which are inner loop headers.  We still have non-reducible
         loops to consider at this point.  */
      for (i = 0; i < n_basic_blocks; i++)
        if (TEST_BIT (header, i) && TEST_BIT (inner, i))
          {
            edge e;
            int j;
            /* Now check that the loop is reducible.  We do this separate
               from finding inner loops so that we do not find a reducible
               loop which contains an inner non-reducible loop.

               A simple way to find reducible/natural loops is to verify
               that each block in the loop is dominated by the loop
               header.

               If there exists a block that is not dominated by the loop
               header, then the block is reachable from outside the loop
               and thus the loop is not a natural loop.  */
            for (j = 0; j < n_basic_blocks; j++)
              {
                /* First identify blocks in the loop, except for the loop
                   entry block.  */
                if (i == max_hdr[j] && i != j)
                  {
                    /* Now verify that the block is dominated by the loop
                       header.  */
                    if (!TEST_BIT (dom[j], i))
                      break;
                  }
              }

            /* If we exited the loop early, then I is the header of
               a non-reducible loop and we should quit processing it
               now.  */
            if (j != n_basic_blocks)
              continue;
            /* I is a header of an inner loop, or block 0 in a subroutine
               with no loops at all.  */
            head = tail = -1;
            too_large_failure = 0;
            loop_head = max_hdr[i];

            /* Decrease degree of all I's successors for topological
               ordering.  */
            for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
              if (e->dest != EXIT_BLOCK_PTR)
                --degree[e->dest->index];

            /* Estimate # insns, and count # blocks in the region.  */
            num_bbs = 1;
            num_insns = (INSN_LUID (BLOCK_END (i))
                         - INSN_LUID (BLOCK_HEAD (i)));
            /* Find all loop latches (blocks with back edges to the loop
               header), or all the leaf blocks if the cfg has no loops.

               Place those blocks into the queue.  */
            if (no_loops)
              {
                for (j = 0; j < n_basic_blocks; j++)
                  /* Leaf nodes have only a single successor which must
                     be EXIT_BLOCK.  */
                  if (BASIC_BLOCK (j)->succ
                      && BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
                      && BASIC_BLOCK (j)->succ->succ_next == NULL)
                    {
                      queue[++tail] = j;
                      SET_BIT (in_queue, j);

                      if (too_large (j, &num_bbs, &num_insns))
                        {
                          too_large_failure = 1;
                          break;
                        }
                    }
              }
            else
              {
                for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
                  {
                    if (e->src == ENTRY_BLOCK_PTR)
                      continue;

                    node = e->src->index;

                    if (max_hdr[node] == loop_head && node != i)
                      {
                        /* This is a loop latch.  */
                        queue[++tail] = node;
                        SET_BIT (in_queue, node);

                        if (too_large (node, &num_bbs, &num_insns))
                          {
                            too_large_failure = 1;
                            break;
                          }
                      }
                  }
              }
            /* Now add all the blocks in the loop to the queue.

               We know the loop is a natural loop; however the algorithm
               above will not always mark certain blocks as being in the
               loop.  Consider:
                  node    children
                   a        b,c
                   b        c
                   c        a,d
                   d        b

               The algorithm in the DFS traversal may not mark B & D as part
               of the loop (i.e. they will not have max_hdr set to A).

               We know they cannot be loop latches (else they would have
               had max_hdr set since they'd have a backedge to a dominator
               block).  So we don't need them on the initial queue.

               We know they are part of the loop because they are dominated
               by the loop header and can be reached by a backwards walk of
               the edges starting with nodes on the initial queue.

               It is safe and desirable to include those nodes in the
               loop/scheduling region.  To do so we would need to decrease
               the degree of a node if it is the target of a backedge
               within the loop itself as the node is placed in the queue.

               We do not do this because I'm not sure that the actual
               scheduling code will properly handle this case.  ?!?  */
            while (head < tail && !too_large_failure)
              {
                edge e;
                child = queue[++head];

                for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
                  {
                    node = e->src->index;

                    /* See discussion above about nodes not marked as in
                       this loop during the initial DFS traversal.  */
                    if (e->src == ENTRY_BLOCK_PTR
                        || max_hdr[node] != loop_head)
                      {
                        tail = -1;
                        break;
                      }
                    else if (!TEST_BIT (in_queue, node) && node != i)
                      {
                        queue[++tail] = node;
                        SET_BIT (in_queue, node);

                        if (too_large (node, &num_bbs, &num_insns))
                          {
                            too_large_failure = 1;
                            break;
                          }
                      }
                  }
              }
            if (tail >= 0 && !too_large_failure)
              {
                /* Place the loop header into list of region blocks.  */
                degree[i] = -1;
                rgn_bb_table[idx] = i;
                RGN_NR_BLOCKS (nr_regions) = num_bbs;
                RGN_BLOCKS (nr_regions) = idx++;
                CONTAINING_RGN (i) = nr_regions;
                BLOCK_TO_BB (i) = count = 0;

                /* Remove blocks from queue[] when their in degree
                   becomes zero.  Repeat until no blocks are left on the
                   list.  This produces a topological list of blocks in
                   the region.  */
                while (tail >= 0)
                  {
                    if (head < 0)
                      head = tail;
                    child = queue[head];
                    if (degree[child] == 0)
                      {
                        edge e;

                        degree[child] = -1;
                        rgn_bb_table[idx++] = child;
                        BLOCK_TO_BB (child) = ++count;
                        CONTAINING_RGN (child) = nr_regions;
                        queue[head] = queue[tail--];

                        for (e = BASIC_BLOCK (child)->succ;
                             e;
                             e = e->succ_next)
                          if (e->dest != EXIT_BLOCK_PTR)
                            --degree[e->dest->index];
                      }
                    else
                      --head;
                  }
                ++nr_regions;
              }
          }
      free (queue);
    }
  /* Any block that did not end up in a region is placed into a region
     by itself.  */
  for (i = 0; i < n_basic_blocks; i++)
    if (degree[i] >= 0)
      {
        rgn_bb_table[idx] = i;
        RGN_NR_BLOCKS (nr_regions) = 1;
        RGN_BLOCKS (nr_regions) = idx++;
        CONTAINING_RGN (i) = nr_regions++;
        BLOCK_TO_BB (i) = 0;
      }
  free (max_hdr);
  free (dfs_nr);
  free (stack);
  sbitmap_free (passed);
  sbitmap_free (header);
  sbitmap_free (inner);
  sbitmap_free (in_queue);
  sbitmap_free (in_stack);
}
/* Functions for region scheduling information.  */

/* Compute dominators, probability, and potential-split-edges of bb.
   Assume that these values were already computed for bb's predecessors.  */

static void
compute_dom_prob_ps (bb)
     int bb;
{
  int nxt_in_edge, fst_in_edge, pred;
  int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;

  prob[bb] = 0.0;
  if (IS_RGN_ENTRY (bb))
    {
      SET_BIT (dom[bb], 0);
      prob[bb] = 1.0;
      return;
    }

  fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));

  /* Initialize dom[bb] to '111..1'.  */
  sbitmap_ones (dom[bb]);

  do
    {
      pred = FROM_BLOCK (nxt_in_edge);
      sbitmap_a_and_b (dom[bb], dom[bb], dom[BLOCK_TO_BB (pred)]);
      sbitmap_a_or_b (ancestor_edges[bb], ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)]);

      SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge));

      nr_out_edges = 1;
      nr_rgn_out_edges = 0;
      fst_out_edge = OUT_EDGES (pred);
      nxt_out_edge = NEXT_OUT (fst_out_edge);

      sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[BLOCK_TO_BB (pred)]);

      SET_BIT (pot_split[bb], EDGE_TO_BIT (fst_out_edge));

      /* The successor doesn't belong in the region?  */
      if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
          CONTAINING_RGN (BB_TO_BLOCK (bb)))
        ++nr_rgn_out_edges;

      while (fst_out_edge != nxt_out_edge)
        {
          ++nr_out_edges;
          /* The successor doesn't belong in the region?  */
          if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
              CONTAINING_RGN (BB_TO_BLOCK (bb)))
            ++nr_rgn_out_edges;
          SET_BIT (pot_split[bb], EDGE_TO_BIT (nxt_out_edge));
          nxt_out_edge = NEXT_OUT (nxt_out_edge);
        }

      /* Now nr_rgn_out_edges is the number of region-exit edges from
         pred, and nr_out_edges will be the number of pred out edges
         not leaving the region.  */
      nr_out_edges -= nr_rgn_out_edges;
      if (nr_rgn_out_edges > 0)
        prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
      else
        prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
      nxt_in_edge = NEXT_IN (nxt_in_edge);
    }
  while (fst_in_edge != nxt_in_edge);

  SET_BIT (dom[bb], bb);
  sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);

  if (sched_verbose >= 2)
    fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
             (int) (100.0 * prob[bb]));
}
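/* A worked example of the probability update above: if pred has
   prob[pred] == 0.8 and three out edges, one of which leaves the
   region, then nr_rgn_out_edges == 1, nr_out_edges ends up as 2, and
   bb receives 0.9 * 0.8 / 2 = 0.36 from this predecessor; the 0.9
   factor penalizes predecessors that have region-exit edges.  */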
/* Functions for target info.  */

/* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
   Note that bb_trg dominates bb_src.  */

static void
split_edges (bb_src, bb_trg, bl)
     int bb_src;
     int bb_trg;
     edgelst *bl;
{
  sbitmap src = (edgeset) sbitmap_alloc (pot_split[bb_src]->n_bits);
  sbitmap_copy (src, pot_split[bb_src]);

  sbitmap_difference (src, src, pot_split[bb_trg]);
  extract_bitlst (src, bl);
  sbitmap_free (src);
}
/* Find the valid candidate-source-blocks for the target block TRG, compute
   their probability, and check if they are speculative or not.
   For speculative sources, compute their update-blocks and split-blocks.  */

static void
compute_trg_info (trg)
     int trg;
{
  candidate *sp;
  edgelst el;
  int check_block, update_idx;
  int i, j, k, fst_edge, nxt_edge;

  /* Define some of the fields for the target bb as well.  */
  sp = candidate_table + trg;
  sp->is_valid = 1;
  sp->is_speculative = 0;
  sp->src_prob = 100;

  for (i = trg + 1; i < current_nr_blocks; i++)
    {
      sp = candidate_table + i;

      sp->is_valid = IS_DOMINATED (i, trg);
      if (sp->is_valid)
        {
          sp->src_prob = GET_SRC_PROB (i, trg);
          sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
        }

      if (sp->is_valid)
        {
          split_edges (i, trg, &el);
          sp->is_speculative = (el.nr_members) ? 1 : 0;
          if (sp->is_speculative && !flag_schedule_speculative)
            sp->is_valid = 0;
        }

      if (sp->is_valid)
        {
          char *update_blocks;

          /* Compute split blocks and store them in bblst_table.
             The TO block of every split edge is a split block.  */
          sp->split_bbs.first_member = &bblst_table[bblst_last];
          sp->split_bbs.nr_members = el.nr_members;
          for (j = 0; j < el.nr_members; bblst_last++, j++)
            bblst_table[bblst_last] =
              TO_BLOCK (rgn_edges[el.first_member[j]]);
          sp->update_bbs.first_member = &bblst_table[bblst_last];

          /* Compute update blocks and store them in bblst_table.
             For every split edge, look at the FROM block, and check
             all out edges.  For each out edge that is not a split edge,
             add the TO block to the update block list.  This list can end
             up with a lot of duplicates.  We need to weed them out to avoid
             overrunning the end of the bblst_table.  */
          update_blocks = (char *) alloca (n_basic_blocks);
          memset (update_blocks, 0, n_basic_blocks);

          update_idx = 0;
          for (j = 0; j < el.nr_members; j++)
            {
              check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
              fst_edge = nxt_edge = OUT_EDGES (check_block);
              do
                {
                  if (! update_blocks[TO_BLOCK (nxt_edge)])
                    {
                      for (k = 0; k < el.nr_members; k++)
                        if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
                          break;

                      if (k >= el.nr_members)
                        {
                          bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
                          update_blocks[TO_BLOCK (nxt_edge)] = 1;
                          update_idx++;
                        }
                    }

                  nxt_edge = NEXT_OUT (nxt_edge);
                }
              while (fst_edge != nxt_edge);
            }
          sp->update_bbs.nr_members = update_idx;

          /* Make sure we didn't overrun the end of bblst_table.  */
          if (bblst_last > bblst_size)
            abort ();
        }
      else
        {
          sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;

          sp->is_speculative = 0;
          sp->src_prob = 0;
        }
    }
}
/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidate (i)
     int i;
{
  if (!candidate_table[i].is_valid)
    return;

  if (candidate_table[i].is_speculative)
    {
      int j;
      fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);

      fprintf (sched_dump, "split path: ");
      for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
        {
          int b = candidate_table[i].split_bbs.first_member[j];

          fprintf (sched_dump, " %d ", b);
        }
      fprintf (sched_dump, "\n");

      fprintf (sched_dump, "update path: ");
      for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
        {
          int b = candidate_table[i].update_bbs.first_member[j];

          fprintf (sched_dump, " %d ", b);
        }
      fprintf (sched_dump, "\n");
    }
  else
    {
      fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
    }
}

/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidates (trg)
     int trg;
{
  int i;

  fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
           BB_TO_BLOCK (trg), trg);
  for (i = trg + 1; i < current_nr_blocks; i++)
    debug_candidate (i);
}
/* Functions for speculative scheduling.  */

/* Return 0 if x is a set of a register alive in the beginning of one
   of the split-blocks of src, otherwise return 1.  */

static int
check_live_1 (src, x)
     int src;
     rtx x;
{
  int i;
  int regno;
  rtx reg = SET_DEST (x);

  if (reg == 0)
    return 1;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
         || GET_CODE (reg) == SIGN_EXTRACT
         || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
        if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
          if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
            return 1;

      return 0;
    }

  if (GET_CODE (reg) != REG)
    return 1;

  regno = REGNO (reg);

  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
    {
      /* Global registers are assumed live.  */
      return 0;
    }
  else
    {
      if (regno < FIRST_PSEUDO_REGISTER)
        {
          /* Check for hard registers.  */
          int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
          while (--j >= 0)
            {
              for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
                {
                  int b = candidate_table[src].split_bbs.first_member[i];

                  if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
                                       regno + j))
                    return 0;
                }
            }
        }
      else
        {
          /* Check for pseudo registers.  */
          for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
            {
              int b = candidate_table[src].split_bbs.first_member[i];

              if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
                return 0;
            }
        }
    }

  return 1;
}
/* If x is a set of a register R, mark that R is alive in the beginning
   of every update-block of src.  */

static void
update_live_1 (src, x)
     int src;
     rtx x;
{
  int i;
  int regno;
  rtx reg = SET_DEST (x);

  if (reg == 0)
    return;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
         || GET_CODE (reg) == SIGN_EXTRACT
         || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
        if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
          update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));

      return;
    }

  if (GET_CODE (reg) != REG)
    return;

  /* Global registers are always live, so the code below does not apply
     to them.  */

  regno = REGNO (reg);

  if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
    {
      if (regno < FIRST_PSEUDO_REGISTER)
        {
          int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
          while (--j >= 0)
            {
              for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
                {
                  int b = candidate_table[src].update_bbs.first_member[i];

                  SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
                                     regno + j);
                }
            }
        }
      else
        {
          for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
            {
              int b = candidate_table[src].update_bbs.first_member[i];

              SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
            }
        }
    }
}
/* Return 1 if insn can be speculatively moved from block src to trg,
   otherwise return 0.  Called before first insertion of insn to
   ready-list or before the scheduling.  */

static int
check_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return check_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
        if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
             || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
            && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
          return 0;

      return 1;
    }

  return 1;
}
/* Update the live registers info after insn was moved speculatively from
   block src to trg.  */

static void
update_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    update_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
        if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
            || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
          update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
    }
}
/* Exception Free Loads:

   We define five classes of speculative loads: IFREE, IRISKY,
   PFREE, PRISKY, and MFREE.

   IFREE loads are loads that are proved to be exception-free, just
   by examining the load insn.  Examples for such loads are loads
   from TOC and loads of global data.

   IRISKY loads are loads that are proved to be exception-risky,
   just by examining the load insn.  Examples for such loads are
   volatile loads and loads from shared memory.

   PFREE loads are loads for which we can prove, by examining other
   insns, that they are exception-free.  Currently, this class consists
   of loads for which we are able to find a "similar load", either in
   the target block, or, if only one split-block exists, in that split
   block.  Load2 is similar to load1 if both have same single base
   register.  We identify only part of the similar loads, by finding
   an insn upon which both load1 and load2 have a DEF-USE dependence.

   PRISKY loads are loads for which we can prove, by examining other
   insns, that they are exception-risky.  Currently we have two proofs for
   such loads.  The first proof detects loads that are probably guarded by a
   test on the memory address.  This proof is based on the
   backward and forward data dependence information for the region.
   Let load-insn be the examined load.
   Load-insn is PRISKY iff ALL the following hold:

   - insn1 is not in the same block as load-insn
   - there is a DEF-USE dependence chain (insn1, ..., load-insn)
   - test-insn is either a compare or a branch, not in the same block
     as load-insn
   - load-insn is reachable from test-insn
   - there is a DEF-USE dependence chain (insn1, ..., test-insn)

   This proof might fail when the compare and the load are fed
   by an insn not in the region.  To solve this, we will add to this
   group all loads that have no input DEF-USE dependence.

   The second proof detects loads that are directly or indirectly
   fed by a speculative load.  This proof is affected by the
   scheduling process.  We will use the flag fed_by_spec_load.
   Initially, all insns have this flag reset.  After a speculative
   motion of an insn, if insn is either a load, or marked as
   fed_by_spec_load, we will also mark as fed_by_spec_load every
   insn1 for which a DEF-USE dependence (insn, insn1) exists.  A
   load which is fed_by_spec_load is also PRISKY.

   MFREE (maybe-free) loads are all the remaining loads.  They may be
   exception-free, but we cannot prove it.

   Now, all loads in IFREE and PFREE classes are considered
   exception-free, while all loads in IRISKY and PRISKY classes are
   considered exception-risky.  As for loads in the MFREE class,
   these are considered either exception-free or exception-risky,
   depending on whether we are pessimistic or optimistic.  We have
   to take the pessimistic approach to assure the safety of
   speculative scheduling, but we can take the optimistic approach
   by invoking the -fsched-spec-load-dangerous option.  */
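/* As an illustration of the PFREE case (hypothetical RTL, not taken
   from this file): suppose the candidate source block contains

     load_insn:  (set (reg 117) (mem (plus (reg 115) (const_int 8))))

   while the target block already executes

     load2:      (set (reg 116) (mem (plus (reg 115) (const_int 0))))

   and both addresses are fed by the same insn1 that sets (reg 115).
   The two addresses then differ by a constant, so if speculating
   load_insn could trap, load2 would already have trapped; load_insn
   is classified PFREE.  */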
enum INSN_TRAP_CLASS
{
  TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
  PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
};

#define WORST_CLASS(class1, class2) \
((class1 > class2) ? class1 : class2)
/* Non-zero if block bb_to is equal to, or reachable from block bb_from.  */
#define IS_REACHABLE(bb_from, bb_to)                            \
(bb_from == bb_to                                               \
 || IS_RGN_ENTRY (bb_from)                                      \
 || (TEST_BIT (ancestor_edges[bb_to],                           \
               EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))))))
/* Non-zero iff the address is comprised of at most 1 register.  */
#define CONST_BASED_ADDRESS_P(x)                        \
(GET_CODE (x) == REG                                    \
 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS     \
      || (GET_CODE (x) == LO_SUM))                      \
     && (CONSTANT_P (XEXP (x, 0))                       \
         || CONSTANT_P (XEXP (x, 1)))))
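/* For example, (reg 115), (plus (reg 115) (const_int 4)) and
   (lo_sum (reg 115) (symbol_ref "x")) all satisfy
   CONST_BASED_ADDRESS_P, while (plus (reg 115) (reg 116)) does not:
   two registers may each be valid yet sum to a wild address.  */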
/* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */

static void
set_spec_fed (load_insn)
     rtx load_insn;
{
  rtx link;

  for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
    if (GET_MODE (link) == VOIDmode)
      FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
}				/* set_spec_fed */
/* On the path from the insn to load_insn_bb, find a conditional
   branch depending on insn, that guards the speculative load.  */

static int
find_conditional_protection (insn, load_insn_bb)
     rtx insn;
     int load_insn_bb;
{
  rtx link;

  /* Iterate through DEF-USE forward dependences.  */
  for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
    {
      rtx next = XEXP (link, 0);
      if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
           CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
          && IS_REACHABLE (INSN_BB (next), load_insn_bb)
          && load_insn_bb != INSN_BB (next)
          && GET_MODE (link) == VOIDmode
          && (GET_CODE (next) == JUMP_INSN
              || find_conditional_protection (next, load_insn_bb)))
        return 1;
    }
  return 0;
}				/* find_conditional_protection */
/* Returns 1 if the same insn1 that participates in the computation
   of load_insn's address is feeding a conditional branch that is
   guarding on load_insn.  This is true if we find the two DEF-USE
   dependences:
   insn1 -> ... -> conditional-branch
   insn1 -> ... -> load_insn,
   and if a flow path exists:
   insn1 -> ... -> conditional-branch -> ... -> load_insn,
   and if insn1 is on the path
   region-entry -> ... -> bb_trg -> ... load_insn.

   Locate insn1 by climbing on LOG_LINKS from load_insn.
   Locate the branch by following INSN_DEPEND from insn1.  */

static int
is_conditionally_protected (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx link;

  for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
    {
      rtx insn1 = XEXP (link, 0);

      /* Must be a DEF-USE dependence upon non-branch.  */
      if (GET_MODE (link) != VOIDmode
          || GET_CODE (insn1) == JUMP_INSN)
        continue;

      /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn.  */
      if (INSN_BB (insn1) == bb_src
          || (CONTAINING_RGN (BLOCK_NUM (insn1))
              != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
          || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
              && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
        continue;

      /* Now search for the conditional-branch.  */
      if (find_conditional_protection (insn1, bb_src))
        return 1;

      /* Recursive step: search another insn1, "above" current insn1.  */
      return is_conditionally_protected (insn1, bb_src, bb_trg);
    }

  /* The chain does not exist.  */
  return 0;
}				/* is_conditionally_protected */
/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
   load_insn can move speculatively from bb_src to bb_trg.  All the
   following must hold:

   (1) both loads have 1 base register (PFREE_CANDIDATEs).
   (2) load_insn and load1 have a def-use dependence upon
   the same insn 'insn1'.
   (3) either load2 is in bb_trg, or:
   - there's only one split-block, and
   - load1 is on the escape path, and

   From all these we can conclude that the two loads access memory
   addresses that differ at most by a constant, and hence if moving
   load_insn would cause an exception, it would have been caused by
   load2 anyhow.  */

static int
is_pfree (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx back_link;
  candidate *candp = candidate_table + bb_src;

  if (candp->split_bbs.nr_members != 1)
    /* Must have exactly one escape block.  */
    return 0;

  for (back_link = LOG_LINKS (load_insn);
       back_link; back_link = XEXP (back_link, 1))
    {
      rtx insn1 = XEXP (back_link, 0);

      if (GET_MODE (back_link) == VOIDmode)
        {
          /* Found a DEF-USE dependence (insn1, load_insn).  */
          rtx fore_link;

          for (fore_link = INSN_DEPEND (insn1);
               fore_link; fore_link = XEXP (fore_link, 1))
            {
              rtx insn2 = XEXP (fore_link, 0);
              if (GET_MODE (fore_link) == VOIDmode)
                {
                  /* Found a DEF-USE dependence (insn1, insn2).  */
                  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
                    /* insn2 not guaranteed to be a 1 base reg load.  */
                    continue;

                  if (INSN_BB (insn2) == bb_trg)
                    /* insn2 is the similar load, in the target block.  */
                    return 1;

                  if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
                    /* insn2 is a similar load, in a split-block.  */
                    return 1;
                }
            }
        }
    }

  /* Couldn't find a similar load.  */
  return 0;
}				/* is_pfree */
/* Returns a class that insn with GET_DEST(insn)=x may belong to,
   as found by analyzing insn's expression.  */

static int
may_trap_exp (x, is_store)
     rtx x;
     int is_store;
{
  enum rtx_code code;

  if (x == 0)
    return TRAP_FREE;
  code = GET_CODE (x);
  if (is_store)
    {
      if (code == MEM && may_trap_p (x))
        return TRAP_RISKY;
      else
        return TRAP_FREE;
    }
  if (code == MEM)
    {
      /* The insn uses memory:  a volatile load.  */
      if (MEM_VOLATILE_P (x))
        return IRISKY;
      /* An exception-free load.  */
      if (!may_trap_p (x))
        return IFREE;
      /* A load with 1 base register, to be further checked.  */
      if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
        return PFREE_CANDIDATE;
      /* No info on the load, to be further checked.  */
      return PRISKY_CANDIDATE;
    }
  else
    {
      const char *fmt;
      int i, insn_class = TRAP_FREE;

      /* Neither store nor load, check if it may cause a trap.  */
      if (may_trap_p (x))
        return TRAP_RISKY;
      /* Recursive step: walk the insn...  */
      fmt = GET_RTX_FORMAT (code);
      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
        {
          if (fmt[i] == 'e')
            {
              int tmp_class = may_trap_exp (XEXP (x, i), is_store);
              insn_class = WORST_CLASS (insn_class, tmp_class);
            }
          else if (fmt[i] == 'E')
            {
              int j;
              for (j = 0; j < XVECLEN (x, i); j++)
                {
                  int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
                  insn_class = WORST_CLASS (insn_class, tmp_class);
                  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
                    break;
                }
            }
          if (insn_class == TRAP_RISKY || insn_class == IRISKY)
            break;
        }
      return insn_class;
    }
}
/* Classifies insn for the purpose of verifying that it can be
   moved speculatively, by examining its pattern, returning:
   TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
   TRAP_FREE: non-load insn.
   IFREE: load from a globally safe location.
   IRISKY: volatile load.
   PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
   being either PFREE or PRISKY.  */

static int
haifa_classify_insn (insn)
     rtx insn;
{
  rtx pat = PATTERN (insn);
  int tmp_class = TRAP_FREE;
  int insn_class = TRAP_FREE;
  enum rtx_code code;

  if (GET_CODE (pat) == PARALLEL)
    {
      int i, len = XVECLEN (pat, 0);

      for (i = len - 1; i >= 0; i--)
        {
          code = GET_CODE (XVECEXP (pat, 0, i));
          switch (code)
            {
            case CLOBBER:
              /* Test if it is a 'store'.  */
              tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
              break;
            case SET:
              /* Test if it is a store.  */
              tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
              if (tmp_class == TRAP_RISKY)
                break;
              /* Test if it is a load.  */
              tmp_class
                = WORST_CLASS (tmp_class,
                               may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)),
                                             0));
              break;
            case COND_EXEC:
            case TRAP_IF:
              tmp_class = TRAP_RISKY;
              break;
            default:
              ;
            }
          insn_class = WORST_CLASS (insn_class, tmp_class);
          if (insn_class == TRAP_RISKY || insn_class == IRISKY)
            break;
        }
    }
  else
    {
      code = GET_CODE (pat);
      switch (code)
        {
        case CLOBBER:
          /* Test if it is a 'store'.  */
          tmp_class = may_trap_exp (XEXP (pat, 0), 1);
          break;
        case SET:
          /* Test if it is a store.  */
          tmp_class = may_trap_exp (SET_DEST (pat), 1);
          if (tmp_class == TRAP_RISKY)
            break;
          /* Test if it is a load.  */
          tmp_class =
            WORST_CLASS (tmp_class,
                         may_trap_exp (SET_SRC (pat), 0));
          break;
        case COND_EXEC:
        case TRAP_IF:
          tmp_class = TRAP_RISKY;
          break;
        default:
          ;
        }
      insn_class = tmp_class;
    }

  return insn_class;
}
/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
   a load moved speculatively, or if load_insn is protected by
   a compare on load_insn's address).  */

static int
is_prisky (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  if (FED_BY_SPEC_LOAD (load_insn))
    return 1;

  if (LOG_LINKS (load_insn) == NULL)
    /* Dependence may 'hide' out of the region.  */
    return 1;

  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
    return 1;

  return 0;
}
/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
   Return 1 if insn is exception-free (and the motion is valid)
   and 0 otherwise.  */

static int
is_exception_free (insn, bb_src, bb_trg)
     rtx insn;
     int bb_src, bb_trg;
{
  int insn_class = haifa_classify_insn (insn);

  /* Handle non-load insns.  */
  switch (insn_class)
    {
    case TRAP_FREE:
      return 1;
    case TRAP_RISKY:
      return 0;
    default:;
    }

  /* Handle loads.  */
  if (!flag_schedule_speculative_load)
    return 0;
  IS_LOAD_INSN (insn) = 1;
  switch (insn_class)
    {
    case IFREE:
      return 1;
    case IRISKY:
      return 0;
    case PFREE_CANDIDATE:
      if (is_pfree (insn, bb_src, bb_trg))
        return 1;
      /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
    case PRISKY_CANDIDATE:
      if (!flag_schedule_speculative_load_dangerous
          || is_prisky (insn, bb_src, bb_trg))
        return 0;
      break;
    default:;
    }

  return flag_schedule_speculative_load_dangerous;
}
/* The number of insns from the current block scheduled so far.  */
static int sched_target_n_insns;
/* The number of insns from the current block to be scheduled in total.  */
static int target_n_insns;
/* The number of insns from the entire region scheduled so far.  */
static int sched_n_insns;
/* Nonzero if the last scheduled insn was a jump.  */
static int last_was_jump;

/* Implementations of the sched_info functions for region scheduling.  */
static void init_ready_list PARAMS ((struct ready_list *));
static int can_schedule_ready_p PARAMS ((rtx));
static int new_ready PARAMS ((rtx));
static int schedule_more_p PARAMS ((void));
static const char *rgn_print_insn PARAMS ((rtx, int));
static int rgn_rank PARAMS ((rtx, rtx));
static int contributes_to_priority PARAMS ((rtx, rtx));
static void compute_jump_reg_dependencies PARAMS ((rtx, regset));
/* Return nonzero if there are more insns that should be scheduled.  */

static int
schedule_more_p ()
{
  return ! last_was_jump && sched_target_n_insns < target_n_insns;
}
/* Add all insns that are initially ready to the ready list READY.  Called
   once before scheduling a set of insns.  */

static void
init_ready_list (ready)
     struct ready_list *ready;
{
  rtx prev_head = current_sched_info->prev_head;
  rtx next_tail = current_sched_info->next_tail;
  int bb_src;
  rtx insn;

  target_n_insns = 0;
  sched_target_n_insns = 0;
  sched_n_insns = 0;
  last_was_jump = 0;

  /* Print debugging information.  */
  if (sched_verbose >= 5)
    debug_dependencies ();

  /* Prepare current target block info.  */
  if (current_nr_blocks > 1)
    {
      candidate_table = (candidate *) xmalloc (current_nr_blocks
                                               * sizeof (candidate));

      bblst_last = 0;
      /* bblst_table holds split blocks and update blocks for each block after
         the current one in the region.  split blocks and update blocks are
         the TO blocks of region edges, so there can be at most rgn_nr_edges
         of them.  */
      bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
      bblst_table = (int *) xmalloc (bblst_size * sizeof (int));

      bitlst_table_last = 0;
      bitlst_table_size = rgn_nr_edges;
      bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));

      compute_trg_info (target_bb);
    }

  /* Initialize ready list with all 'ready' insns in target block.
     Count number of insns in the target block being scheduled.  */
  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
    {
      rtx next;

      if (! INSN_P (insn))
        continue;
      next = NEXT_INSN (insn);

      if (INSN_DEP_COUNT (insn) == 0
          && (SCHED_GROUP_P (next) == 0 || ! INSN_P (next)))
        ready_add (ready, insn);
      if (!(SCHED_GROUP_P (insn)))
        target_n_insns++;
    }

  /* Add to ready list all 'ready' insns in valid source blocks.
     For speculative insns, check-live, exception-free, and
     issue-delay.  */
  for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
    if (IS_VALID (bb_src))
      {
        rtx src_head;
        rtx src_next_tail;
        rtx tail, head;

        get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
        src_next_tail = NEXT_INSN (tail);
        src_head = head;

        for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
          {
            if (! INSN_P (insn))
              continue;

            if (!CANT_MOVE (insn)
                && (!IS_SPECULATIVE_INSN (insn)
                    || (insn_issue_delay (insn) <= 3
                        && check_live (insn, bb_src)
                        && is_exception_free (insn, bb_src, target_bb))))
              {
                rtx next;

                /* Note that we haven't squirreled away the notes for
                   blocks other than the current.  So if this is a
                   speculative insn, NEXT might otherwise be a note.  */
                next = next_nonnote_insn (insn);
                if (INSN_DEP_COUNT (insn) == 0
                    && (! next
                        || SCHED_GROUP_P (next) == 0
                        || ! INSN_P (next)))
                  ready_add (ready, insn);
              }
          }
      }
}
/* Called after taking INSN from the ready list.  Returns nonzero if this
   insn can be scheduled, zero if we should silently discard it.  */

static int
can_schedule_ready_p (insn)
     rtx insn;
{
  if (GET_CODE (insn) == JUMP_INSN)
    last_was_jump = 1;

  /* An interblock motion?  */
  if (INSN_BB (insn) != target_bb)
    {
      basic_block b1;
      rtx temp;

      if (IS_SPECULATIVE_INSN (insn))
        {
          if (!check_live (insn, INSN_BB (insn)))
            return 0;
          update_live (insn, INSN_BB (insn));

          /* For speculative load, mark insns fed by it.  */
          if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
            set_spec_fed (insn);

          nr_spec++;
        }
      nr_inter++;

      /* Find the beginning of the scheduling group.  */
      /* ??? Ought to update basic block here, but later bits of
         schedule_block assumes the original insn block is
         still intact.  */

      temp = insn;
      while (SCHED_GROUP_P (temp))
        temp = PREV_INSN (temp);

      /* Update source block boundaries.  */
      b1 = BLOCK_FOR_INSN (temp);
      if (temp == b1->head && insn == b1->end)
        {
          /* We moved all the insns in the basic block.
             Emit a note after the last insn and update the
             begin/end boundaries to point to the note.  */
          rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
          b1->head = note;
          b1->end = note;
        }
      else if (insn == b1->end)
        {
          /* We took insns from the end of the basic block,
             so update the end of block boundary so that it
             points to the first insn we did not move.  */
          b1->end = PREV_INSN (temp);
        }
      else if (temp == b1->head)
        {
          /* We took insns from the start of the basic block,
             so update the start of block boundary so that
             it points to the first insn we did not move.  */
          b1->head = NEXT_INSN (insn);
        }
    }
  else
    {
      /* In block motion.  */
      sched_target_n_insns++;
    }
  sched_n_insns++;

  return 1;
}
/* Called after INSN has all its dependencies resolved.  Return nonzero
   if it should be moved to the ready list or the queue, or zero if we
   should silently discard it.  */

static int
new_ready (next)
     rtx next;
{
  /* For speculative insns, before inserting to ready/queue,
     check live, exception-free, and issue-delay.  */
  if (INSN_BB (next) != target_bb
      && (!IS_VALID (INSN_BB (next))
          || CANT_MOVE (next)
          || (IS_SPECULATIVE_INSN (next)
              && (insn_issue_delay (next) > 3
                  || !check_live (next, INSN_BB (next))
                  || !is_exception_free (next, INSN_BB (next), target_bb)))))
    return 0;

  return 1;
}
/* Return a string that contains the insn uid and optionally anything else
   necessary to identify this insn in an output.  It's valid to use a
   static buffer for this.  The ALIGNED parameter should cause the string
   to be formatted so that multiple output lines will line up nicely.  */

static const char *
rgn_print_insn (insn, aligned)
     rtx insn;
     int aligned;
{
  static char tmp[80];

  if (aligned)
    sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
  else
    {
      if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
        sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
      else
        sprintf (tmp, "%d", INSN_UID (insn));
    }
  return tmp;
}
/* Compare priority of two insns.  Return a positive number if the second
   insn is to be preferred for scheduling, and a negative one if the first
   is to be preferred.  Zero if they are equally good.  */

static int
rgn_rank (insn1, insn2)
     rtx insn1, insn2;
{
  /* Some comparisons make sense in interblock scheduling only.  */
  if (INSN_BB (insn1) != INSN_BB (insn2))
    {
      int spec_val, prob_val;

      /* Prefer an inblock motion over an interblock motion.  */
      if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
        return 1;
      if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
        return -1;

      /* Prefer a useful motion over a speculative one.  */
      spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
      if (spec_val)
        return spec_val;

      /* Prefer a more probable (speculative) insn.  */
      prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
      if (prob_val)
        return prob_val;
    }
  return 0;
}
/* NEXT is an instruction that depends on INSN (a backward dependence);
   return nonzero if we should include this dependence in priority
   calculations.  */

static int
contributes_to_priority (next, insn)
     rtx next, insn;
{
  return BLOCK_NUM (next) == BLOCK_NUM (insn);
}
/* INSN is a JUMP_INSN.  Store the set of registers that must be considered
   to be set by this jump in SET.  */

static void
compute_jump_reg_dependencies (insn, set)
     rtx insn ATTRIBUTE_UNUSED;
     regset set ATTRIBUTE_UNUSED;
{
  /* Nothing to do here, since we postprocess jumps in
     add_branch_dependences.  */
}
/* Used in schedule_insns to initialize current_sched_info for scheduling
   regions (or single basic blocks).  */

static struct sched_info region_sched_info =
{
  init_ready_list,
  can_schedule_ready_p,
  schedule_more_p,
  new_ready,
  rgn_rank,
  rgn_print_insn,
  contributes_to_priority,
  compute_jump_reg_dependencies,

  NULL, NULL,
  NULL, NULL,
  0, 0
};
2271 /* Add dependences so that branches are scheduled to run last in their
     block.  */
2275 add_branch_dependences (head, tail)
2280 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
2281 to remain in order at the end of the block by adding dependencies and
2282 giving the last a high priority. There may be notes present, and
2283 prev_head may also be a note.
2285 Branches must obviously remain at the end. Calls should remain at the
2286 end since moving them results in worse register allocation. Uses remain
2287 at the end to ensure proper register allocation. cc0 setters remain
2288 at the end because they can't be moved away from their cc0 user. */
2291 while (GET_CODE (insn) == CALL_INSN
2292 || GET_CODE (insn) == JUMP_INSN
2293 || (GET_CODE (insn) == INSN
2294 && (GET_CODE (PATTERN (insn)) == USE
2295 || GET_CODE (PATTERN (insn)) == CLOBBER
2297 || sets_cc0_p (PATTERN (insn))
2300 || GET_CODE (insn) == NOTE)
2302 if (GET_CODE (insn) != NOTE)
2304 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
2306 add_dependence (last, insn, REG_DEP_ANTI);
2307 INSN_REF_COUNT (insn)++;
2310 CANT_MOVE (insn) = 1;

     last = insn;
2313 /* Skip over insns that are part of a group.
2314 Make each insn explicitly depend on the previous insn.
2315 This ensures that only the group header will ever enter
2316 the ready queue (and, when scheduled, will automatically
2317 schedule the SCHED_GROUP_P block). */
2318 while (SCHED_GROUP_P (insn))
2320 rtx temp = prev_nonnote_insn (insn);
2321 add_dependence (insn, temp, REG_DEP_ANTI);
     insn = temp;
2326 /* Don't overrun the bounds of the basic block. */
     if (insn == head)
       break;

2330 insn = PREV_INSN (insn);
2333 /* Make sure these insns are scheduled last in their block. */
2336 while (insn != head)
2338 insn = prev_nonnote_insn (insn);
2340 if (INSN_REF_COUNT (insn) != 0)
     continue;
2343 add_dependence (last, insn, REG_DEP_ANTI);
2344 INSN_REF_COUNT (insn) = 1;
2346 /* Skip over insns that are part of a group. */
2347 while (SCHED_GROUP_P (insn))
2348 insn = prev_nonnote_insn (insn);
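/* Worked example for the two loops above (editorial note): for a block
   ending in

       i10: call
       i11: use r0
       i12: jump

   the first loop walks i12, i11, i10 and adds the anti-dependences
   i12 -> i11 and i11 -> i10, so these insns can only issue in their
   original order at the end of the block; the second loop then makes
   the last insn anti-dependent on every remaining insn that has no
   reference yet, forcing the branch group to be scheduled last.  */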
2352 /* Data structures for the computation of data dependences in a region. We
2353 keep one `deps' structure for every basic block. Before analyzing the
2354 data dependences for a bb, its variables are initialized as a function of
2355 the variables of its predecessors. When the analysis for a bb completes,
2356 we save the contents to the corresponding bb_deps[bb] variable. */
2358 static struct deps *bb_deps;
2360 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2363 concat_INSN_LIST (copy, old)
     rtx new = old;
2367 for (; copy ; copy = XEXP (copy, 1))
2368 new = alloc_INSN_LIST (XEXP (copy, 0), new);
     return new;
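/* Usage note (editorial): COPY's nodes are duplicated rather than
   spliced, so e.g.

     succ->last_function_call
       = concat_INSN_LIST (pred->last_function_call,
                           succ->last_function_call);

   leaves the predecessor's list intact and the two lists can later be
   freed independently.  */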
2373 concat_insn_mem_list (copy_insns, copy_mems, old_insns_p, old_mems_p)
2374 rtx copy_insns, copy_mems;
2375 rtx *old_insns_p, *old_mems_p;
2377 rtx new_insns = *old_insns_p;
2378 rtx new_mems = *old_mems_p;

     while (copy_insns)
       {
2382 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2383 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2384 copy_insns = XEXP (copy_insns, 1);
2385 copy_mems = XEXP (copy_mems, 1);
       }

2388 *old_insns_p = new_insns;
2389 *old_mems_p = new_mems;
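/* The insn and mem lists handled above are parallel: one EXPR_LIST
   node per INSN_LIST node.  A minimal consistency check (editorial
   sketch; `check_parallel_lists' is a hypothetical helper, not part
   of this file) could walk both in lockstep:  */
#if 0
static void
check_parallel_lists (insns, mems)
     rtx insns, mems;
{
  /* Advance both lists together; they must end at the same time.  */
  while (insns && mems)
    {
      insns = XEXP (insns, 1);
      mems = XEXP (mems, 1);
    }
  if (insns != 0 || mems != 0)
    abort ();
}
#endif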
2392 /* After computing the dependencies for block BB, propagate the dependencies
2393 found in PRED_DEPS to the successors of the block. */
2395 propagate_deps (bb, pred_deps)
2397 struct deps *pred_deps;
2399 int b = BB_TO_BLOCK (bb);
2402 /* bb's structures are inherited by its successors. */
2403 first_edge = e = OUT_EDGES (b);
2407 int b_succ = TO_BLOCK (e);
2408 int bb_succ = BLOCK_TO_BB (b_succ);
2409 struct deps *succ_deps = bb_deps + bb_succ;
2412 /* Only bbs "below" bb, in the same region, are interesting. */
2413 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2420 /* The reg_last lists are inherited by bb_succ. */
2421 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg,
2423 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2424 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2426 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2427 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2428 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers, succ_rl->clobbers);
2430 succ_rl->uses_length += pred_rl->uses_length;
2431 succ_rl->clobbers_length += pred_rl->clobbers_length;
2433 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2435 /* Mem read/write lists are inherited by bb_succ. */
2436 concat_insn_mem_list (pred_deps->pending_read_insns,
2437 pred_deps->pending_read_mems,
2438 &succ_deps->pending_read_insns,
2439 &succ_deps->pending_read_mems);
2440 concat_insn_mem_list (pred_deps->pending_write_insns,
2441 pred_deps->pending_write_mems,
2442 &succ_deps->pending_write_insns,
2443 &succ_deps->pending_write_mems);
2445 succ_deps->last_pending_memory_flush
2446 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2447 succ_deps->last_pending_memory_flush);
2449 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2450 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2452 /* last_function_call is inherited by bb_succ. */
2453 succ_deps->last_function_call
2454 = concat_INSN_LIST (pred_deps->last_function_call,
2455 succ_deps->last_function_call);
2457 /* sched_before_next_call is inherited by bb_succ. */
2458 succ_deps->sched_before_next_call
2459 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2460 succ_deps->sched_before_next_call);
2464 while (e != first_edge);
2466 /* These lists should point to the right place, for correct freeing later.  */
2468 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2469 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2470 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2471 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2473 /* Can't allow these to be freed twice. */
2474 pred_deps->pending_read_insns = 0;
2475 pred_deps->pending_read_mems = 0;
2476 pred_deps->pending_write_insns = 0;
2477 pred_deps->pending_write_mems = 0;
2480 /* Compute backward dependences inside bb. In a multiple blocks region:
2481 (1) a bb is analyzed after its predecessors, and (2) the lists in
2482 effect at the end of bb (after analyzing for bb) are inherited by bb's successors.
2485 Specifically for reg-reg data dependences, the block insns are
2486 scanned by sched_analyze () top-to-bottom. Two lists are
2487 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2488 and reg_last[].uses for register USEs.
2490 When analysis is completed for bb, we update for its successors:
2491 - DEFS[succ] = Union (DEFS[succ], DEFS[bb])
2492 - USES[succ] = Union (USES[succ], USES[bb])
2494 The mechanism for computing mem-mem data dependence is very
2495 similar, and the result is interblock dependences in the region. */
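/* Worked example (editorial note): in a two-block region bb0 -> bb1
   with

     bb0:  i1: r1 = r2 + 1      ; DEF of r1
     bb1:  i2: r3 = r1 * 2      ; USE of r1

   analyzing bb0 records i1 in reg_last[r1].sets; propagate_deps then
   copies that list into bb1's deps, so when sched_analyze () reaches
   i2 it adds the true dependence i2 -> i1 even though the two insns
   live in different blocks.  These interblock dependences are what
   keep interblock motion safe.  */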
2498 compute_block_backward_dependences (bb)
2502 struct deps tmp_deps;
2504 tmp_deps = bb_deps[bb];
2506 /* Do the analysis for this block. */
2507 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2508 sched_analyze (&tmp_deps, head, tail);
2509 add_branch_dependences (head, tail);
2511 if (current_nr_blocks > 1)
2512 propagate_deps (bb, &tmp_deps);
2514 /* Free up the INSN_LISTs. */
2515 free_deps (&tmp_deps);
2518 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2519 them to the unused_*_list variables, so that they can be reused. */
2522 free_pending_lists ()
2526 for (bb = 0; bb < current_nr_blocks; bb++)
2528 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2529 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2530 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2531 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2535 /* Print dependences for debugging, callable from debugger. */
2538 debug_dependencies ()
2542 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2543 for (bb = 0; bb < current_nr_blocks; bb++)
2551 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2552 next_tail = NEXT_INSN (tail);
2553 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2554 BB_TO_BLOCK (bb), bb);
2556 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2557 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2558 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2559 "----", "----", "--", "---", "----", "----", "--------", "-----");
2560 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2565 if (! INSN_P (insn))
2568 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2569 if (GET_CODE (insn) == NOTE)
       {
2571 n = NOTE_LINE_NUMBER (insn);
     if (n < 0)
2573 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
     else
2575 fprintf (sched_dump, "line %d, file %s\n", n,
2576 NOTE_SOURCE_FILE (insn));
       }
     else
2579 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2583 unit = insn_unit (insn);
     range = (unit < 0
2585 || function_units[unit].blockage_range_function == 0) ? 0 :
2586 function_units[unit].blockage_range_function (insn);
2587 fprintf (sched_dump,
2588 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2589 (SCHED_GROUP_P (insn) ? "+" : " "),
     INSN_UID (insn),
     INSN_CODE (insn),
     INSN_BB (insn),
2593 INSN_DEP_COUNT (insn),
2594 INSN_PRIORITY (insn),
2595 insn_cost (insn, 0, 0),
2596 (int) MIN_BLOCKAGE_COST (range),
2597 (int) MAX_BLOCKAGE_COST (range));
2598 insn_print_units (insn);
2599 fprintf (sched_dump, "\t: ");
2600 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2601 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2602 fprintf (sched_dump, "\n");
2606 fprintf (sched_dump, "\n");
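/* Shape of the output produced above (editorial sketch; values and
   spacing are made up):

   ;; --- Region Dependences --- b 12 bb 0
   ;;    insn  code    bb   dep  prio  cost   blockage units
   ;;    ----  ----    --   ---  ----  ----   -------- -----
   ;;      14    95     0     1     5     1     0 -  0  alu : 17 21

   where the trailing uids (17, 21) come from the INSN_DEPEND walk.  */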
2609 /* Schedule a region. A region is either an inner loop, a loop-free
2610 subroutine, or a single basic block. Each bb in the region is
2611 scheduled after its flow predecessors. */
2614 schedule_region (rgn)
2618 int rgn_n_insns = 0;
2619 int sched_rgn_n_insns = 0;
2621 /* Set variables for the current region. */
2622 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2623 current_blocks = RGN_BLOCKS (rgn);
2625 init_deps_global ();
2627 /* Initializations for region data dependence analysis. */
2628 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2629 for (bb = 0; bb < current_nr_blocks; bb++)
2630 init_deps (bb_deps + bb);
2632 /* Compute LOG_LINKS. */
2633 for (bb = 0; bb < current_nr_blocks; bb++)
2634 compute_block_backward_dependences (bb);
2636 /* Compute INSN_DEPEND. */
2637 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2640 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2642 compute_forward_dependences (head, tail);
2645 /* Set priorities. */
2646 for (bb = 0; bb < current_nr_blocks; bb++)
2649 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2651 rgn_n_insns += set_priorities (head, tail);
2654 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2655 if (current_nr_blocks > 1)
2659 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2661 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2662 sbitmap_vector_zero (dom, current_nr_blocks);
     rgn_nr_edges = 0;
2665 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2666 for (i = 1; i < nr_edges; i++)
2667 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2668 EDGE_TO_BIT (i) = rgn_nr_edges++;
2669 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));

     rgn_nr_edges = 0;
2672 for (i = 1; i < nr_edges; i++)
2673 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2674 rgn_edges[rgn_nr_edges++] = i;
2677 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2678 sbitmap_vector_zero (pot_split, current_nr_blocks);
2679 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2680 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2682 /* Compute probabilities, dominators, split_edges. */
2683 for (bb = 0; bb < current_nr_blocks; bb++)
2684 compute_dom_prob_ps (bb);
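/* Summary of what the setup above provides (editorial note): prob[bb]
   estimates the probability that control reaches bb from the region
   entry, dom[bb] is the set of region blocks dominating bb, and
   pot_split[bb] / ancestor_edges[bb] record the region edges relevant
   to moving an insn up into bb.  These structures back the
   IS_SPECULATIVE_INSN and INSN_PROBABILITY tests used when ranking
   and admitting candidate insns above.  */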
2687 /* Now we can schedule all blocks. */
2688 for (bb = 0; bb < current_nr_blocks; bb++)
2691 int b = BB_TO_BLOCK (bb);
2693 get_block_head_tail (b, &head, &tail);
2695 if (no_real_insns_p (head, tail))
     continue;
2698 current_sched_info->prev_head = PREV_INSN (head);
2699 current_sched_info->next_tail = NEXT_INSN (tail);
2701 if (write_symbols != NO_DEBUG)
2703 save_line_notes (b, head, tail);
2704 rm_line_notes (head, tail);
2707 /* rm_other_notes only removes notes which are _inside_ the
2708 block---that is, it won't remove notes before the first real insn
2709 or after the last real insn of the block. So if the first insn
2710 has a REG_SAVE_NOTE which would otherwise be emitted before the
2711 insn, it is redundant with the note before the start of the
2712 block, and so we have to take it out. */
     rtx note;
2717 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2718 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
       {
2720 remove_note (head, note);
2721 note = XEXP (note, 1);
2722 remove_note (head, note);
       }
2726 /* Remove remaining note insns from the block, save them in
2727 note_list. These notes are restored at the end of
2728 schedule_block (). */
2729 rm_other_notes (head, tail);
2733 current_sched_info->queue_must_finish_empty
2734 = current_nr_blocks > 1 && !flag_schedule_interblock;
2736 schedule_block (b, rgn_n_insns);
2737 sched_rgn_n_insns += sched_n_insns;
2739 /* Update target block boundaries. */
2740 if (head == BLOCK_HEAD (b))
2741 BLOCK_HEAD (b) = current_sched_info->head;
2742 if (tail == BLOCK_END (b))
2743 BLOCK_END (b) = current_sched_info->tail;
2746 if (current_nr_blocks > 1)
2748 free (candidate_table);
2750 free (bitlst_table);
2754 /* Sanity check: verify that all region insns were scheduled. */
2755 if (sched_rgn_n_insns != rgn_n_insns)
     abort ();
2758 /* Restore line notes. */
2759 if (write_symbols != NO_DEBUG)
2761 for (bb = 0; bb < current_nr_blocks; bb++)
2764 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2765 restore_line_notes (head, tail);
2769 /* Done with this region. */
2770 free_pending_lists ();
2772 finish_deps_global ();
2776 if (current_nr_blocks > 1)
2779 sbitmap_vector_free (dom);
2780 sbitmap_vector_free (pot_split);
2781 sbitmap_vector_free (ancestor_edges);
2787 /* Indexed by region, holds the number of death notes found in that region.
2788 Used for consistency checks. */
2789 static int *deaths_in_region;
2791 /* Initialize data structures for region scheduling. */
2800 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2801 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2802 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2803 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2805 /* Compute regions for scheduling. */
2806 if (reload_completed
2807 || n_basic_blocks == 1
2808 || !flag_schedule_interblock)
2810 find_single_block_region ();
2814 /* Verify that a 'good' control flow graph can be built. */
2815 if (is_cfg_nonregular ())
2817 find_single_block_region ();
2822 struct edge_list *edge_list;
2824 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
2826 /* The scheduler runs after flow; therefore, we can't blindly call
2827 back into find_basic_blocks since doing so could invalidate the
2828 info in global_live_at_start.
2830 Consider a block consisting entirely of dead stores; after life
2831 analysis it would be a block of NOTE_INSN_DELETED notes. If
2832 we call find_basic_blocks again, then the block would be removed
2833 entirely, invalidating our register live information.
2835 We could (should?) recompute register live information. Doing
2836 so may even be beneficial. */
2837 edge_list = create_edge_list ();
2839 /* Compute the dominators and post dominators. */
2840 calculate_dominance_info (NULL, dom, CDI_DOMINATORS);
2842 /* build_control_flow will return nonzero if it detects unreachable
2843 blocks or any other irregularity with the cfg which prevents
2844 cross block scheduling. */
2845 if (build_control_flow (edge_list) != 0)
2846 find_single_block_region ();
     else
2848 find_rgns (edge_list, dom);
2850 if (sched_verbose >= 3)
     debug_regions ();
2853 /* We are done with flow's edge list. */
2854 free_edge_list (edge_list);
2856 /* For now. This will move as more and more of haifa is converted
2857 to using the cfg code in flow.c. */
2863 if (CHECK_DEAD_NOTES)
2865 blocks = sbitmap_alloc (n_basic_blocks);
2866 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2867 /* Remove all death notes from the subroutine. */
2868 for (rgn = 0; rgn < nr_regions; rgn++)
2872 sbitmap_zero (blocks);
2873 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2874 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2876 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2878 sbitmap_free (blocks);
2881 count_or_remove_death_notes (NULL, 1);
2884 /* The one entry point in this file. DUMP_FILE is the dump file for
     this pass.  */
2888 schedule_insns (dump_file)
2891 sbitmap large_region_blocks, blocks;
2893 int any_large_regions;
2895 /* Taking care of this degenerate case makes the rest of
2896 this code simpler. */
2897 if (n_basic_blocks == 0)
     return;
2900 scope_to_insns_initialize ();
2905 sched_init (dump_file);
2909 current_sched_info = &region_sched_info;
2911 /* Schedule every region in the subroutine. */
2912 for (rgn = 0; rgn < nr_regions; rgn++)
2913 schedule_region (rgn);
2915 /* Update life analysis for the subroutine. Do single block regions
2916 first so that we can verify that live_at_start didn't change. Then
2917 do all other blocks. */
2918 /* ??? There is an outside possibility that update_life_info, or more
2919 to the point propagate_block, could get called with non-zero flags
2920 more than once for one basic block. This would be kinda bad if it
2921 were to happen, since REG_INFO would be accumulated twice for the
2922 block, and we'd have twice the REG_DEAD notes.
2924 I'm fairly certain that this _shouldn't_ happen, since I don't think
2925 that live_at_start should change at region heads. Not sure what the
2926 best way to test for this kind of thing... */
2928 allocate_reg_life_data ();
2929 compute_bb_for_insn (get_max_uid ());
2931 any_large_regions = 0;
2932 large_region_blocks = sbitmap_alloc (n_basic_blocks);
2933 sbitmap_ones (large_region_blocks);
2935 blocks = sbitmap_alloc (n_basic_blocks);
2936 sbitmap_zero (blocks);
2938 /* Update life information. For regions consisting of multiple blocks
2939 we've possibly done interblock scheduling that affects global liveness.
2940 For regions consisting of single blocks we need to do only local
     liveness.  */
2942 for (rgn = 0; rgn < nr_regions; rgn++)
2943 if (RGN_NR_BLOCKS (rgn) > 1)
2944 any_large_regions = 1;
     else
       {
2947 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2948 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
       }
2951 /* Don't update reg info after reload, since that affects
2952 regs_ever_live, which should not change after reload. */
2953 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2954 (reload_completed ? PROP_DEATH_NOTES
2955 : PROP_DEATH_NOTES | PROP_REG_INFO));
2956 if (any_large_regions)
2958 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2959 PROP_DEATH_NOTES | PROP_REG_INFO);
2962 if (CHECK_DEAD_NOTES)
2964 /* Verify the counts of death notes in the single basic block
     regions.  */
2966 for (rgn = 0; rgn < nr_regions; rgn++)
2967 if (RGN_NR_BLOCKS (rgn) == 1)
2969 sbitmap_zero (blocks);
2970 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2972 if (deaths_in_region[rgn]
2973 != count_or_remove_death_notes (blocks, 0))
     abort ();
2976 free (deaths_in_region);
2979 /* Reposition the prologue and epilogue notes in case we moved the
2980 prologue/epilogue insns. */
2981 if (reload_completed)
2982 reposition_prologue_and_epilogue_notes (get_insns ());
2984 /* Delete redundant line notes. */
2985 if (write_symbols != NO_DEBUG)
2986 rm_redundant_line_notes ();
2988 scope_to_insns_finalize ();
2992 if (reload_completed == 0 && flag_schedule_interblock)
2994 fprintf (sched_dump,
2995 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3003 fprintf (sched_dump, "\n\n");
3008 free (rgn_bb_table);
3010 free (containing_rgn);
3031 sbitmap_free (blocks);
3032 sbitmap_free (large_region_blocks);