From c9ab9ae440a8066b2c2b85b157b1fdadcf09916a Mon Sep 17 00:00:00 2001 From: obrien Date: Fri, 1 Feb 2002 18:16:02 +0000 Subject: Enlist the FreeBSD-CURRENT users as testers of what is to become Gcc 3.1.0. These bits are taken from the FSF anoncvs repo on 1-Feb-2002 08:20 PST. --- contrib/gcc/lcm.c | 1843 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 1229 insertions(+), 614 deletions(-) (limited to 'contrib/gcc/lcm.c') diff --git a/contrib/gcc/lcm.c b/contrib/gcc/lcm.c index 01367e3..0a8a7ce 100644 --- a/contrib/gcc/lcm.c +++ b/contrib/gcc/lcm.c @@ -1,26 +1,25 @@ -/* Generic partial redundancy elimination with lazy code motion - support. - Copyright (C) 1998 Free Software Foundation, Inc. +/* Generic partial redundancy elimination with lazy code motion support. + Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. -This file is part of GNU CC. +This file is part of GCC. -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ /* These routines are meant to be used by various optimization - passes which can be modeled as lazy code motion problems. + passes which can be modeled as lazy code motion problems. Including, but not limited to: * Traditional partial redundancy elimination. @@ -52,7 +51,6 @@ Boston, MA 02111-1307, USA. */ #include "config.h" #include "system.h" - #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" @@ -61,739 +59,1356 @@ Boston, MA 02111-1307, USA. */ #include "insn-config.h" #include "recog.h" #include "basic-block.h" - -static void compute_antinout PROTO ((int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); -static void compute_earlyinout PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); -static void compute_delayinout PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_latein PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_isoinout PROTO ((int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); -static void compute_optimal PROTO ((int, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_redundant PROTO ((int, int, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); - -/* Similarly, but for the reversed flowgraph. 
*/ -static void compute_avinout PROTO ((int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); -static void compute_fartherinout PROTO ((int, int, int_list_ptr *, - sbitmap *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_earlierinout PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_firstout PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_rev_isoinout PROTO ((int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); - -/* Given local properties TRANSP, ANTLOC, return the redundant and optimal - computation points for expressions. - - To reduce overall memory consumption, we allocate memory immediately - before its needed and deallocate it as soon as possible. */ -void -pre_lcm (n_blocks, n_exprs, s_preds, s_succs, transp, - antloc, redundant, optimal) - int n_blocks; - int n_exprs; - int_list_ptr *s_preds; - int_list_ptr *s_succs; - sbitmap *transp; - sbitmap *antloc; - sbitmap *redundant; - sbitmap *optimal; -{ - sbitmap *antin, *antout, *earlyin, *earlyout, *delayin, *delayout; - sbitmap *latein, *isoin, *isoout; - - /* Compute global anticipatability. ANTOUT is not needed except to - compute ANTIN, so free its memory as soon as we return from - compute_antinout. */ - antin = sbitmap_vector_alloc (n_blocks, n_exprs); - antout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_antinout (n_blocks, s_succs, antloc, - transp, antin, antout); - free (antout); - antout = NULL; - - /* Compute earliestness. EARLYOUT is not needed except to compute - EARLYIN, so free its memory as soon as we return from - compute_earlyinout. */ - earlyin = sbitmap_vector_alloc (n_blocks, n_exprs); - earlyout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_earlyinout (n_blocks, n_exprs, s_preds, transp, antin, - earlyin, earlyout); - free (earlyout); - earlyout = NULL; - - /* Compute delayedness. DELAYOUT is not needed except to compute - DELAYIN, so free its memory as soon as we return from - compute_delayinout. We also no longer need ANTIN and EARLYIN. */ - delayin = sbitmap_vector_alloc (n_blocks, n_exprs); - delayout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_delayinout (n_blocks, n_exprs, s_preds, antloc, - antin, earlyin, delayin, delayout); - free (delayout); - delayout = NULL; - free (antin); - antin = NULL; - free (earlyin); - earlyin = NULL; - - /* Compute latestness. We no longer need DELAYIN after we compute - LATEIN. */ - latein = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_latein (n_blocks, n_exprs, s_succs, antloc, delayin, latein); - free (delayin); - delayin = NULL; - - /* Compute isolatedness. ISOIN is not needed except to compute - ISOOUT, so free its memory as soon as we return from - compute_isoinout. */ - isoin = sbitmap_vector_alloc (n_blocks, n_exprs); - isoout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_isoinout (n_blocks, s_succs, antloc, latein, isoin, isoout); - free (isoin); - isoin = NULL; - - /* Now compute optimal placement points and the redundant expressions. */ - compute_optimal (n_blocks, latein, isoout, optimal); - compute_redundant (n_blocks, n_exprs, antloc, latein, isoout, redundant); - free (latein); - latein = NULL; - free (isoout); - isoout = NULL; -} - -/* Given local properties TRANSP, AVLOC, return the redundant and optimal - computation points for expressions on the reverse flowgraph. 
- - To reduce overall memory consumption, we allocate memory immediately - before its needed and deallocate it as soon as possible. */ - -void -pre_rev_lcm (n_blocks, n_exprs, s_preds, s_succs, transp, - avloc, redundant, optimal) - int n_blocks; - int n_exprs; - int_list_ptr *s_preds; - int_list_ptr *s_succs; - sbitmap *transp; - sbitmap *avloc; - sbitmap *redundant; - sbitmap *optimal; -{ - sbitmap *avin, *avout, *fartherin, *fartherout, *earlierin, *earlierout; - sbitmap *firstout, *rev_isoin, *rev_isoout; - - /* Compute global availability. AVIN is not needed except to - compute AVOUT, so free its memory as soon as we return from - compute_avinout. */ - avin = sbitmap_vector_alloc (n_blocks, n_exprs); - avout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_avinout (n_blocks, s_preds, avloc, transp, avin, avout); - free (avin); - avin = NULL; - - /* Compute fartherness. FARTHERIN is not needed except to compute - FARTHEROUT, so free its memory as soon as we return from - compute_earlyinout. */ - fartherin = sbitmap_vector_alloc (n_blocks, n_exprs); - fartherout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_fartherinout (n_blocks, n_exprs, s_succs, transp, - avout, fartherin, fartherout); - free (fartherin); - fartherin = NULL; - - /* Compute earlierness. EARLIERIN is not needed except to compute - EARLIEROUT, so free its memory as soon as we return from - compute_delayinout. We also no longer need AVOUT and FARTHEROUT. */ - earlierin = sbitmap_vector_alloc (n_blocks, n_exprs); - earlierout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_earlierinout (n_blocks, n_exprs, s_succs, avloc, - avout, fartherout, earlierin, earlierout); - free (earlierin); - earlierin = NULL; - free (avout); - avout = NULL; - free (fartherout); - fartherout = NULL; - - /* Compute firstness. We no longer need EARLIEROUT after we compute - FIRSTOUT. */ - firstout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_firstout (n_blocks, n_exprs, s_preds, avloc, earlierout, firstout); - free (earlierout); - earlierout = NULL; - - /* Compute rev_isolatedness. ISOIN is not needed except to compute - ISOOUT, so free its memory as soon as we return from - compute_isoinout. */ - rev_isoin = sbitmap_vector_alloc (n_blocks, n_exprs); - rev_isoout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_rev_isoinout (n_blocks, s_preds, avloc, firstout, - rev_isoin, rev_isoout); - free (rev_isoout); - rev_isoout = NULL; - - /* Now compute optimal placement points and the redundant expressions. */ - compute_optimal (n_blocks, firstout, rev_isoin, optimal); - compute_redundant (n_blocks, n_exprs, avloc, firstout, rev_isoin, redundant); - free (firstout); - firstout = NULL; - free (rev_isoin); - rev_isoin = NULL; -} - -/* Compute expression anticipatability at entrance and exit of each block. */ +#include "tm_p.h" + +/* We want target macros for the mode switching code to be able to refer + to instruction attribute values. */ +#include "insn-attr.h" + +/* Edge based LCM routines. */ +static void compute_antinout_edge PARAMS ((sbitmap *, sbitmap *, + sbitmap *, sbitmap *)); +static void compute_earliest PARAMS ((struct edge_list *, int, + sbitmap *, sbitmap *, + sbitmap *, sbitmap *, + sbitmap *)); +static void compute_laterin PARAMS ((struct edge_list *, sbitmap *, + sbitmap *, sbitmap *, + sbitmap *)); +static void compute_insert_delete PARAMS ((struct edge_list *edge_list, + sbitmap *, sbitmap *, + sbitmap *, sbitmap *, + sbitmap *)); + +/* Edge based LCM routines on a reverse flowgraph. 
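[Editorial note: throughout this file the per-block and per-edge sets are sbitmap vectors, and the dataflow equations are written with compound three-operand primitives. As a minimal standalone sketch (a toy model, not GCC's sbitmap API: each set of expressions is one unsigned word, expression i is bit i), the primitives used below reduce to single bitwise expressions:

#include <stdio.h>

/* Toy stand-ins for the sbitmap primitives used in this file.  The
   real versions operate on arbitrary-length bit vectors and report
   whether the destination changed; these just return the value.  */
static unsigned a_or_b_and_c (unsigned b, unsigned c, unsigned d)
{ return b | (c & d); }                 /* sbitmap_a_or_b_and_c  */
static unsigned a_and_b_or_c (unsigned b, unsigned c, unsigned d)
{ return b & (c | d); }                 /* sbitmap_a_and_b_or_c  */
static unsigned union_of_diff (unsigned b, unsigned c, unsigned d)
{ return b | (c & ~d); }                /* sbitmap_union_of_diff */

int main (void)
{
  unsigned antloc = 0x5, transp = 0x3, antout = 0x6;

  /* The ANTIN equation used in compute_antinout_edge:
     ANTIN = ANTLOC | (TRANSP & ANTOUT).  */
  printf ("antin = %#x\n", a_or_b_and_c (antloc, transp, antout));
  printf ("%#x %#x\n", a_and_b_or_c (6, 1, 4), union_of_diff (1, 2, 2));
  return 0;
}

The later sketches in these notes reuse this one-word convention.]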
*/ +static void compute_farthest PARAMS ((struct edge_list *, int, + sbitmap *, sbitmap *, + sbitmap*, sbitmap *, + sbitmap *)); +static void compute_nearerout PARAMS ((struct edge_list *, sbitmap *, + sbitmap *, sbitmap *, + sbitmap *)); +static void compute_rev_insert_delete PARAMS ((struct edge_list *edge_list, + sbitmap *, sbitmap *, + sbitmap *, sbitmap *, + sbitmap *)); + +/* Edge based lcm routines. */ + +/* Compute expression anticipatability at entrance and exit of each block. + This is done based on the flow graph, and not on the pred-succ lists. + Other than that, its pretty much identical to compute_antinout. */ static void -compute_antinout (n_blocks, s_succs, antloc, transp, antin, antout) - int n_blocks; - int_list_ptr *s_succs; +compute_antinout_edge (antloc, transp, antin, antout) sbitmap *antloc; sbitmap *transp; sbitmap *antin; sbitmap *antout; { - int bb, changed, passes; - sbitmap old_changed, new_changed; + int bb; + edge e; + basic_block *worklist, *qin, *qout, *qend; + unsigned int qlen; + + /* Allocate a worklist array/queue. Entries are only added to the + list if they were not already on the list. So the size is + bounded by the number of basic blocks. */ + qin = qout = worklist + = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks); + + /* We want a maximal solution, so make an optimistic initialization of + ANTIN. */ + sbitmap_vector_ones (antin, n_basic_blocks); + + /* Put every block on the worklist; this is necessary because of the + optimistic initialization of ANTIN above. */ + for (bb = n_basic_blocks - 1; bb >= 0; bb--) + { + *qin++ = BASIC_BLOCK (bb); + BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb); + } - sbitmap_zero (antout[n_blocks - 1]); - sbitmap_vector_ones (antin, n_blocks); + qin = worklist; + qend = &worklist[n_basic_blocks]; + qlen = n_basic_blocks; - old_changed = sbitmap_alloc (n_blocks); - new_changed = sbitmap_alloc (n_blocks); - sbitmap_ones (old_changed); + /* Mark blocks which are predecessors of the exit block so that we + can easily identify them below. */ + for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next) + e->src->aux = EXIT_BLOCK_PTR; - passes = 0; - changed = 1; - while (changed) + /* Iterate until the worklist is empty. */ + while (qlen) { - changed = 0; - sbitmap_zero (new_changed); - /* We scan the blocks in the reverse order to speed up - the convergence. */ - for (bb = n_blocks - 1; bb >= 0; bb--) + /* Take the first entry off the worklist. */ + basic_block b = *qout++; + bb = b->index; + qlen--; + + if (qout >= qend) + qout = worklist; + + if (b->aux == EXIT_BLOCK_PTR) + /* Do not clear the aux field for blocks which are predecessors of + the EXIT block. That way we never add then to the worklist + again. */ + sbitmap_zero (antout[bb]); + else { - int_list_ptr ps; + /* Clear the aux field of this block so that it can be added to + the worklist again if necessary. */ + b->aux = NULL; + sbitmap_intersection_of_succs (antout[bb], antin, bb); + } - /* If none of the successors of this block have changed, - then this block is not going to change. */ - for (ps = s_succs[bb] ; ps; ps = ps->next) + if (sbitmap_a_or_b_and_c (antin[bb], antloc[bb], transp[bb], antout[bb])) + /* If the in state of this block changed, then we need + to add the predecessors of this block to the worklist + if they are not already on the worklist. 
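[Editorial note: the worklist discipline just described (each block appears on the queue at most once, with the aux field marking membership) is a plain circular buffer. A standalone sketch, with plain ints for blocks and an on_list[] array standing in for the aux field:

#include <stdio.h>
#include <stdlib.h>

#define N 5                             /* number of toy "blocks" */

int main (void)
{
  int *worklist = (int *) malloc (sizeof (int) * N);
  int *qin = worklist, *qout = worklist, *qend = worklist + N;
  unsigned qlen = 0;
  int on_list[N] = {0};
  int bb;

  /* Prime the queue with every block, as the optimistic
     initialization requires.  */
  for (bb = 0; bb < N; bb++)
    {
      *qin++ = bb;
      on_list[bb] = 1;
      qlen++;
    }
  qin = worklist;                       /* qin wrapped; queue is full */

  while (qlen)
    {
      bb = *qout++;
      qlen--;
      if (qout >= qend)
        qout = worklist;                /* wrap the circular buffer */
      on_list[bb] = 0;
      printf ("processing block %d\n", bb);
      /* A real pass would now recompute the block's sets and, for
         each predecessor P whose input changed, requeue it:
           if (!on_list[P]) { *qin++ = P; on_list[P] = 1; qlen++;
                              if (qin >= qend) qin = worklist; }  */
    }

  free (worklist);
  return 0;
}
]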
*/ + for (e = b->pred; e; e = e->pred_next) + if (!e->src->aux && e->src != ENTRY_BLOCK_PTR) { - if (INT_LIST_VAL (ps) == EXIT_BLOCK - || INT_LIST_VAL (ps) == ENTRY_BLOCK) - break; - - if (TEST_BIT (old_changed, INT_LIST_VAL (ps)) - || TEST_BIT (new_changed, INT_LIST_VAL (ps))) - break; + *qin++ = e->src; + e->src->aux = e; + qlen++; + if (qin >= qend) + qin = worklist; } + } + + clear_aux_for_edges (); + clear_aux_for_blocks (); + free (worklist); +} + +/* Compute the earliest vector for edge based lcm. */ - if (!ps) - continue; +static void +compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest) + struct edge_list *edge_list; + int n_exprs; + sbitmap *antin, *antout, *avout, *kill, *earliest; +{ + sbitmap difference, temp_bitmap; + int x, num_edges; + basic_block pred, succ; + + num_edges = NUM_EDGES (edge_list); - if (bb != n_blocks - 1) - sbitmap_intersect_of_successors (antout[bb], antin, - bb, s_succs); - if (sbitmap_a_or_b_and_c (antin[bb], antloc[bb], - transp[bb], antout[bb])) + difference = sbitmap_alloc (n_exprs); + temp_bitmap = sbitmap_alloc (n_exprs); + + for (x = 0; x < num_edges; x++) + { + pred = INDEX_EDGE_PRED_BB (edge_list, x); + succ = INDEX_EDGE_SUCC_BB (edge_list, x); + if (pred == ENTRY_BLOCK_PTR) + sbitmap_copy (earliest[x], antin[succ->index]); + else + { + /* We refer to the EXIT_BLOCK index, instead of testing for + EXIT_BLOCK_PTR, so that EXIT_BLOCK_PTR's index can be + changed so as to pretend it's a regular block, so that + its antin can be taken into account. */ + if (succ->index == EXIT_BLOCK) + sbitmap_zero (earliest[x]); + else { - changed = 1; - SET_BIT (new_changed, bb); + sbitmap_difference (difference, antin[succ->index], + avout[pred->index]); + sbitmap_not (temp_bitmap, antout[pred->index]); + sbitmap_a_and_b_or_c (earliest[x], difference, + kill[pred->index], temp_bitmap); } } - sbitmap_copy (old_changed, new_changed); - passes++; } - free (old_changed); - free (new_changed); + + sbitmap_free (temp_bitmap); + sbitmap_free (difference); } -/* Compute expression earliestness at entrance and exit of each block. +/* later(p,s) is dependent on the calculation of laterin(p). + laterin(p) is dependent on the calculation of later(p2,p). + + laterin(ENTRY) is defined as all 0's + later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY) + laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)). + + If we progress in this manner, starting with all basic blocks + in the work list, anytime we change later(bb), we need to add + succs(bb) to the worklist if they are not already on the worklist. + + Boundary conditions: + + We prime the worklist all the normal basic blocks. The ENTRY block can + never be added to the worklist since it is never the successor of any + block. We explicitly prevent the EXIT block from being added to the + worklist. + + We optimistically initialize LATER. That is the only time this routine + will compute LATER for an edge out of the entry block since the entry + block is never on the worklist. Thus, LATERIN is neither used nor + computed for the ENTRY block. - From Advanced Compiler Design and Implementation pp411. + Since the EXIT block is never added to the worklist, we will neither + use nor compute LATERIN for the exit block. Edges which reach the + EXIT block are handled in the normal fashion inside the loop. However, + the insertion/deletion computation needs LATERIN(EXIT), so we have + to compute it. 
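[Editorial note: concretely, the two equations iterated by this routine are, for a block B with incoming edges E_i and an outgoing edge E = (B,S): LATERIN(B) = intersection of LATER(E_i), and LATER(E) = EARLIEST(E) | (LATERIN(B) & ~ANTLOC(B)). A standalone sketch on a straight-line graph ENTRY -> b0 -> b1 -> EXIT, using the one-word toy convention with made-up values; a straight line converges in one pass, so no worklist is needed:

#include <stdio.h>

int main (void)
{
  /* Edges: e0 = (ENTRY,b0), e1 = (b0,b1), e2 = (b1,EXIT).  */
  unsigned earliest[3] = {0x1, 0x2, 0x0};
  unsigned antloc[2] = {0x2, 0x1};      /* for b0 and b1 */
  unsigned later[3], laterin[3];        /* laterin[2] is EXIT's slot */

  later[0] = earliest[0];       /* edge out of ENTRY: LATER = EARLIEST */
  laterin[0] = later[0];        /* single in-edge, so no intersection  */
  later[1] = earliest[1] | (laterin[0] & ~antloc[0]);
  laterin[1] = later[1];
  later[2] = earliest[2] | (laterin[1] & ~antloc[1]);
  laterin[2] = later[2];        /* the extra LATERIN entry for EXIT    */

  printf ("laterin: b0=%#x b1=%#x exit=%#x\n",
          laterin[0], laterin[1], laterin[2]);
  return 0;
}
]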
*/ - An expression is earliest at the entrance to basic block BB if no - block from entry to block BB both evaluates the expression and - produces the same value as evaluating it at the entry to block BB - does. Similarly for earlistness at basic block BB exit. */ +static void +compute_laterin (edge_list, earliest, antloc, later, laterin) + struct edge_list *edge_list; + sbitmap *earliest, *antloc, *later, *laterin; +{ + int bb, num_edges, i; + edge e; + basic_block *worklist, *qin, *qout, *qend; + unsigned int qlen; + + num_edges = NUM_EDGES (edge_list); + + /* Allocate a worklist array/queue. Entries are only added to the + list if they were not already on the list. So the size is + bounded by the number of basic blocks. */ + qin = qout = worklist + = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1)); + + /* Initialize a mapping from each edge to its index. */ + for (i = 0; i < num_edges; i++) + INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i; + + /* We want a maximal solution, so initially consider LATER true for + all edges. This allows propagation through a loop since the incoming + loop edge will have LATER set, so if all the other incoming edges + to the loop are set, then LATERIN will be set for the head of the + loop. + + If the optimistic setting of LATER on that edge was incorrect (for + example the expression is ANTLOC in a block within the loop) then + this algorithm will detect it when we process the block at the head + of the optimistic edge. That will requeue the affected blocks. */ + sbitmap_vector_ones (later, num_edges); + + /* Note that even though we want an optimistic setting of LATER, we + do not want to be overly optimistic. Consider an outgoing edge from + the entry block. That edge should always have a LATER value the + same as EARLIEST for that edge. */ + for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next) + sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]); + + /* Add all the blocks to the worklist. This prevents an early exit from + the loop given our optimistic initialization of LATER above. */ + for (bb = 0; bb < n_basic_blocks; bb++) + { + basic_block b = BASIC_BLOCK (bb); + *qin++ = b; + b->aux = b; + } + qin = worklist; + /* Note that we do not use the last allocated element for our queue, + as EXIT_BLOCK is never inserted into it. In fact the above allocation + of n_basic_blocks + 1 elements is not encessary. */ + qend = &worklist[n_basic_blocks]; + qlen = n_basic_blocks; + + /* Iterate until the worklist is empty. */ + while (qlen) + { + /* Take the first entry off the worklist. */ + basic_block b = *qout++; + b->aux = NULL; + qlen--; + if (qout >= qend) + qout = worklist; + + /* Compute the intersection of LATERIN for each incoming edge to B. */ + bb = b->index; + sbitmap_ones (laterin[bb]); + for (e = b->pred; e != NULL; e = e->pred_next) + sbitmap_a_and_b (laterin[bb], laterin[bb], later[(size_t)e->aux]); + + /* Calculate LATER for all outgoing edges. */ + for (e = b->succ; e != NULL; e = e->succ_next) + if (sbitmap_union_of_diff (later[(size_t) e->aux], + earliest[(size_t) e->aux], + laterin[e->src->index], + antloc[e->src->index]) + /* If LATER for an outgoing edge was changed, then we need + to add the target of the outgoing edge to the worklist. */ + && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0) + { + *qin++ = e->dest; + e->dest->aux = e; + qlen++; + if (qin >= qend) + qin = worklist; + } + } + + /* Computation of insertion and deletion points requires computing LATERIN + for the EXIT block. 
We allocated an extra entry in the LATERIN array + for just this purpose. */ + sbitmap_ones (laterin[n_basic_blocks]); + for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next) + sbitmap_a_and_b (laterin[n_basic_blocks], + laterin[n_basic_blocks], + later[(size_t) e->aux]); + + clear_aux_for_edges (); + free (worklist); +} + +/* Compute the insertion and deletion points for edge based LCM. */ static void -compute_earlyinout (n_blocks, n_exprs, s_preds, transp, antin, - earlyin, earlyout) - int n_blocks; +compute_insert_delete (edge_list, antloc, later, laterin, + insert, delete) + struct edge_list *edge_list; + sbitmap *antloc, *later, *laterin, *insert, *delete; +{ + int x; + + for (x = 0; x < n_basic_blocks; x++) + sbitmap_difference (delete[x], antloc[x], laterin[x]); + + for (x = 0; x < NUM_EDGES (edge_list); x++) + { + basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x); + + if (b == EXIT_BLOCK_PTR) + sbitmap_difference (insert[x], later[x], laterin[n_basic_blocks]); + else + sbitmap_difference (insert[x], later[x], laterin[b->index]); + } +} + +/* Given local properties TRANSP, ANTLOC, AVOUT, KILL return the insert and + delete vectors for edge based LCM. Returns an edgelist which is used to + map the insert vector to what edge an expression should be inserted on. */ + +struct edge_list * +pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete) + FILE *file ATTRIBUTE_UNUSED; int n_exprs; - int_list_ptr *s_preds; sbitmap *transp; - sbitmap *antin; - sbitmap *earlyin; - sbitmap *earlyout; + sbitmap *avloc; + sbitmap *antloc; + sbitmap *kill; + sbitmap **insert; + sbitmap **delete; { - int bb, changed, passes; - sbitmap temp_bitmap; - sbitmap old_changed, new_changed; + sbitmap *antin, *antout, *earliest; + sbitmap *avin, *avout; + sbitmap *later, *laterin; + struct edge_list *edge_list; + int num_edges; - temp_bitmap = sbitmap_alloc (n_exprs); + edge_list = create_edge_list (); + num_edges = NUM_EDGES (edge_list); - sbitmap_vector_zero (earlyout, n_blocks); - sbitmap_ones (earlyin[0]); +#ifdef LCM_DEBUG_INFO + if (file) + { + fprintf (file, "Edge List:\n"); + verify_edge_list (file, edge_list); + print_edge_list (file, edge_list); + dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks); + dump_sbitmap_vector (file, "antloc", "", antloc, n_basic_blocks); + dump_sbitmap_vector (file, "avloc", "", avloc, n_basic_blocks); + dump_sbitmap_vector (file, "kill", "", kill, n_basic_blocks); + } +#endif - old_changed = sbitmap_alloc (n_blocks); - new_changed = sbitmap_alloc (n_blocks); - sbitmap_ones (old_changed); + /* Compute global availability. */ + avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_available (avloc, kill, avout, avin); + sbitmap_vector_free (avin); - passes = 0; - changed = 1; - while (changed) + /* Compute global anticipatability. */ + antin = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + antout = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_antinout_edge (antloc, transp, antin, antout); + +#ifdef LCM_DEBUG_INFO + if (file) { - changed = 0; - sbitmap_zero (new_changed); - for (bb = 0; bb < n_blocks; bb++) - { - int_list_ptr ps; + dump_sbitmap_vector (file, "antin", "", antin, n_basic_blocks); + dump_sbitmap_vector (file, "antout", "", antout, n_basic_blocks); + } +#endif - /* If none of the predecessors of this block have changed, - then this block is not going to change. 
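[Editorial note: the fixpoint computed here is AVIN(B) = intersection of AVOUT over B's predecessors (the empty set when ENTRY is a predecessor) and AVOUT(B) = AVLOC(B) | (AVIN(B) & ~KILL(B)). A minimal standalone sketch, in the one-word toy convention, for a single block with an ENTRY edge and a self loop, showing the optimistic all-ones seed converging downward:

#include <stdio.h>

int main (void)
{
  unsigned avloc = 0x4, kill = 0x1;
  unsigned avin, avout = ~0u;           /* optimistic: start all-ones */
  int changed = 1, passes = 0;

  while (changed)
    {
      unsigned new_out;

      /* Preds are ENTRY (empty set) and the block itself, so the
         intersection collapses to the empty set; this is the special
         case keyed off the aux field in the real routine.  */
      avin = 0 & avout;
      new_out = avloc | (avin & ~kill);
      changed = (new_out != avout);
      avout = new_out;
      passes++;
    }

  printf ("avout = %#x after %d passes\n", avout, passes);
  return 0;
}
]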
*/ - for (ps = s_preds[bb] ; ps; ps = ps->next) - { - if (INT_LIST_VAL (ps) == EXIT_BLOCK - || INT_LIST_VAL (ps) == ENTRY_BLOCK) - break; + /* Compute earliestness. */ + earliest = sbitmap_vector_alloc (num_edges, n_exprs); + compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest); - if (TEST_BIT (old_changed, INT_LIST_VAL (ps)) - || TEST_BIT (new_changed, INT_LIST_VAL (ps))) - break; - } +#ifdef LCM_DEBUG_INFO + if (file) + dump_sbitmap_vector (file, "earliest", "", earliest, num_edges); +#endif - if (!ps) - continue; + sbitmap_vector_free (antout); + sbitmap_vector_free (antin); + sbitmap_vector_free (avout); - if (bb != 0) - sbitmap_union_of_predecessors (earlyin[bb], earlyout, - bb, s_preds); - sbitmap_not (temp_bitmap, transp[bb]); - if (sbitmap_union_of_diff (earlyout[bb], temp_bitmap, - earlyin[bb], antin[bb])) - { - changed = 1; - SET_BIT (new_changed, bb); - } - } - sbitmap_copy (old_changed, new_changed); - passes++; + later = sbitmap_vector_alloc (num_edges, n_exprs); + + /* Allocate an extra element for the exit block in the laterin vector. */ + laterin = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs); + compute_laterin (edge_list, earliest, antloc, later, laterin); + +#ifdef LCM_DEBUG_INFO + if (file) + { + dump_sbitmap_vector (file, "laterin", "", laterin, n_basic_blocks + 1); + dump_sbitmap_vector (file, "later", "", later, num_edges); } - free (old_changed); - free (new_changed); - free (temp_bitmap); -} +#endif -/* Compute expression delayedness at entrance and exit of each block. + sbitmap_vector_free (earliest); - From Advanced Compiler Design and Implementation pp411. + *insert = sbitmap_vector_alloc (num_edges, n_exprs); + *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete); - An expression is delayed at the entrance to BB if it is anticipatable - and earliest at that point and if all subsequent computations of - the expression are in block BB. */ + sbitmap_vector_free (laterin); + sbitmap_vector_free (later); -static void -compute_delayinout (n_blocks, n_exprs, s_preds, antloc, - antin, earlyin, delayin, delayout) - int n_blocks; - int n_exprs; - int_list_ptr *s_preds; - sbitmap *antloc; - sbitmap *antin; - sbitmap *earlyin; - sbitmap *delayin; - sbitmap *delayout; +#ifdef LCM_DEBUG_INFO + if (file) + { + dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges); + dump_sbitmap_vector (file, "pre_delete_map", "", *delete, + n_basic_blocks); + } +#endif + + return edge_list; +} + +/* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors. + Return the number of passes we performed to iterate to a solution. */ + +void +compute_available (avloc, kill, avout, avin) + sbitmap *avloc, *kill, *avout, *avin; { - int bb, changed, passes; - sbitmap *anti_and_early; - sbitmap temp_bitmap; + int bb; + edge e; + basic_block *worklist, *qin, *qout, *qend; + unsigned int qlen; + + /* Allocate a worklist array/queue. Entries are only added to the + list if they were not already on the list. So the size is + bounded by the number of basic blocks. */ + qin = qout = worklist + = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks); + + /* We want a maximal solution. */ + sbitmap_vector_ones (avout, n_basic_blocks); + + /* Put every block on the worklist; this is necessary because of the + optimistic initialization of AVOUT above. 
*/ + for (bb = 0; bb < n_basic_blocks; bb++) + { + *qin++ = BASIC_BLOCK (bb); + BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb); + } - temp_bitmap = sbitmap_alloc (n_exprs); + qin = worklist; + qend = &worklist[n_basic_blocks]; + qlen = n_basic_blocks; + + /* Mark blocks which are successors of the entry block so that we + can easily identify them below. */ + for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next) + e->dest->aux = ENTRY_BLOCK_PTR; - /* This is constant throughout the flow equations below, so compute - it once to save time. */ - anti_and_early = sbitmap_vector_alloc (n_blocks, n_exprs); - for (bb = 0; bb < n_blocks; bb++) - sbitmap_a_and_b (anti_and_early[bb], antin[bb], earlyin[bb]); - - sbitmap_vector_zero (delayout, n_blocks); - sbitmap_copy (delayin[0], anti_and_early[0]); - - passes = 0; - changed = 1; - while (changed) + /* Iterate until the worklist is empty. */ + while (qlen) { - changed = 0; - for (bb = 0; bb < n_blocks; bb++) + /* Take the first entry off the worklist. */ + basic_block b = *qout++; + bb = b->index; + qlen--; + + if (qout >= qend) + qout = worklist; + + /* If one of the predecessor blocks is the ENTRY block, then the + intersection of avouts is the null set. We can identify such blocks + by the special value in the AUX field in the block structure. */ + if (b->aux == ENTRY_BLOCK_PTR) + /* Do not clear the aux field for blocks which are successors of the + ENTRY block. That way we never add then to the worklist again. */ + sbitmap_zero (avin[bb]); + else { - if (bb != 0) + /* Clear the aux field of this block so that it can be added to + the worklist again if necessary. */ + b->aux = NULL; + sbitmap_intersection_of_preds (avin[bb], avout, bb); + } + + if (sbitmap_union_of_diff (avout[bb], avloc[bb], avin[bb], kill[bb])) + /* If the out state of this block changed, then we need + to add the successors of this block to the worklist + if they are not already on the worklist. */ + for (e = b->succ; e; e = e->succ_next) + if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR) { - sbitmap_intersect_of_predecessors (temp_bitmap, delayout, - bb, s_preds); - changed |= sbitmap_a_or_b (delayin[bb], - anti_and_early[bb], - temp_bitmap); + *qin++ = e->dest; + e->dest->aux = e; + qlen++; + + if (qin >= qend) + qin = worklist; } - sbitmap_not (temp_bitmap, antloc[bb]); - changed |= sbitmap_a_and_b (delayout[bb], - temp_bitmap, - delayin[bb]); - } - passes++; } - /* We're done with this, so go ahead and free it's memory now instead - of waiting until the end of pre. */ - free (anti_and_early); - free (temp_bitmap); + clear_aux_for_edges (); + clear_aux_for_blocks (); + free (worklist); } -/* Compute latestness. - - From Advanced Compiler Design and Implementation pp412. - - An expression is latest at the entrance to block BB if that is an optimal - point for computing the expression and if on every path from block BB's - entrance to the exit block, any optimal computation point for the - expression occurs after one of the points at which the expression was - computed in the original flowgraph. */ +/* Compute the farthest vector for edge based lcm. 
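[Editorial note: for an edge (P,S) touching neither ENTRY nor EXIT, the farthest computation evaluates FARTHEST = (ST_AVOUT(P) & ~ST_ANTIN(S)) & (KILL(S) | ~ST_AVIN(S)); it is the earliest computation with the roles of availability and anticipatability exchanged. A one-word standalone sketch, with illustrative values:

#include <stdio.h>

/* Toy one-word version of the interior-edge case of compute_farthest
   for an edge (P,S); the parameter names mirror the vectors above.  */
static unsigned
farthest_edge (unsigned st_avout_p, unsigned st_antin_s,
               unsigned st_avin_s, unsigned kill_s)
{
  unsigned difference = st_avout_p & ~st_antin_s;

  /* sbitmap_a_and_b_or_c: difference & (kill | ~avin).  */
  return difference & (kill_s | ~st_avin_s);
}

int main (void)
{
  printf ("farthest = %#x\n", farthest_edge (0x7, 0x1, 0x2, 0x4));
  return 0;
}
]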
*/ static void -compute_latein (n_blocks, n_exprs, s_succs, antloc, delayin, latein) - int n_blocks; +compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin, + kill, farthest) + struct edge_list *edge_list; int n_exprs; - int_list_ptr *s_succs; - sbitmap *antloc; - sbitmap *delayin; - sbitmap *latein; + sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest; { - int bb; - sbitmap temp_bitmap; + sbitmap difference, temp_bitmap; + int x, num_edges; + basic_block pred, succ; + + num_edges = NUM_EDGES (edge_list); + difference = sbitmap_alloc (n_exprs); temp_bitmap = sbitmap_alloc (n_exprs); - for (bb = 0; bb < n_blocks; bb++) + for (x = 0; x < num_edges; x++) { - /* The last block is succeeded only by the exit block; therefore, - temp_bitmap will not be set by the following call! */ - if (bb == n_blocks - 1) + pred = INDEX_EDGE_PRED_BB (edge_list, x); + succ = INDEX_EDGE_SUCC_BB (edge_list, x); + if (succ == EXIT_BLOCK_PTR) + sbitmap_copy (farthest[x], st_avout[pred->index]); + else { - sbitmap_intersect_of_successors (temp_bitmap, delayin, - bb, s_succs); - sbitmap_not (temp_bitmap, temp_bitmap); + if (pred == ENTRY_BLOCK_PTR) + sbitmap_zero (farthest[x]); + else + { + sbitmap_difference (difference, st_avout[pred->index], + st_antin[succ->index]); + sbitmap_not (temp_bitmap, st_avin[succ->index]); + sbitmap_a_and_b_or_c (farthest[x], difference, + kill[succ->index], temp_bitmap); + } } - else - sbitmap_ones (temp_bitmap); - sbitmap_a_and_b_or_c (latein[bb], delayin[bb], - antloc[bb], temp_bitmap); } - free (temp_bitmap); -} -/* Compute isolated. + sbitmap_free (temp_bitmap); + sbitmap_free (difference); +} - From Advanced Compiler Design and Implementation pp413. +/* Compute nearer and nearerout vectors for edge based lcm. - A computationally optimal placement for the evaluation of an expression - is defined to be isolated if and only if on every path from a successor - of the block in which it is computed to the exit block, every original - computation of the expression is preceded by the optimal placement point. */ + This is the mirror of compute_laterin, additional comments on the + implementation can be found before compute_laterin. */ static void -compute_isoinout (n_blocks, s_succs, antloc, latein, isoin, isoout) - int n_blocks; - int_list_ptr *s_succs; - sbitmap *antloc; - sbitmap *latein; - sbitmap *isoin; - sbitmap *isoout; +compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout) + struct edge_list *edge_list; + sbitmap *farthest, *st_avloc, *nearer, *nearerout; { - int bb, changed, passes; - - sbitmap_vector_zero (isoin, n_blocks); - sbitmap_zero (isoout[n_blocks - 1]); + int bb, num_edges, i; + edge e; + basic_block *worklist, *tos; + + num_edges = NUM_EDGES (edge_list); + + /* Allocate a worklist array/queue. Entries are only added to the + list if they were not already on the list. So the size is + bounded by the number of basic blocks. */ + tos = worklist + = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1)); + + /* Initialize NEARER for each edge and build a mapping from an edge to + its index. */ + for (i = 0; i < num_edges; i++) + INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i; + + /* We want a maximal solution. */ + sbitmap_vector_ones (nearer, num_edges); + + /* Note that even though we want an optimistic setting of NEARER, we + do not want to be overly optimistic. Consider an incoming edge to + the exit block. That edge should always have a NEARER value the + same as FARTHEST for that edge. 
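[Editorial note: one implementation wrinkle: unlike compute_laterin's circular FIFO queue, this routine keeps its worklist as a stack (tos is a stack pointer); either visitation order reaches the same fixpoint. A standalone sketch of the push/pop discipline with toy int blocks:

#include <stdio.h>
#include <stdlib.h>

#define N 4                             /* number of toy "blocks" */

int main (void)
{
  int *worklist = (int *) malloc (sizeof (int) * (N + 1));
  int *tos = worklist;
  int bb;

  for (bb = 0; bb < N; bb++)
    *tos++ = bb;                        /* push every block */

  while (tos != worklist)
    {
      bb = *--tos;                      /* pop the most recent entry */
      printf ("visit block %d\n", bb);
      /* A real pass would push any source block whose NEARER set
         changed and which is not already on the stack.  */
    }

  free (worklist);
  return 0;
}
]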
*/ + for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next) + sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]); + + /* Add all the blocks to the worklist. This prevents an early exit + from the loop given our optimistic initialization of NEARER. */ + for (bb = 0; bb < n_basic_blocks; bb++) + { + basic_block b = BASIC_BLOCK (bb); + *tos++ = b; + b->aux = b; + } - passes = 0; - changed = 1; - while (changed) + /* Iterate until the worklist is empty. */ + while (tos != worklist) { - changed = 0; - for (bb = n_blocks - 1; bb >= 0; bb--) - { - if (bb != n_blocks - 1) - sbitmap_intersect_of_successors (isoout[bb], isoin, - bb, s_succs); - changed |= sbitmap_union_of_diff (isoin[bb], latein[bb], - isoout[bb], antloc[bb]); - } - passes++; + /* Take the first entry off the worklist. */ + basic_block b = *--tos; + b->aux = NULL; + + /* Compute the intersection of NEARER for each outgoing edge from B. */ + bb = b->index; + sbitmap_ones (nearerout[bb]); + for (e = b->succ; e != NULL; e = e->succ_next) + sbitmap_a_and_b (nearerout[bb], nearerout[bb], + nearer[(size_t) e->aux]); + + /* Calculate NEARER for all incoming edges. */ + for (e = b->pred; e != NULL; e = e->pred_next) + if (sbitmap_union_of_diff (nearer[(size_t) e->aux], + farthest[(size_t) e->aux], + nearerout[e->dest->index], + st_avloc[e->dest->index]) + /* If NEARER for an incoming edge was changed, then we need + to add the source of the incoming edge to the worklist. */ + && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0) + { + *tos++ = e->src; + e->src->aux = e; + } } + + /* Computation of insertion and deletion points requires computing NEAREROUT + for the ENTRY block. We allocated an extra entry in the NEAREROUT array + for just this purpose. */ + sbitmap_ones (nearerout[n_basic_blocks]); + for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next) + sbitmap_a_and_b (nearerout[n_basic_blocks], + nearerout[n_basic_blocks], + nearer[(size_t) e->aux]); + + clear_aux_for_edges (); + free (tos); } -/* Compute the set of expressions which have optimal computational points - in each basic block. This is the set of expressions that are latest, but - that are not isolated in the block. */ +/* Compute the insertion and deletion points for edge based LCM. */ static void -compute_optimal (n_blocks, latein, isoout, optimal) - int n_blocks; - sbitmap *latein; - sbitmap *isoout; - sbitmap *optimal; +compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout, + insert, delete) + struct edge_list *edge_list; + sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete; { - int bb; + int x; + + for (x = 0; x < n_basic_blocks; x++) + sbitmap_difference (delete[x], st_avloc[x], nearerout[x]); - for (bb = 0; bb < n_blocks; bb++) - sbitmap_difference (optimal[bb], latein[bb], isoout[bb]); + for (x = 0; x < NUM_EDGES (edge_list); x++) + { + basic_block b = INDEX_EDGE_PRED_BB (edge_list, x); + if (b == ENTRY_BLOCK_PTR) + sbitmap_difference (insert[x], nearer[x], nearerout[n_basic_blocks]); + else + sbitmap_difference (insert[x], nearer[x], nearerout[b->index]); + } } -/* Compute the set of expressions that are redundant in a block. They are - the expressions that are used in the block and that are neither isolated - or latest. */ +/* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the + insert and delete vectors for edge based reverse LCM. Returns an + edgelist which is used to map the insert vector to what edge + an expression should be inserted on. 
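[Editorial note: in both directions the final edit sets fall out the same way: per block B, DELETE(B) is the local property minus the global one, and per edge E, INSERT(E) is the edge property minus the global set at the appropriate end (LATERIN of the successor in the forward problem, NEAREROUT of the predecessor in the reverse one). A one-word sketch of the forward case, with illustrative values:

#include <stdio.h>

int main (void)
{
  /* One block B with LATERIN(B) = 0x2, local ANTLOC(B) = 0x3, and
     one incoming edge E with LATER(E) = 0x6.  */
  unsigned antloc_b = 0x3, laterin_b = 0x2, later_e = 0x6;

  unsigned delete_b = antloc_b & ~laterin_b; /* computed locally, not
                                                postponable to here   */
  unsigned insert_e = later_e & ~laterin_b;  /* placement lands on E  */

  printf ("delete = %#x, insert = %#x\n", delete_b, insert_e);
  return 0;
}
]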
*/ -static void -compute_redundant (n_blocks, n_exprs, antloc, latein, isoout, redundant) - int n_blocks; +struct edge_list * +pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill, + insert, delete) + FILE *file ATTRIBUTE_UNUSED; int n_exprs; - sbitmap *antloc; - sbitmap *latein; - sbitmap *isoout; - sbitmap *redundant; + sbitmap *transp; + sbitmap *st_avloc; + sbitmap *st_antloc; + sbitmap *kill; + sbitmap **insert; + sbitmap **delete; { - int bb; - sbitmap temp_bitmap; + sbitmap *st_antin, *st_antout; + sbitmap *st_avout, *st_avin, *farthest; + sbitmap *nearer, *nearerout; + struct edge_list *edge_list; + int num_edges; + + edge_list = create_edge_list (); + num_edges = NUM_EDGES (edge_list); + + st_antin = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs); + st_antout = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs); + sbitmap_vector_zero (st_antin, n_basic_blocks); + sbitmap_vector_zero (st_antout, n_basic_blocks); + compute_antinout_edge (st_antloc, transp, st_antin, st_antout); + + /* Compute global anticipatability. */ + st_avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + st_avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_available (st_avloc, kill, st_avout, st_avin); + +#ifdef LCM_DEBUG_INFO + if (file) + { + fprintf (file, "Edge List:\n"); + verify_edge_list (file, edge_list); + print_edge_list (file, edge_list); + dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks); + dump_sbitmap_vector (file, "st_avloc", "", st_avloc, n_basic_blocks); + dump_sbitmap_vector (file, "st_antloc", "", st_antloc, n_basic_blocks); + dump_sbitmap_vector (file, "st_antin", "", st_antin, n_basic_blocks); + dump_sbitmap_vector (file, "st_antout", "", st_antout, n_basic_blocks); + dump_sbitmap_vector (file, "st_kill", "", kill, n_basic_blocks); + } +#endif - temp_bitmap = sbitmap_alloc (n_exprs); +#ifdef LCM_DEBUG_INFO + if (file) + { + dump_sbitmap_vector (file, "st_avout", "", st_avout, n_basic_blocks); + dump_sbitmap_vector (file, "st_avin", "", st_avin, n_basic_blocks); + } +#endif - for (bb = 0; bb < n_blocks; bb++) + /* Compute farthestness. */ + farthest = sbitmap_vector_alloc (num_edges, n_exprs); + compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin, + kill, farthest); + +#ifdef LCM_DEBUG_INFO + if (file) + dump_sbitmap_vector (file, "farthest", "", farthest, num_edges); +#endif + + sbitmap_vector_free (st_antin); + sbitmap_vector_free (st_antout); + + sbitmap_vector_free (st_avin); + sbitmap_vector_free (st_avout); + + nearer = sbitmap_vector_alloc (num_edges, n_exprs); + + /* Allocate an extra element for the entry block. */ + nearerout = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs); + compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout); + +#ifdef LCM_DEBUG_INFO + if (file) { - sbitmap_a_or_b (temp_bitmap, latein[bb], isoout[bb]); - sbitmap_difference (redundant[bb], antloc[bb], temp_bitmap); + dump_sbitmap_vector (file, "nearerout", "", nearerout, + n_basic_blocks + 1); + dump_sbitmap_vector (file, "nearer", "", nearer, num_edges); } - free (temp_bitmap); -} +#endif -/* Compute expression availability at entrance and exit of each block. 
*/ + sbitmap_vector_free (farthest); -static void -compute_avinout (n_blocks, s_preds, avloc, transp, avin, avout) - int n_blocks; - int_list_ptr *s_preds; - sbitmap *avloc; - sbitmap *transp; - sbitmap *avin; - sbitmap *avout; -{ - int bb, changed, passes; + *insert = sbitmap_vector_alloc (num_edges, n_exprs); + *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout, + *insert, *delete); - sbitmap_zero (avin[0]); - sbitmap_vector_ones (avout, n_blocks); + sbitmap_vector_free (nearerout); + sbitmap_vector_free (nearer); - passes = 0; - changed = 1; - while (changed) +#ifdef LCM_DEBUG_INFO + if (file) { - changed = 0; - for (bb = 0; bb < n_blocks; bb++) - { - if (bb != 0) - sbitmap_intersect_of_predecessors (avin[bb], avout, - bb, s_preds); - changed |= sbitmap_a_or_b_and_c (avout[bb], avloc[bb], - transp[bb], avin[bb]); - } - passes++; + dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges); + dump_sbitmap_vector (file, "pre_delete_map", "", *delete, + n_basic_blocks); } +#endif + return edge_list; } -/* Compute expression latestness. +/* Mode switching: + + The algorithm for setting the modes consists of scanning the insn list + and finding all the insns which require a specific mode. Each insn gets + a unique struct seginfo element. These structures are inserted into a list + for each basic block. For each entity, there is an array of bb_info over + the flow graph basic blocks (local var 'bb_info'), and contains a list + of all insns within that basic block, in the order they are encountered. + + For each entity, any basic block WITHOUT any insns requiring a specific + mode are given a single entry, without a mode. (Each basic block + in the flow graph must have at least one entry in the segment table.) + + The LCM algorithm is then run over the flow graph to determine where to + place the sets to the highest-priority value in respect of first the first + insn in any one block. Any adjustments required to the transparancy + vectors are made, then the next iteration starts for the next-lower + priority mode, till for each entity all modes are exhasted. + + More details are located in the code for optimize_mode_switching(). */ + +/* This structure contains the information for each insn which requires + either single or double mode to be set. + MODE is the mode this insn must be executed in. + INSN_PTR is the insn to be executed (may be the note that marks the + beginning of a basic block). + BBNUM is the flow graph basic block this insn occurs in. + NEXT is the next insn in the same basic block. */ +struct seginfo +{ + int mode; + rtx insn_ptr; + int bbnum; + struct seginfo *next; + HARD_REG_SET regs_live; +}; + +struct bb_info +{ + struct seginfo *seginfo; + int computing; +}; + +/* These bitmaps are used for the LCM algorithm. */ + +#ifdef OPTIMIZE_MODE_SWITCHING +static sbitmap *antic; +static sbitmap *transp; +static sbitmap *comp; +static sbitmap *delete; +static sbitmap *insert; + +static struct seginfo * new_seginfo PARAMS ((int, rtx, int, HARD_REG_SET)); +static void add_seginfo PARAMS ((struct bb_info *, struct seginfo *)); +static void reg_dies PARAMS ((rtx, HARD_REG_SET)); +static void reg_becomes_live PARAMS ((rtx, rtx, void *)); +static void make_preds_opaque PARAMS ((basic_block, int)); +#endif + +#ifdef OPTIMIZE_MODE_SWITCHING + +/* This function will allocate a new BBINFO structure, initialized + with the MODE, INSN, and basic block BB parameters. 
*/ + +static struct seginfo * +new_seginfo (mode, insn, bb, regs_live) + int mode; + rtx insn; + int bb; + HARD_REG_SET regs_live; +{ + struct seginfo *ptr; + ptr = xmalloc (sizeof (struct seginfo)); + ptr->mode = mode; + ptr->insn_ptr = insn; + ptr->bbnum = bb; + ptr->next = NULL; + COPY_HARD_REG_SET (ptr->regs_live, regs_live); + return ptr; +} - This is effectively the same as earliestness computed on the reverse - flow graph. */ +/* Add a seginfo element to the end of a list. + HEAD is a pointer to the list beginning. + INFO is the structure to be linked in. */ static void -compute_fartherinout (n_blocks, n_exprs, s_succs, - transp, avout, fartherin, fartherout) - int n_blocks; - int n_exprs; - int_list_ptr *s_succs; - sbitmap *transp; - sbitmap *avout; - sbitmap *fartherin; - sbitmap *fartherout; +add_seginfo (head, info) + struct bb_info *head; + struct seginfo *info; { - int bb, changed, passes; - sbitmap temp_bitmap; + struct seginfo *ptr; - temp_bitmap = sbitmap_alloc (n_exprs); + if (head->seginfo == NULL) + head->seginfo = info; + else + { + ptr = head->seginfo; + while (ptr->next != NULL) + ptr = ptr->next; + ptr->next = info; + } +} - sbitmap_vector_zero (fartherin, n_blocks); - sbitmap_ones (fartherout[n_blocks - 1]); +/* Make all predecessors of basic block B opaque, recursively, till we hit + some that are already non-transparent, or an edge where aux is set; that + denotes that a mode set is to be done on that edge. + J is the bit number in the bitmaps that corresponds to the entity that + we are currently handling mode-switching for. */ - passes = 0; - changed = 1; - while (changed) +static void +make_preds_opaque (b, j) + basic_block b; + int j; +{ + edge e; + + for (e = b->pred; e; e = e->pred_next) { - changed = 0; - for (bb = n_blocks - 1; bb >= 0; bb--) - { - if (bb != n_blocks - 1) - sbitmap_union_of_successors (fartherout[bb], fartherin, - bb, s_succs); - sbitmap_not (temp_bitmap, transp[bb]); - changed |= sbitmap_union_of_diff (fartherin[bb], temp_bitmap, - fartherout[bb], avout[bb]); - } - passes++; - } + basic_block pb = e->src; + + if (e->aux || ! TEST_BIT (transp[pb->index], j)) + continue; - free (temp_bitmap); + RESET_BIT (transp[pb->index], j); + make_preds_opaque (pb, j); + } } -/* Compute expression earlierness at entrance and exit of each block. +/* Record in LIVE that register REG died. */ - This is effectively the same as delayedness computed on the reverse - flow graph. */ +static void +reg_dies (reg, live) + rtx reg; + HARD_REG_SET live; +{ + int regno, nregs; + + if (GET_CODE (reg) != REG) + return; + + regno = REGNO (reg); + if (regno < FIRST_PSEUDO_REGISTER) + for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0; + nregs--) + CLEAR_HARD_REG_BIT (live, regno + nregs); +} + +/* Record in LIVE that register REG became live. + This is called via note_stores. 
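[Editorial note: add_seginfo above is a textbook tail append on a singly linked list; the walk is O(n), but each list only holds one node per mode change within a block. A standalone sketch with a stripped-down node type:

#include <stdio.h>
#include <stdlib.h>

/* Stripped-down seginfo: just the mode and the chain link.  */
struct seg { int mode; struct seg *next; };

/* Append INFO at the tail of *HEAD, as add_seginfo does.  */
static void
append (struct seg **head, struct seg *info)
{
  struct seg *ptr;

  if (*head == NULL)
    *head = info;
  else
    {
      for (ptr = *head; ptr->next != NULL; ptr = ptr->next)
        ;
      ptr->next = info;
    }
}

int main (void)
{
  struct seg *list = NULL, *ptr;
  int modes[3] = {1, 0, 2}, i;

  for (i = 0; i < 3; i++)
    {
      struct seg *s = (struct seg *) malloc (sizeof *s);
      s->mode = modes[i];
      s->next = NULL;
      append (&list, s);
    }

  for (ptr = list; ptr; ptr = ptr->next)
    printf ("mode %d\n", ptr->mode);
  return 0;
}
]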
*/ static void -compute_earlierinout (n_blocks, n_exprs, s_succs, avloc, - avout, fartherout, earlierin, earlierout) - int n_blocks; - int n_exprs; - int_list_ptr *s_succs; - sbitmap *avloc; - sbitmap *avout; - sbitmap *fartherout; - sbitmap *earlierin; - sbitmap *earlierout; +reg_becomes_live (reg, setter, live) + rtx reg; + rtx setter ATTRIBUTE_UNUSED; + void *live; { - int bb, changed, passes; - sbitmap *av_and_farther; - sbitmap temp_bitmap; + int regno, nregs; - temp_bitmap = sbitmap_alloc (n_exprs); + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + + if (GET_CODE (reg) != REG) + return; + + regno = REGNO (reg); + if (regno < FIRST_PSEUDO_REGISTER) + for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0; + nregs--) + SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs); +} + +/* Find all insns that need a particular mode setting, and insert the + necessary mode switches. Return true if we did work. */ - /* This is constant throughout the flow equations below, so compute - it once to save time. */ - av_and_farther = sbitmap_vector_alloc (n_blocks, n_exprs); - for (bb = 0; bb < n_blocks; bb++) - sbitmap_a_and_b (av_and_farther[bb], avout[bb], fartherout[bb]); - - sbitmap_vector_zero (earlierin, n_blocks); - sbitmap_copy (earlierout[n_blocks - 1], av_and_farther[n_blocks - 1]); - - passes = 0; - changed = 1; - while (changed) +int +optimize_mode_switching (file) + FILE *file; +{ + rtx insn; + int bb, e; + int need_commit = 0; + sbitmap *kill; + struct edge_list *edge_list; + static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING; +#define N_ENTITIES (sizeof num_modes / sizeof (int)) + int entity_map[N_ENTITIES]; + struct bb_info *bb_info[N_ENTITIES]; + int i, j; + int n_entities; + int max_num_modes = 0; + bool emited = false; + +#ifdef NORMAL_MODE + /* Increment n_basic_blocks before allocating bb_info. */ + n_basic_blocks++; +#endif + + for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--) + if (OPTIMIZE_MODE_SWITCHING (e)) + { + /* Create the list of segments within each basic block. */ + bb_info[n_entities] + = (struct bb_info *) xcalloc (n_basic_blocks, sizeof **bb_info); + entity_map[n_entities++] = e; + if (num_modes[e] > max_num_modes) + max_num_modes = num_modes[e]; + } + +#ifdef NORMAL_MODE + /* Decrement it back in case we return below. */ + n_basic_blocks--; +#endif + + if (! n_entities) + return 0; + +#ifdef NORMAL_MODE + /* We're going to pretend the EXIT_BLOCK is a regular basic block, + so that switching back to normal mode when entering the + EXIT_BLOCK isn't optimized away. We do this by incrementing the + basic block count, growing the VARRAY of basic_block_info and + appending the EXIT_BLOCK_PTR to it. */ + n_basic_blocks++; + if (VARRAY_SIZE (basic_block_info) < n_basic_blocks) + VARRAY_GROW (basic_block_info, n_basic_blocks); + BASIC_BLOCK (n_basic_blocks - 1) = EXIT_BLOCK_PTR; + EXIT_BLOCK_PTR->index = n_basic_blocks - 1; +#endif + + /* Create the bitmap vectors. */ + + antic = sbitmap_vector_alloc (n_basic_blocks, n_entities); + transp = sbitmap_vector_alloc (n_basic_blocks, n_entities); + comp = sbitmap_vector_alloc (n_basic_blocks, n_entities); + + sbitmap_vector_ones (transp, n_basic_blocks); + + for (j = n_entities - 1; j >= 0; j--) { - changed = 0; - for (bb = n_blocks - 1; bb >= 0; bb--) + int e = entity_map[j]; + int no_mode = num_modes[e]; + struct bb_info *info = bb_info[j]; + + /* Determine what the first use (if any) need for a mode of entity E is. + This will be the mode that is anticipatable for this block. 
+ Also compute the initial transparency settings. */ + for (bb = 0 ; bb < n_basic_blocks; bb++) { - if (bb != n_blocks - 1) + struct seginfo *ptr; + int last_mode = no_mode; + HARD_REG_SET live_now; + + REG_SET_TO_HARD_REG_SET (live_now, + BASIC_BLOCK (bb)->global_live_at_start); + for (insn = BLOCK_HEAD (bb); + insn != NULL && insn != NEXT_INSN (BLOCK_END (bb)); + insn = NEXT_INSN (insn)) { - sbitmap_intersect_of_successors (temp_bitmap, earlierin, - bb, s_succs); - changed |= sbitmap_a_or_b (earlierout[bb], - av_and_farther[bb], - temp_bitmap); + if (INSN_P (insn)) + { + int mode = MODE_NEEDED (e, insn); + rtx link; + + if (mode != no_mode && mode != last_mode) + { + last_mode = mode; + ptr = new_seginfo (mode, insn, bb, live_now); + add_seginfo (info + bb, ptr); + RESET_BIT (transp[bb], j); + } + + /* Update LIVE_NOW. */ + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_DEAD) + reg_dies (XEXP (link, 0), live_now); + + note_stores (PATTERN (insn), reg_becomes_live, &live_now); + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_UNUSED) + reg_dies (XEXP (link, 0), live_now); + } + } + + info[bb].computing = last_mode; + /* Check for blocks without ANY mode requirements. */ + if (last_mode == no_mode) + { + ptr = new_seginfo (no_mode, insn, bb, live_now); + add_seginfo (info + bb, ptr); } - sbitmap_not (temp_bitmap, avloc[bb]); - changed |= sbitmap_a_and_b (earlierin[bb], - temp_bitmap, - earlierout[bb]); } - passes++; +#ifdef NORMAL_MODE + { + int mode = NORMAL_MODE (e); + + if (mode != no_mode) + { + edge eg; + + for (eg = ENTRY_BLOCK_PTR->succ; eg; eg = eg->succ_next) + { + bb = eg->dest->index; + + /* By always making this nontransparent, we save + an extra check in make_preds_opaque. We also + need this to avoid confusing pre_edge_lcm when + antic is cleared but transp and comp are set. */ + RESET_BIT (transp[bb], j); + + /* If the block already has MODE, pretend it + has none (because we don't need to set it), + but retain whatever mode it computes. */ + if (info[bb].seginfo->mode == mode) + info[bb].seginfo->mode = no_mode; + + /* Insert a fake computing definition of MODE into entry + blocks which compute no mode. This represents the mode on + entry. */ + else if (info[bb].computing == no_mode) + { + info[bb].computing = mode; + info[bb].seginfo->mode = no_mode; + } + } + + bb = n_basic_blocks - 1; + info[bb].seginfo->mode = mode; + } + } +#endif /* NORMAL_MODE */ } - /* We're done with this, so go ahead and free it's memory now instead - of waiting until the end of pre. */ - free (av_and_farther); - free (temp_bitmap); -} + kill = sbitmap_vector_alloc (n_basic_blocks, n_entities); + for (i = 0; i < max_num_modes; i++) + { + int current_mode[N_ENTITIES]; -/* Compute firstness. + /* Set the anticipatable and computing arrays. */ + sbitmap_vector_zero (antic, n_basic_blocks); + sbitmap_vector_zero (comp, n_basic_blocks); + for (j = n_entities - 1; j >= 0; j--) + { + int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i); + struct bb_info *info = bb_info[j]; - This is effectively the same as latestness computed on the reverse - flow graph. 
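[Editorial note: the scan described above reduces to walking the block's insns in order and starting a new segment whenever the mode an insn needs differs from the last mode seen; insns with no requirement leave the running mode alone. A standalone sketch with a made-up per-insn requirement array standing in for MODE_NEEDED:

#include <stdio.h>

#define NO_MODE (-1)

int main (void)
{
  /* Hypothetical mode requirements for the insns of one block;
     NO_MODE means the insn does not care.  */
  int needed[8] = {NO_MODE, 2, 2, NO_MODE, 1, 1, 2, NO_MODE};
  int last_mode = NO_MODE;
  int i;

  for (i = 0; i < 8; i++)
    if (needed[i] != NO_MODE && needed[i] != last_mode)
      {
        last_mode = needed[i];
        printf ("new segment at insn %d, mode %d\n", i, last_mode);
      }

  /* This is what ends up in info[bb].computing.  */
  printf ("block computes mode %d on exit\n", last_mode);
  return 0;
}
]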
*/ + for (bb = 0 ; bb < n_basic_blocks; bb++) + { + if (info[bb].seginfo->mode == m) + SET_BIT (antic[bb], j); -static void -compute_firstout (n_blocks, n_exprs, s_preds, avloc, earlierout, firstout) - int n_blocks; - int n_exprs; - int_list_ptr *s_preds; - sbitmap *avloc; - sbitmap *earlierout; - sbitmap *firstout; -{ - int bb; - sbitmap temp_bitmap; + if (info[bb].computing == m) + SET_BIT (comp[bb], j); + } + } - temp_bitmap = sbitmap_alloc (n_exprs); + /* Calculate the optimal locations for the + placement mode switches to modes with priority I. */ - for (bb = 0; bb < n_blocks; bb++) - { - /* The first block is preceded only by the entry block; therefore, - temp_bitmap will not be set by the following call! */ - if (bb != 0) - { - sbitmap_intersect_of_predecessors (temp_bitmap, earlierout, - bb, s_preds); - sbitmap_not (temp_bitmap, temp_bitmap); - } - else + for (bb = n_basic_blocks - 1; bb >= 0; bb--) + sbitmap_not (kill[bb], transp[bb]); + edge_list = pre_edge_lcm (file, 1, transp, comp, antic, + kill, &insert, &delete); + + for (j = n_entities - 1; j >= 0; j--) { - sbitmap_ones (temp_bitmap); + /* Insert all mode sets that have been inserted by lcm. */ + int no_mode = num_modes[entity_map[j]]; + + /* Wherever we have moved a mode setting upwards in the flow graph, + the blocks between the new setting site and the now redundant + computation ceases to be transparent for any lower-priority + mode of the same entity. First set the aux field of each + insertion site edge non-transparent, then propagate the new + non-transparency from the redundant computation upwards till + we hit an insertion site or an already non-transparent block. */ + for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--) + { + edge eg = INDEX_EDGE (edge_list, e); + int mode; + basic_block src_bb; + HARD_REG_SET live_at_edge; + rtx mode_set; + + eg->aux = 0; + + if (! TEST_BIT (insert[e], j)) + continue; + + eg->aux = (void *)1; + + mode = current_mode[j]; + src_bb = eg->src; + + REG_SET_TO_HARD_REG_SET (live_at_edge, + src_bb->global_live_at_end); + + start_sequence (); + EMIT_MODE_SET (entity_map[j], mode, live_at_edge); + mode_set = gen_sequence (); + end_sequence (); + + /* Do not bother to insert empty sequence. */ + if (GET_CODE (mode_set) == SEQUENCE + && !XVECLEN (mode_set, 0)) + continue; + + /* If this is an abnormal edge, we'll insert at the end + of the previous block. */ + if (eg->flags & EDGE_ABNORMAL) + { + emited = true; + if (GET_CODE (src_bb->end) == JUMP_INSN) + emit_insn_before (mode_set, src_bb->end); + /* It doesn't make sense to switch to normal mode + after a CALL_INSN, so we're going to abort if we + find one. The cases in which a CALL_INSN may + have an abnormal edge are sibcalls and EH edges. + In the case of sibcalls, the dest basic-block is + the EXIT_BLOCK, that runs in normal mode; it is + assumed that a sibcall insn requires normal mode + itself, so no mode switch would be required after + the call (it wouldn't make sense, anyway). In + the case of EH edges, EH entry points also start + in normal mode, so a similar reasoning applies. */ + else if (GET_CODE (src_bb->end) == INSN) + emit_insn_after (mode_set, src_bb->end); + else + abort (); + bb_info[j][src_bb->index].computing = mode; + RESET_BIT (transp[src_bb->index], j); + } + else + { + need_commit = 1; + insert_insn_on_edge (mode_set, eg); + } + } + + for (bb = n_basic_blocks - 1; bb >= 0; bb--) + if (TEST_BIT (delete[bb], j)) + { + make_preds_opaque (BASIC_BLOCK (bb), j); + /* Cancel the 'deleted' mode set. 
*/ + bb_info[j][bb].seginfo->mode = no_mode; + } } - sbitmap_a_and_b_or_c (firstout[bb], earlierout[bb], - avloc[bb], temp_bitmap); + + clear_aux_for_edges (); + free_edge_list (edge_list); } - free (temp_bitmap); -} -/* Compute reverse isolated. +#ifdef NORMAL_MODE + /* Restore the special status of EXIT_BLOCK. */ + n_basic_blocks--; + VARRAY_POP (basic_block_info); + EXIT_BLOCK_PTR->index = EXIT_BLOCK; +#endif - This is effectively the same as isolatedness computed on the reverse - flow graph. */ + /* Now output the remaining mode sets in all the segments. */ + for (j = n_entities - 1; j >= 0; j--) + { + int no_mode = num_modes[entity_map[j]]; -static void -compute_rev_isoinout (n_blocks, s_preds, avloc, firstout, - rev_isoin, rev_isoout) - int n_blocks; - int_list_ptr *s_preds; - sbitmap *avloc; - sbitmap *firstout; - sbitmap *rev_isoin; - sbitmap *rev_isoout; -{ - int bb, changed, passes; +#ifdef NORMAL_MODE + if (bb_info[j][n_basic_blocks].seginfo->mode != no_mode) + { + edge eg; + struct seginfo *ptr = bb_info[j][n_basic_blocks].seginfo; - sbitmap_vector_zero (rev_isoout, n_blocks); - sbitmap_zero (rev_isoin[0]); + for (eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next) + { + rtx mode_set; + + if (bb_info[j][eg->src->index].computing == ptr->mode) + continue; + + start_sequence (); + EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live); + mode_set = gen_sequence (); + end_sequence (); + + /* Do not bother to insert empty sequence. */ + if (GET_CODE (mode_set) == SEQUENCE + && !XVECLEN (mode_set, 0)) + continue; + + /* If this is an abnormal edge, we'll insert at the end of the + previous block. */ + if (eg->flags & EDGE_ABNORMAL) + { + emited = true; + if (GET_CODE (eg->src->end) == JUMP_INSN) + emit_insn_before (mode_set, eg->src->end); + else if (GET_CODE (eg->src->end) == INSN) + emit_insn_after (mode_set, eg->src->end); + else + abort (); + } + else + { + need_commit = 1; + insert_insn_on_edge (mode_set, eg); + } + } - passes = 0; - changed = 1; - while (changed) - { - changed = 0; - for (bb = 0; bb < n_blocks; bb++) + } +#endif + + for (bb = n_basic_blocks - 1; bb >= 0; bb--) { - if (bb != 0) - sbitmap_intersect_of_predecessors (rev_isoin[bb], rev_isoout, - bb, s_preds); - changed |= sbitmap_union_of_diff (rev_isoout[bb], firstout[bb], - rev_isoin[bb], avloc[bb]); + struct seginfo *ptr, *next; + for (ptr = bb_info[j][bb].seginfo; ptr; ptr = next) + { + next = ptr->next; + if (ptr->mode != no_mode) + { + rtx mode_set; + + start_sequence (); + EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live); + mode_set = gen_sequence (); + end_sequence (); + + /* Do not bother to insert empty sequence. */ + if (GET_CODE (mode_set) == SEQUENCE + && !XVECLEN (mode_set, 0)) + continue; + + emited = true; + if (GET_CODE (ptr->insn_ptr) == NOTE + && (NOTE_LINE_NUMBER (ptr->insn_ptr) + == NOTE_INSN_BASIC_BLOCK)) + emit_insn_after (mode_set, ptr->insn_ptr); + else + emit_insn_before (mode_set, ptr->insn_ptr); + } + + free (ptr); + } } - passes++; + + free (bb_info[j]); } + + /* Finished. Free up all the things we've allocated. 
*/
+
+  sbitmap_vector_free (kill);
+  sbitmap_vector_free (antic);
+  sbitmap_vector_free (transp);
+  sbitmap_vector_free (comp);
+  sbitmap_vector_free (delete);
+  sbitmap_vector_free (insert);
+
+  if (need_commit)
+    commit_edge_insertions ();
+
+  if (!need_commit && !emited)
+    return 0;
+
+  /* Ideally we'd figure out what blocks were affected and start from
+     there, but this is enormously complicated by commit_edge_insertions,
+     which would screw up any indices we'd collected, and also need to
+     be involved in the update.  Bail and recompute global life info for
+     everything.  */
+
+  allocate_reg_life_data ();
+  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
+		    (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
+		     | PROP_SCAN_DEAD_CODE | PROP_REG_INFO));
+
+  return 1;
+}
+#endif /* OPTIMIZE_MODE_SWITCHING */