/**
$Id: cssame.cc,v 2.26 1999/03/13 19:12:46 diego Exp $
*/
#include <set>

#include <suif_copyright.h>

#define _MODULE_ "CSSAME"

#include <values.h>
#include <ctype.h>

#include <iostream.h>
#include <iomanip.h>

#include <suif.h>
#include <par.h>
#include <odc-util.h>
#include <d_lib.h>

#include "ccfg.h"
#include "cssame.h"

using namespace std;

/**
Algorithm to build concurrent factored use-def chains. FUD chains have the
property that each use of a variable is reached by a single definition: the
use-def chain for any one use is a single link to the unique reaching
definition. 

$\phi$-terms are inserted into the program when there are multiple reaching
definitions to one place. These $\phi$-terms are pseudo-assignments that
factor the multiple incoming reaching definitions.

FUD chains are an interpretation of the static single assignment (SSA) form of
the program. The difference is that FUD chains do not require the renaming of
variables to satisfy the single-assignment restriction.
*/
void
cssame::buildCSSAME()
{
    D_SELFTEST_HEADER(1, "cssame::cssame");
    D_SELFTEST(1) {
	cout << "Building CSSAME form for '" << _graph->tproc()->proc()->name()
	    << "'\n\n";
    }

    /* Gather all the symbols used in the procedure */
    D_SELFTEST(2) { cout << "Gathering symbols.\n"; }
    this->buildSymbolList();

    /* Add conflict and synchronization edges to the graph. */
    D_SELFTEST(2) { cout << "Adding conflict/synch edges to the graph.\n"; }
    this->addConfSyncEdges();

    /* Add ghost definitions in the first node for every variable in the
     * symbol table. This is useful to avoid PHI terms with NULL arguments.
     */
    D_SELFTEST(2) { cout << "Adding ghost definitions.\n"; }
    this->addGhostDefinitions();

    /* Compute dominance information needed by several methods */
    D_SELFTEST(2) { cout << "Computing dominance information.\n"; }
    _graph->find_dominators();
    _graph->find_postdominators();
    _graph->find_df();

    /* Find natural loops. Needed by cssame::computeOrdering to cluster all
     * the nodes in a loop in one big super-node. All the nodes in this
     * super-node will have the same precedence information than the
     * header. This makes the analysis less accurate but avoids computing
     * wrong precedence for nodes in a loop.
     */
    D_SELFTEST(2) { cout << "Finding natural loops.\n"; }
    _graph->find_natural_loops();

    D_SELFTEST(2) { cout << "Computing guaranteed execution ordering.\n"; }
    this->computeOrdering();

    D_SELFTEST(2) { cout << "Looking for mutex structures.\n"; }
    this->findMutexBodies();

    D_SELFTEST(1) { cout << "Computing sequential SSA form.\n"; }
    this->placePhiFunctions();
    this->placeFUDChains();

    D_SELFTEST(1) { cout << "Placing PI functions.\n"; }
    this->placePiFunctions();

    /* Before rewriting PI functions, we need to compute reaching
     * definitions. This information is used when checking for upward
     * exposed uses inside mutex bodies.
     */
    D_SELFTEST(2) { cout << "Computing reaching definitions.\n"; }
    this->computeReachingDefs();

    D_SELFTEST(1) { cout << "Rewriting PI functions.\n"; }
    this->rewritePiFunctions();

    D_SELFTEST(2) { cout << "Removing redundant PI and PHI functions.\n"; }
    this->removeRedundantPis();
    this->removeRedundantPhis();

    D_SELFTEST_FOOTER(1);
}


/**
Destructor for the CSSAME form.

The body performs no explicit clean-up: it only invokes the
D_SELFTEST_HEADER / D_SELFTEST_FOOTER macros for level 280. Whatever the
data members own is presumably released by their own destructors
(NOTE(review): confirm against the class declaration in cssame.h).
*/
cssame::~cssame()
{
    D_SELFTEST_HEADER(280, "cssame::~cssame");

    D_SELFTEST_FOOTER(280);
}



/**
Computes reaching definitions and reached uses for all the variable uses
and definitions in the program. Reaching definition lists are appended to
each use and to the statement that contains it. Reached uses lists are
appended to each definition and to the statement that contains it.
*/
void
cssame::computeReachingDefs()
{
    D_SELFTEST_HEADER(330, "cssame::computeReachingDefs");

    set_varref::iterator ref_iter = _graph->refs().begin();
    for (; ref_iter != _graph->refs().end(); ref_iter++) {
	p_varref ref = *ref_iter;

	D_SELFTEST(331) {
	    cout << "Processing reference "; ref->print(); cout << "\n\n";

	    cout << "Current reaching definitions:\n";
	    copy(ref->reachingDefs().begin(), ref->reachingDefs().end(),
		 ostream_iterator<p_vardef>(cout, "\n")
		);

	    cout << "\nCurrent reached uses:\n";
	    copy(ref->reachedUses().begin(), ref->reachedUses().end(),
		 ostream_iterator<p_varuse>(cout, "\n")
		);

	    cout << endl;
	}

	/* Set Marked(d) = NULL for every reference 'd' in the graph */
	ref->markedBy(NULL);

	/* Empty current reaching definition and reached refs information
	 * from the reference and from the statement containing it.
	 */
	ref->_rdefs.erase(ref->_rdefs.begin(), ref->_rdefs.end());
	ref->_ruses.erase(ref->_ruses.begin(), ref->_ruses.end());

	if (ref->instr()) {
	    set_reaching_defs(ref->instr(), &(ref->_rdefs));
	    set_reached_uses(ref->instr(), &(ref->_ruses));
	}
    }

    /* Traverse all the uses following their use-def chains looking for
     * reaching definitions.
     */
    set_varuse::iterator u = _graph->uses().begin();
    for (; u != _graph->uses().end(); u++) {
	p_varuse use = *u;
	D_SELFTEST(331) {
	    cout << "\nFollowing use-def chain for: " << use << endl;
	}
	this->followChain(use->chain(), use);
    }


    D_SELFTEST(330) {
	cout << endl << "Reaching definitions for all uses in the program:\n";
	u = _graph->uses().begin();
	for (; u != _graph->uses().end(); u++) {
	    p_varuse use = *u;
	    cout << "Use: " << use << "\n\t";
	    copy(use->reachingDefs().begin(), use->reachingDefs().end(),
		 ostream_iterator<p_vardef>(cout, "\n\t"));
	    cout << endl;
	}

	cout << endl << "Reached uses for all definitions in the program:\n";
	set_vardef::iterator d = _graph->defs().begin();
	for (; d != _graph->defs().end(); d++) {
	    p_vardef def = *d;
	    cout << "Definition: " << def << "\n\t";
	    copy(def->reachedUses().begin(), def->reachedUses().end(),
		 ostream_iterator<p_varuse>(cout, "\n\t"));
	    cout << endl;
	}
    }

    D_SELFTEST_FOOTER(330);
}


/**
Follows the factored use-def links to find all possible reaching definitions
for the given use. Adapted for explicitly parallel programs from "High
Performance Compilers for Parallel Computing", M. Wolfe, page 182. The steps of
the algorithm are included in the comments.

\begin{verbatim}
    if Marked(d) = u then return endif
    Marked(d) = u
    if d is a definition for u then
        add d to Defs(u)
    endif
    if d is a phi-term then
        for each link j do
	    followChain(phi-chain(d)[j], u)
        endfor
    else if d is a killing definition then
       [ stop here ]
    else [ follow def-def link ]
        followChain(Chain(d), u)
    endif
\end{verbatim}
*/
void
cssame::followChain(p_vardef d, p_varuse u)
{
    D_SELFTEST_HEADER(300, "cssame::followChain");

    D_SELFTEST(300) {
        cout << "Following factored use-def chain from use: " << u << endl
            <<  "to definition:                             " << d << endl;
    }

    /* A null definition terminates the walk. */
    if (!d) { D_SELFTEST_FOOTER(300); return; }

    /* if Marked(d) = u then return endif
     * The mark stops the recursion from visiting the same definition twice
     * for the same use (e.g. through cyclic PHI/PI arguments).
     */
    if (d->isMarkedBy(u)) { D_SELFTEST_FOOTER(300); return; }

    /* Marked(d) = u */
    d->markedBy(u);

    /* if d is a definition for u then
     *    add d to Defs(u)
     * endif
     */
    if (d->isD() && d->var() == u->var()) {
        D_SELFTEST(300) { cout << "The definition is a real definition.\n"; }

        /* Add d to the list of reaching defs of u */
        u->reachingDefs().push_back(d);

        /* Add u to the list of reached uses of d. This adds def-use edges
         * to the graph.
         */
        d->reachedUses().push_back(u);

        D_SELFTEST(301) {
            cout << endl << "Added '" << d << "' to the list of reaching "
                << "defs for" << endl << " the use '" << u << "'" << endl;

            /* computeReachingDefs() treats instr() as possibly null, so the
             * trace must not dereference it unconditionally.
             */
            if (u->instr()) {
                cout << "and instruction #" << u->instr()->number() << endl;
            }
        }
    }

    /* if d is a phi-term then
     *     for each link j do
     *        FollowChain(phi-chain(d)[j], u)
     *     endfor
     * else if d is a killing definition then
     *     stop here
     * else (follow a def-def link)
     *     FollowChain(Chain(d), u)
     * endif
     *
     * Only the PHI/PI case is implemented: def-def links are not in place
     * yet, so any other kind of definition ends the walk here.
     */
    if (d->isPHI() || d->isPI()) {
        p_phiterm phi = d;

        D_SELFTEST(300) { cout << "The definition is a PHI/PI term.\n"; }

        /* Arguments are only followed when the term has not been removed
         * from the graph.
         */
        if (!phi->removed()) {
            vector_vardef::iterator iter = phi->phi_chain().begin();
            for ( ; iter != phi->phi_chain().end(); ++iter) {
                D_SELFTEST(301) { cout << "Following argument: " << *iter; }
                this->followChain(*iter, u);
            }
        }
    }

    D_SELFTEST_FOOTER(300);
}


/**
Prints the original program in pseudo-SSA form. Only $\phi$ and $\pi$ terms
are shown. Variables are not actually renumbered. This assumes that FUD
chains have already been built.
*/
void
cssame::print(ostream& f) const
{
    /* Output the original lines from the source file. When we find a phi or pi
     * term for the current line we print the term(s) that go in that line
     * before the source line.
     */
    char *fname = source_file_name(_graph->tproc().ptr());
    unsigned start = source_line_num(_graph->tproc().ptr());
    if (start > 1) { start--; }	/* Add some context lines */
    unsigned end;

    /* Gather all the phi and pi terms into a single set. The original
     * author relies on set_phiterm ordering its elements by line number
     * (NOTE(review): confirm the comparator used by set_phiterm).
     */
    set_phiterm phi_set;
    copy(_graph->phiterms().begin(), _graph->phiterms().end(),
            inserter(phi_set, phi_set.begin()));
    copy(_graph->piterms().begin(), _graph->piterms().end(),
            inserter(phi_set, phi_set.begin()));

    for (set_phiterm::iterator j = phi_set.begin(); j != phi_set.end(); ++j) {
        p_phiterm cref = *j;
        end = cref->lineno();

        /* Emit the source lines that precede the term's line. */
        char *lastline = displayFile(fname, start, end - 1, DF_LINENO, f);

        /* Only print phi and pi terms for user defined variables */
        if (cref->var()->is_userdef()) {
            f << setw(7) << end << " ";

            /* Copy the leading white space of the text returned by
             * displayFile() so the term is indented like that line. The
             * pointer is checked before use, and the character is
             * converted to unsigned char because isspace() is undefined
             * for negative values other than EOF.
             */
            while (lastline
                   && isspace(static_cast<unsigned char>(*lastline))) {
                f << *lastline;
                lastline++;
            }

            /* The terminator goes to the same stream as the term itself;
             * it used to be written to cout regardless of 'f'.
             */
            cref->print(f); f << ";" << endl;
        }

        start = end;
    }

    /* Emit the remaining lines up to the last entry of the procedure body. */
    end = source_line_num(_graph->tproc()->body()->tail()->contents);
    displayFile(fname, start, end, DF_LINENO, f);
}

/* Stream insertion for a CSSAME form: delegates to cssame::print() and
 * returns the stream so that insertions can be chained.
 */
ostream& operator<<(ostream& out, const cssame& form)
{
    form.print(out);
    return out;
}

//***************************************************************************
//			     Internal methods
//***************************************************************************
/**
Computes guaranteed execution ordering for all the nodes in the graph. Based on
'Concurrent Static Single Assignment Form and Constant Propagation for
Explicitly Parallel Programs', Padua, Midkiff & Lee LCPC '97.
*/
void
cssame::computeOrdering()
{
    D_SELFTEST_HEADER(430, "cssame::computeOrdering");

    /* Initialize the _prec, _prec_ct and _prec_sy bitvector sets in every
     * node of the graph.
     */
    unsigned num = _graph->num_nodes();
    unsigned i;
    for (i = 0; i < num; i++) {
	_graph->node(i)->_prec.expand(0, num);
	_graph->node(i)->_prec_ct.expand(0, num);
	_graph->node(i)->_prec_sy.expand(0, num);
    }

    /* Create a new work queue and initialize it with the successors to the
     * entry node.
     */
    ccfg_node_list *queue = new ccfg_node_list;
    ccfg_node_list_iter node_iter(_graph->entry_node()->succs());
    while (!node_iter.is_empty()) {
	queue->append(node_iter.step());
    }

    /* while Q is not empty do
     *     n = the first entry in Q
     *     prec_old = prec(n)
     *     if Ntype(n) is Coend then
     *         prec_ct(n) = Union{(m,n) in Ect} (Prec(m) U {n})
     *     else
     *         prec_ct(n) = Intersection{(m, n) in Ect} (Prec(m) U {n})
     *     endif
     *     prec_sy(n) = Intersection{(m, n) in Esy} (Prec(m) U {n})
     *     prec(n) = prec_ct(n) U prec_sy(n)
     *     if prec_old <> prec(n) then
     *         Put control flow and synchronization successors of n in Q
     *     endif
     * endwhile
     */
    while (!queue->is_empty()) {
	ccfg_node *n = queue->pop();

	D_SELFTEST(431) {
	    cout << endl << "Computing Prec(" << n->number()
		 << ") -> loop depth: " << _graph->loop_depth(n) << endl 
		;
	}

	bit_set prec_old(0, num);
	prec_old.copy(&(n->_prec));

	/* if Ntype(n) is Coend then
	 *     prec_ct(n) = Union{(m,n) in Ect} (Prec(m) U {n})
	 * else
	 *     prec_ct(n) = Intersection{(m, n) in Ect} (Prec(m) U {n})
	 * endif
	 */
	ccfg_node_list_iter preds_iter(n->preds());

	if (preds_iter.is_empty() == false) {
	    /* If n is coend we initialize its _prec_ct set to empty
	     * because we will be adding elements. Otherwise we initialize
	     * it to all ones because we will be intersecting elements.
	     */
	    if (n->is_coend()) {
		n->_prec_ct.clear();
	    } else {
		n->_prec_ct.universal();
	    }

	    /* Node n always goes in its own _prec_ct set */
	    n->_prec_ct.add(n->number());
	}

	while (preds_iter.is_empty() == false) {
	    ccfg_node *pred = preds_iter.step();

	    /* If n is a loop header we must avoid processing any
	     * predecessor for n inside the body of the loop (ie, the
	     * last node of the body). Therefore, if the predecessor
	     * pred has the same loop_depth than n and n is a loop
	     * header, we ignore the predecessor.
	     */
	    if (_graph->is_loop_begin(n) && 
		_graph->loop_depth(n) == _graph->loop_depth(pred)) {
		continue;
	    }

	    bit_set ancestor_prec(0, num);
	    ancestor_prec.copy(&(pred->_prec));
	    ancestor_prec.add(n->number());

	    if (n->is_coend()) {
		n->_prec_ct += ancestor_prec;
		D_SELFTEST(431) {
		    cout << "\tAdded _prec(" << pred->number()
			<< ") to _prec_ct(" << n->number() << "): ";
		    n->_prec_ct.print(stdout, "%d ");
		    cout << endl;
		}
	    } else {
		n->_prec_ct *= ancestor_prec;
		D_SELFTEST(431) {
		    cout << "\tIntersected _prec(" << pred->number()
			<< ") to _prec_ct(" << n->number() << "): ";
		    n->_prec_ct.print(stdout, "%d ");
		    cout << endl;
		}
	    }
	}

	/* prec_sy(n) = Intersection{(m, n) in Esy} (Prec(m) U {n}) */
	preds_iter.reset(n->spreds());
	if (preds_iter.is_empty() == false) {
	    n->_prec_sy.universal();
	}
	while (preds_iter.is_empty() == false) {
	    ccfg_node *pred = preds_iter.step();

	    bit_set ancestor_prec(0, num);
	    ancestor_prec.copy(&(pred->_prec));
	    ancestor_prec.add(n->number());

	    n->_prec_sy *= ancestor_prec;

	    D_SELFTEST(431) {
		cout << "\tIntersected _prec(" << pred->number()
		    << ") to _prec_sy(" << n->number() << "): ";
		n->_prec_sy.print(stdout, "%d ");
		cout << endl;
	    }
	}

	/* prec(n) = prec_ct(n) U prec_sy(n) */
	n->_prec.set_union(&(n->_prec_ct), &(n->_prec_sy));

	D_SELFTEST(431) {
	    cout << "\tprec_old is: "; prec_old.print(stdout, "%d ");
	    cout << endl << "\tn->_prec is: "; n->_prec.print(stdout, "%d ");
	    cout << endl;
	}

	if (prec_old != n->_prec) {
	    D_SELFTEST(431) {
		cout << "\tAppending control successors of node "
			 << n->number() << " d to work queue: ";
		ccfg_node_list_iter node_iter(n->succs());
		while (node_iter.is_empty() == false) {
		    cout << node_iter.step()->number() << " ";
		}
		cout << endl;

		cout << "\tAppending synchronization successors of node "
			 << n->number() << " to work queue: ";
		node_iter.reset(n->ssuccs());
		while (node_iter.is_empty() == false) {
		    cout << node_iter.step()->number() << " ";
		}
		cout << endl;
	    }

	    ccfg_node_list_iter node_iter(n->succs());
	    while (!node_iter.is_empty()) {
		queue->append(node_iter.step());
	    }

	    node_iter.reset(n->ssuccs());
	    while (!node_iter.is_empty()) {
		queue->append(node_iter.step());
	    }
	}
    }

    /* Traverse all the loop bodies assigning to each node n in the body the
     * same prec() set than the outermost header loop node enclosing n.
     */
    for (i = 0; i < num; i++) {
	p_ccfg_node node = _graph->node(i);
	if (_graph->loop_depth(node) > 0) {
	    p_ccfg_node header = _graph->outermostLoopHeader(node);

	    assert_msg(header, ("Node %d has a loop depth > 0. It should"
		    " have a loop header!\n", node->number()));

	    if (header != node) {

		D_SELFTEST(432) {
		    cout << "Setting prec(" << node->number() << ") = prec(" 
			<< header->number() << endl;
		}

		node->_prec.copy(&(header->_prec));
	    }
	}
    }


    D_SELFTEST(430) {
	cout << endl << "Guaranteed execution ordering Prec(n)" << endl;
	unsigned i;
	for (i = 0; i < _graph->num_nodes(); i++) {
	    cout << "\tPrec(" << i << ") = ";
	    _graph->node(i)->_prec.print(stdout, "%d ");
	    cout << endl;
	}
	cout << endl;
    }

    delete queue;

    D_SELFTEST_FOOTER(430);
}



/**
Algorithm to place phi-functions in the graph. Taken from "High Performance
Compilers for Parallel Computing", M. Wolfe, page 175. The steps of the
algorithm are also included in comments:

\begin{verbatim}
Find dominance frontier for nodes in the graph

for X in V do
   inWork(X) = Empty;
   added(X) = Empty;
endfor

workList = Empty;
for M in Symbols do
    for X in D(M) do
        workList = workList U {X}
        inWork(X) = M;
        
    endfor
    while workList <> Empty do
        remove some node X from workList
        for W in DF(X) do
            if added(W) <> M then
               add phi-term for M at W
               added(W) = M
               if inWork(W) <> M then
                   workList = workList U {W}
                   inWork(W) = M
               endif
            endif
        endfor
    endwhile
endfor
\end{verbatim}
*/
void
cssame::placePhiFunctions()
{
    D_SELFTEST_HEADER(350, "cssame::placePhiFunctions");

    /* added[w] / inWork[w] hold the last symbol for which node w received
     * a phi-term / was put on the work list, indexed by node number.
     */
    vector_var_sym added(_graph->num_nodes());
    vector_var_sym inWork(_graph->num_nodes());
    unsigned x;

    D_SELFTEST(350) {
        cout << "Dominance frontier for all the nodes in the graph" << endl;

        for (x = 0; x < _graph->num_nodes(); x++) {
            cout << "DF(" << x << ") = { ";
            bit_set *df = _graph->dom_frontier(_graph->node(x));
            bit_set_iter df_iter(df);
            while (df_iter.is_empty() == false) {
                int w = df_iter.step();
                cout << w << " ";
            }
            cout << " }" << endl;
        }
    }

    /* for X in V do
     *    inWork(X) = Empty;
     *    added(X) = Empty;
     * endfor
     */
    for (x = 0; x < _graph->num_nodes(); x++) {
        inWork[x] = NULL;
        added[x] = NULL;
    }

    /* workList = Empty; */
    list_ccfg_node workList;

    /* for M in Symbols do */
    set_var_sym::iterator symbols_iter = _symbols.begin();

    for (; symbols_iter != _symbols.end(); ++symbols_iter) {
        p_var_sym m = *symbols_iter;

        D_SELFTEST(350) {
            cout << "\n\nProcessing variable " << m << endl;
        }

        /* for X in D(M) do
         *    workList = workList U {X}
         *    inWork(X) = M;
         * endfor
         */
        set_vardef *defs = ::get_vardefs(m.ptr());
        if (!defs) {
            /* No definition set is attached to this symbol, so there is
             * nothing to seed the work list with and no phi-term to place.
             */
            continue;
        }

        set_vardef::iterator defs_iter = defs->begin();
        for (; defs_iter != defs->end(); ++defs_iter) {
            p_vardef def = *defs_iter;
            p_ccfg_node node = def->node();
            if (!node || node->parent() != _graph.ptr()) {
                D_SELFTEST(350) {
                    cout << "Ignoring definition " << def << endl;
                    cout << "Because it comes from the external node " << node
                        << endl;
                }
                continue; /* Ignore defs made in another graph because they
                             can't possibly affect the sequential SSA form
                             for this procedure.
                           */
            }

            D_SELFTEST(350) {
                cout << "Found definition " << def << endl;
                cout << "Adding node " << node << " to work list for variable "
                    << m << endl << endl;
            }
            workList.push_back(node);
            inWork[node->number()] = m;
        }

        /* while workList <> Empty do
         * empty() is used instead of size() because size() is not
         * guaranteed to be constant time for a list.
         */
        while (!workList.empty()) {
            /* remove some node X from workList */
            p_ccfg_node node = workList.back(); workList.pop_back();

            /* for W in DF(X) do */
            bit_set *df = _graph->dom_frontier(node);
            bit_set_iter df_iter(df);
            while (df_iter.is_empty() == false) {
                /* if added(W) <> M then
                 *    add phi-term for M at W
                 *    added(W) = M
                 *    if inWork(W) <> M then
                 *        workList = workList U {W}
                 *        inWork(W) = M
                 *    endif
                 * endif
                 */
                int w = df_iter.step();
                if (added[w] != m) {
                    D_SELFTEST(350) {
                        cout << "Add phi-term for '" << m->name()
                            << "' at node " << w << endl;
                    }

                    /* The pointer is deliberately discarded; the new term
                     * is presumably registered with the node by the phiterm
                     * constructor (NOTE(review): confirm in phiterm).
                     */
                    (void) new phiterm(m, _graph->node(w));
                    added[w] = m;
                    if (inWork[w] != m) {
                        workList.push_back(_graph->node(w));
                        inWork[w] = m;
                    }
                }
            }
        }
    }

    D_SELFTEST_FOOTER(350);
}


/**
Set up factored use-def chains. This method links each variable use with its
unique reaching definition or pseudo-assignment and fills in the phi-term
links. Taken from "High Performance Compilers for Parallel Computing", M.
Wolfe, page 178.
*/
void
cssame::placeFUDChains()
{
    /* for M in Symbols do
     *     CurrDef(M) = Empty;
     * endfor
     */
    set_var_sym::iterator symbols_iter = _symbols.begin();
    for (; symbols_iter != _symbols.end(); symbols_iter++) {
	/* Remove the existing k_currdef annotation */
	set_currdef((*symbols_iter), NULL);
    }

    /* Search(Entry) */
    this->searchFUDChains(_graph->entry_node());
}


/**
Performs a depth-first traversal of the dominator tree looking for FUD
(Factored Use-Def) chains. Taken from "High Performance Compilers for Parallel
Computing", M. Wolfe, page 178.
*/
void
cssame::searchFUDChains(p_ccfg_node x)
{
    D_SELFTEST_HEADER(340, "cssame::searchFUDChains");

    D_SELFTEST(340) {
        cout << "Searching FUD chains in node " << x->number() << endl
            << "Variables referenced in this node" << endl;

        copy(x->refs().begin(), x->refs().end(),
                ostream_iterator<p_varref>(cout, "\t\n")
            );
    }

    /* for each variable use or def or phi-term R in X do
     *     let M be the variable referenced at R
     *     if R is a use then
     *         Chain(R) = CurrDef(M)
     *     else if R is a def or $\phi$-term then
     *         SaveChain(R) = CurrDef(M)
     *         CurrDef(M) = R
     *     endif
     * endfor
     */
    set_varref::iterator ref_iter = x->refs().begin();
    for (; ref_iter != x->refs().end(); ++ref_iter) {
        p_varref ref = *ref_iter;
        p_var_sym var = ref->var();

        /* Retrieve the current definition for the variable */
        p_vardef currdef = get_currdef(var);

        D_SELFTEST(342) {
            cout << "Current definition for " << var << ": " << currdef << endl;
        }

        if (ref->isU()) {
            ref->set_chain(currdef);
            /* Besides setting a link back to our immediate control
             * reaching definition (whether a $\phi$ term or an actual
             * definition), we want to link that definition to its
             * immediate use.
             *
             * If this use (ref) has a current definition (currdef), we add
             * 'ref' to the list of uses immediately reached by 'currdef'.
             */
            if (currdef) {
                currdef->immediateUses().push_back(ref);
            }
        } else if (ref->isD() || ref->isPHI()) {
            /* Remember the definition being shadowed so it can be restored
             * once the dominator subtree of X has been processed.
             */
            ref->set_save_chain(currdef);
            /* Replace the current k_currdef with a new one */
            set_currdef(var, ref);

            D_SELFTEST(342) {
                cout << "New definition for " << var->name() << ": ";
                ref->print();
                cout << endl;
            }
        }
    }

    /* for Y in SUCC(X) do
     *    J = WhichPred(X -> Y)
     *    for each phi-term R in Y do
     *        let M be the variable referenced at R
     *        phi-chain(R)[J] = CurrDef(M)
     *    endfor
     * endfor
     *
     * Note that the argument is appended to the phi-chain rather than
     * stored at index J, so arguments appear in the order in which the
     * predecessors are visited by this traversal.
     */
    int j;
    for (j = 0; j < x->succs()->count(); j++) {
        ccfg_node *y = (*(x->succs()))[j];

        set_phiterm::iterator phi_iter = y->phiterms().begin();
        for (; phi_iter != y->phiterms().end(); ++phi_iter) {
            p_phiterm phi = *phi_iter;
            p_var_sym var = phi->var();
            p_vardef currdef = get_currdef(var);
            phi->phi_chain().push_back(currdef);

            D_SELFTEST(342) {
                cout << "Updated phi-term "; phi->print();
                cout << endl << "\twith argument " << currdef << endl;
            }
        }
    }


    /* for Y in Child(X) do	<-- Child(X) is the set of dominator
     * 	   Search(Y)                children of X in the dominator tree.
     * endfor
     *
     * The children are found by scanning every node of the graph for the
     * ones whose immediate dominator is X.
     */
    for (j = 0; j < (int)_graph->num_nodes(); j++) {
        p_ccfg_node y = _graph->node(j);
        if (_graph->immed_dom(y) == x) {
            D_SELFTEST(342){
                cout << "Node " << x->number()
                     << " -> searching dominator child " << y->number() << endl;
            }

            this->searchFUDChains(y);
        }
    }

    /* for each variable use or def or phi-term R in X in reverse order do
     *     let M be the variable referenced at R
     *     if R is a def or a phi-term then
     *         CurrDef(M) = SaveChain(R)
     *     endif
     * endfor
     */
    set_varref::reverse_iterator i = x->refs().rbegin();
    for (; i != x->refs().rend(); ++i) {
        p_varref ref = *i;
        if (ref->isD() || ref->isPHI()) {
            /* Restore the current definition for the variable */
            set_currdef(ref->var(), ref->save_chain());
        }
    }

    /* Close the trace opened by D_SELFTEST_HEADER above, as every other
     * method in this file does.
     */
    D_SELFTEST_FOOTER(340);
}


/**
Places pi-terms at concurrent join nodes. A concurrent join node is a node x
that has at least one incoming DU conflict edge for a variable v. This
algorithm assumes that phi-terms have already been placed. The conflicts for a
node X are those nodes that have X as the conflict head. This makes it easier
when looking for all the incoming DU edges into each node.

Adapted from "Concurrent Static Single Assignment Form and Constant Propagation
for Explicitly Parallel Programs", J. Lee, S. P. Midkiff and D. A. Padua. LCPC
'97.
*/
void
cssame::placePiFunctions()
{
    D_SELFTEST_HEADER(290, "cssame::placePiFunctions");

    unsigned n;
    for (n = 0; n < _graph->num_nodes(); n++) {
	p_ccfg_node node = _graph->node(n);

	D_SELFTEST(290) {
	    cout << endl;
	    cout << "Checking conflicts for node"; node->print(); cout << endl;
	}

	if (node->conflicts().size() == 0) {
	    D_SELFTEST(290) { cout << "No conflicts. Skipping.\n\n"; }
	    continue;
	}

	D_SELFTEST(290) {
	    cout << "\nConflicts found\n";
	    copy(node->conflicts().begin(), node->conflicts().end(),
		    ostream_iterator<conflict>(cout, "")
		);
	    cout << endl;
	}

	/* The first argument to the PI function is the incoming control
	 * edge. Because of the way the flow graph is constructed there
	 * must only be one incoming control edge.
	 */
	assert(node->preds()->count() == 1);

	/* We traverse the incoming conflict edges to this node. For every
	 * incoming DU edge we create a new PI node for the conflicting
	 * variable if one hasn't been created already. If a PI term
	 * already exists for the variable, we add the reference to the
	 * list of arguments.
	 */
	list_conflict::iterator conf_iter = node->conflicts().begin();
	for (; conf_iter != node->conflicts().end(); conf_iter++) {
	    conflict conf = *conf_iter;
	    p_varref use = conf.head();
	    p_varref def = conf.tail();
	    if (def->isD() && use->isU()) {
		p_var_sym var = use->var();

		p_phiterm newpi = node->hasPi(var);
		if (!newpi) {
		    newpi = new phiterm(var, node, PI);

		    /* Since every PI term must contain an argument for the
		     * incoming control flow edge, we add the control
		     * reaching definition (cr_def) for the variable to the
		     * PI term. The incoming definition is the chain
		     * element for the conflicting reference (computed when
		     * we placed phi-terms).
		     * 
		     * A NULL incoming control definition means that
		     * there's something wrong. Every variable should have
		     * at least one definition.
		     */
		    p_vardef cr_def = use->chain();
		    assert(cr_def);
		    newpi->phi_chain().push_back(cr_def);
		}

		/* Before adding the arguments to the new pi-term we should
		 * compute the guaranteed ordering information. Some
		 * arguments to the pi-term might be eliminated due to
		 * guaranteed ordering between the concurrent basic blocks.
		 *
		 * If the use of the variable is guaranteed to execute
		 * before the incoming definition, then we don't need to
		 * add an argument for that particular definition because
		 * it can't possibly affect this use.
		 */
		D_SELFTEST(290) {
		    cout << "Checking conflict for variable " << var->name() 
			<< endl;
		    cout << "Definition "; def->print(); cout << endl;
		    cout << "Use        "; use->print(); cout << endl;
		}

		if (def->node()->prec().contains(use->node()->number())) {
		    /* Do nothing. The use precedes the def, so the
		     * definition cannot reach it. Therefore, the
		     * definition will not be added to the arguments of the
		     * PI term.
		     */
		    D_SELFTEST(290) {
			cout << "The use precedes the definition. "
			     << "No argument added to the PI term\n";
		    }
		} else {
		    D_SELFTEST(290) {
			cout << "The definition precedes the use. "
			     << "Adding definition to the PI term\n";
		    }

		    newpi->phi_chain().push_back(def);
		}

		/* Finally, we chain the use for the var to the pi-term we
		 * just added. (ie, link this use to its reaching def). We
		 * also link the PI term to the use by adding this use to
		 * the list of immediate reached uses for the PI term.
		 */
		use->set_chain(newpi);
		newpi->immediateUses().push_back(use);

		D_SELFTEST(290) {
		    cout << "Added/modified PI term for variable '"
			<< var->name() << "': "; newpi->print(); cout << endl;
		}
	    }
	}
    }

    D_SELFTEST_FOOTER(290);
}


/**
This method removes arguments to PI terms based on the mutex bodies found in
the program. 
*/
void
cssame::rewritePiFunctions()
{
    D_SELFTEST_HEADER(410, "cssame::rewritePiFunctions");

    D_SELFTEST(410) {
	cout << "CSSAME before rewriting PI terms" << endl << *this << "\n\n";

	set_var_sym::iterator mutex_iter = _mutexVars.begin();
	for (; mutex_iter != _mutexVars.end(); mutex_iter++) {
	    p_var_sym var = *mutex_iter;

	    cout << endl << "Mutex structure for synchronization variable '"
		<< var->name() << "'" << endl;

	    mutex_struct *s = get_mutex_struct(var);
	    copy(s->begin(),s->end(),ostream_iterator<p_mutex_body>(cout,"\n"));
	}
    }

    /* First step. Compute the set of definitions that reach the exit of
     * each mutex body in every mutex structure. Also compute the list of
     * upward exposed uses for the mutex body.
     */
    D_SELFTEST(410) {
	cout << "First step. Compute exit reaching defs and upward exposed "
	    << "uses for each" << endl << "mutex body" << endl << endl;
    }
    set_var_sym::iterator mutex_iter = _mutexVars.begin();
    for (; mutex_iter != _mutexVars.end(); mutex_iter++) {
	p_var_sym var = *mutex_iter;
	mutex_struct *mxstruct = get_mutex_struct(var);

	D_SELFTEST(410) {
	    cout << "\nMutex bodies for variable " << var << endl;
	}

	mutex_struct::iterator interval_iter = mxstruct->begin();
	for (; interval_iter != mxstruct->end(); interval_iter++) {
	    p_mutex_body interval = *interval_iter;
	    interval->computeExitRDefs();
	    interval->computeUpwardExposedUses();

	    D_SELFTEST(410) {
		cout << "\nMutex body: " << interval << endl;
		cout << "Exit reaching definitions:\n\t";
		copy(interval->exitRDefs().begin(), interval->exitRDefs().end(),
		    ostream_iterator<p_vardef>(cout, "\n\t"));
		cout << endl;
		cout << "Upward exposed uses:\n\t";
		copy(interval->upwardExposedUses().begin(), 
		     interval->upwardExposedUses().end(),
		    ostream_iterator<p_varuse>(cout, "\n\t"));
		cout << endl;
	    }
	}
    }


    /* Second step. For each PI term in every body of a mutex structure,
     * remove those arguments that come from other mutex bodies and are not
     * in the set of exit reaching definitions.
     */
    D_SELFTEST(410) {
	cout << "Second step. Remove PI-term arguments." << endl;
    }

    mutex_iter = _mutexVars.begin();
    for (; mutex_iter != _mutexVars.end(); mutex_iter++) {
	p_var_sym var = *mutex_iter;

	mutex_struct *mxstruct = get_mutex_struct(var);
	mutex_struct::iterator interval_iter = mxstruct->begin();
	for (; interval_iter != mxstruct->end(); interval_iter++) {
	    p_mutex_body interval = *interval_iter;
	    D_SELFTEST(410) {
		cout << endl;
		cout << "Rewriting PI terms for variable " << var << endl;
		cout << "inside mutex body " << interval << endl;
	    }
	    interval->rewritePiFunctions();
	}
    }

    D_SELFTEST(410) {
	cout << "\nCSSAME after rewriting PI terms" << endl << *this << "\n\n";
    }

    D_SELFTEST_FOOTER(410);
}


/**
Remove $\pi$ terms that have no effect on the program. This method analyzes
all the $\pi$ terms in the program. A $\pi$ term that only has one
argument is superfluous because it means that all the conflicts that it is
supposed to model have been eliminated by the synchronization analysis
phases.
*/
void
cssame::removeRedundantPis()
{
    D_SELFTEST_HEADER(370, "cssame::removeRedundantPis");

    /* Once all the PI functions have been examined, we check how many
     * arguments are left in each PI term. If only one argument is left, it
     * means that all the conflicts were eliminated by the lock. Therefore, the
     * PI function is superfluous and can be eliminated.
     *
     * Before we remove the PI function from the graph we have to update
     * the phi chaining for the reference it is affecting. That is, the
     * reference 'u' that this PI function is reaching is a variable use
     * whose control reaching definition is the PI term itself. Since the
     * PI term is going to be eliminated, we link 'u' to the first argument
     * of the PI term.
     */
    set_phiterm::iterator pi_iter = _graph->piterms().begin();
    for (; pi_iter != _graph->piterms().end(); pi_iter++) {
	p_phiterm pi = *pi_iter;
	if (pi->removed()) {
	    continue;
	}

	if (pi->phi_chain().size() == 1) {
	    /* The first (and only) use in the $\pi$-term's list of
	     * immediate uses is the reference that we have to fix. We
	     * link that reference's UD chain (varref::chain()) to the
	     * control reaching definition of the $\pi$ term. After doing
	     * that, we remove the $\pi$ term from the $\pi$ list.
	     */
	    p_varuse use = pi->immediateUses().front();
	    p_vardef ctrl_rdef = pi->phi_chain().front();
	    assert(use);
	    use->set_chain(ctrl_rdef);

	    D_SELFTEST(370) {
		cout << "Removing PI term " << pi;
		cout << "\n\twith control reaching def: " << ctrl_rdef;
		cout << "\n\tand immediate use: " << use;
		cout << "\n\tThe new control reaching def for the use is: "
		    << use->chain() << endl;
	    }

	    /* [HACK] Implement smart pointers! (4-Feb-99) */
	    /* delete pi; */
	    pi->remove();
	}
    }

    D_SELFTEST_FOOTER(370);
}


/**
Remove $\phi$ terms that have no effect on the program. This is done
using a refinement on the rules set out in Lee et al.'s paper "Concurrent
Static Single Assignment Form and Constant Propagation for Explicitly
Parallel Programs" (LCPC'97).

Specifically, a PHI term is superfluous at a coend node if all of its
parameters are the same or all but one of its parameters are defined before
the corresponding cobegin node.

Notice that since we use dominance frontiers to place $\phi$ terms (as
opposed to the Brandis/Mossenbock approach used by Lee et al), we never
place a $\phi$ term that will have all its arguments the same.

If all the arguments but one come from nodes before the cobegin node, this
means that only one of the threads makes a new definition for the variable.
This definition will always overwrite the definitions made before the
cobegin, so the $\phi$ term is not needed.


We refine these rules using the following observation:

Let $p$ be the $\phi$-term being analyzed. Any argument to $p$ coming from
nodes prior to the 'cobegin' node is not needed because it will be
superseded by definitions made inside the cobegin/coend structure.

This is correct because of the following reasoning:
1- A $\phi$ term at a coend node implies that there is at least one definition
   for the variable inside the cobegin/coend structure.

2- Since all the branches of cobegin/coend are always executed, definitions
   inside cobegin/coend always kill definitions made prior to cobegin.

Notice that this is correct even if the definitions made inside
cobegin/coend are inside conditional structures. When that happens, the
definition reaching the coend node will be a $\phi$ term which will have the
definition prior to the cobegin as an argument, so we haven't lost
anything.

At the end of the analysis, if $p$ is left with only one argument, it can
be safely eliminated. All the immediate uses of $p$ must be updated so that
their control reaching definition is $p$'s only argument.
*/
void
cssame::removeRedundantPhis()
{
    D_SELFTEST_HEADER(360, "cssame::removeRedundantPhis");

    /* Examine all the PHI terms at coend nodes in the program to see if
     * they are superfluous. Nodes that are not coend nodes are skipped.
     */
    unsigned n;
    for (n = 0; n < _graph->num_nodes(); n++) {
	p_ccfg_node node = _graph->node(n);

	if (!node->is_coend()) {
	    continue;
	}

	D_SELFTEST(360) {
	    cout << "Analyzing coend node " << node->number() 
		<< ": "; node->print();
	    cout << endl;
	}

	p_ccfg_coend coend = node;
	p_ccfg_cobegin cobegin = coend->companion();
	assert(cobegin);


	/* Traverse all the PHI terms at the coend node. Analyze them and
	 * remove arguments that comply with the following condition:
	 *
	 * Let $p$ be the PHI term being analyzed. Any argument to $p$
	 * coming from nodes prior to the 'cobegin' node is not needed
	 * because it will be superseded by definitions made inside the
	 * cobegin/coend structure.
	 *
	 * This is correct because of the following reasoning:
	 * 1- A PHI term at a coend node implies that there is at least one
	 *    definition for the variable inside the cobegin/coend
	 *    structure.
	 *
	 * 2- Since all the branches of cobegin/coend are always
	 *    executed, definitions inside cobegin/coend always kill
	 *    definitions made prior to cobegin.
	 *
	 * Notice that this is correct even if the definitions made inside
	 * cobegin/coend are inside conditional structures. When that
	 * happens, the definition reaching the coend node will be a PHI
	 * term which will have the definition prior to the cobegin as an
	 * argument, so we haven't lost anything.
	 *
	 * At the end of the analysis, if $p$ is left with only one
	 * argument, it can be safely eliminated.
	 */
	set_varref::iterator refs_iter = coend->refs().begin();
	for (; refs_iter != coend->refs().end(); refs_iter++) {
	    p_varref ref = *refs_iter;

	    /* A coend node should only contain PHI terms */
	    assert(ref->isPHI());
	    p_phiterm phi = ref;
	    
	    D_SELFTEST(360) {
		cout << "\n\nAnalyzing PHI term at node " << node->number() 
		    << ": " << phi << endl;
	    }

	    /* Drop every argument whose defining node dominates the
	     * cobegin node. erase() hands back the iterator following the
	     * erased element, so the iterator is only advanced by hand
	     * when nothing was erased.
	     */
	    vector_vardef::iterator args_iter = phi->phi_chain().begin();
	    while (args_iter != phi->phi_chain().end()) {
		p_vardef arg = *args_iter;

		if (arg->node()->dominates(cobegin)) {
		    D_SELFTEST(361) {
			cout << "\tArgument "; arg->print(); cout << endl;
			cout << "\tcomes from a node prior to cobegin, "
			    << "so it is not needed" << endl;
		    }
		    args_iter = phi->phi_chain().erase(args_iter);
		} else {
		    args_iter++;
		}
	    }


	    /* If the PHI term contains only one argument, it can be
	     * eliminated. We also update the term's immediate reached use
	     * 'u': the control reaching definition for 'u' is now the PHI
	     * term's only argument.
	     */
	    if (phi->phi_chain().size() == 1) {
		/* Exactly one argument is left, so front() is valid here
		 * whether or not the term has any immediate use.
		 */
		p_vardef ctrl_rdef = phi->phi_chain().front();
		p_varuse use;
		bool has_use = !phi->immediateUses().empty();

		/* NOTE(review): the method documentation says that all the
		 * immediate uses of the PHI term must be updated, but only
		 * the first one is re-linked here -- confirm whether a PHI
		 * term at a coend node can have more than one immediate
		 * use.
		 */
		if (has_use) {
		    use = phi->immediateUses().front();
		    assert(use);
		    use->set_chain(ctrl_rdef);
		}

		D_SELFTEST(360) {
		    cout << "Removing PHI term "; phi->print(); cout << endl
			<< "\twith control reaching def: " << ctrl_rdef << endl;
		    /* 'use' is only assigned when the term has an immediate
		     * use; do not print or dereference it otherwise.
		     */
		    if (has_use) {
			cout << "\tand immediate use: " << use << endl;
			cout << "\tThe new control reaching def is: "
			    << use->chain() << endl;
		    } else {
			cout << "\tand no immediate uses" << endl;
		    }
		}

		phi->remove();	/// Mark the $\phi$ term as removed.
	    }
	}
    }

    D_SELFTEST_FOOTER(360);
}


/**
Add default (or ghost) definitions for every variable. This method adds
definitions for every variable in the symbol table to the first node of the
program. This is a trick that avoids PHI terms with NULL arguments which
might confuse some methods.
*/
void
cssame::addGhostDefinitions()
{
    D_SELFTEST_HEADER(380, "cssame::addGhostDefinitions");

    /* Add a definition for variables that have not been defined anywhere
     * at the start of the program. We associate these definitions with
     * node 0 and the first instruction of the program.
     */
    instruction *in = first_ti(_graph->tproc()->body())->instr();
    p_ccfg_node node = _graph->node(0);

    set_var_sym::iterator symbols_iter = _symbols.begin();
    for (; symbols_iter != _symbols.end(); symbols_iter++) {
	p_var_sym var = *symbols_iter;
	vardef *newdef = new vardef(var, in, node);
	D_SELFTEST(380) {
	    cout << "Added ghost definition "; newdef->print();
	    cout << ": at line " << source_line_num(in) << endl;
	}
    }

    D_SELFTEST_FOOTER(380);
}


/* Forward declaration of the file-local helper used by
 * cssame::buildSymbolList(); the definition follows that method.
 */
static void _add_symbols_from(base_symtab *table, set_var_sym *symbols);

/**
Create the list of symbols for the different methods that need to access
the symbol table. We append all the symbols in the current symbol table and
all the enclosing symbol tables.
*/
void
cssame::buildSymbolList()
{
    base_symtab *current = _graph->tproc()->proc_syms();
    for (; current != NULL; current = current->parent()) {
	_add_symbols_from(current, &_symbols);
    }

    /* Add symbols found in children symbol tables. */
    current = _graph->tproc()->proc_syms();
    base_symtab_list_iter iter(current->children());
    while (!iter.is_empty()) {
	_add_symbols_from(iter.step(), &_symbols);
    }
}


/* Insert into 'symbols' every entry of 'table' that is both a variable
 * and user defined; all other entries are ignored.
 */
static void
_add_symbols_from(base_symtab *table, set_var_sym *symbols)
{
    sym_node_list_iter cursor(table->symbols());
    while (!cursor.is_empty()) {
	sym_node *candidate = cursor.step();

	/* Ignore anything that is not a user-defined variable. */
	if (!candidate->is_var() || !candidate->is_userdef()) {
	    continue;
	}

	symbols->insert(p_var_sym(candidate));
    }
}




/**
Add conflict and synchronization edges between the nodes in the graph.
*/
void 
cssame::addConfSyncEdges()
{
    D_SELFTEST_HEADER(420, "cssame::addConfSyncEdges");

    list_conflict *confs = ::get_conflicts(_graph->tproc().ptr());

    D_SELFTEST(420) {
	cout << "Conflicts found in the procedure" << endl;
	copy(confs->begin(), confs->end(), ostream_iterator<conflict>(cout,""));
	cout << endl;
    }

    /* Now we can add the sync/conflict edges. We update the _csuccs/_ssuccs
     * and _cpreds/_spreds list for each node. The _csuccs list for node i
     * contains all the nodes j that conflict with i for which i is the tail of
     * the conflict. The _cpreds list is similarly defined.
     */
    list_conflict::iterator iter;
    for (iter = confs->begin(); iter != confs->end(); iter++) {
	conflict conf = *iter;
	p_varref tail = conf.tail();
	p_varref head = conf.head();

	/* Add the conflict to the list of conflicts for the head nodes.
	 * The list of conflicts for a node 'n' are all the conflicts for
	 * which 'n' is the head of the conflict. This simplifies the
	 * process of adding PI functions to the graph.
	 */
	head->node()->conflicts().push_back(conf);

	/* If the tail of the conflict is a directed synchronization
	 * instruction(eg, set), we update the synchronization successors
	 * and predecessors. Otherwise, it is a regular conflict.
	 */
	if (tail->isTSync()) {
	    D_SELFTEST(420) {
		cout << "\nAdding a directed synchronization edge between:\n";
		cout << "\tTail: "; tail->print(); cout << endl;
		cout << "\tHead: "; head->print(); cout << endl;
	    }
	    tail->node()->ssuccs()->append(head->node().ptr());
	    head->node()->spreds()->append(tail->node().ptr());
	} else {
	    D_SELFTEST(420) {
		cout << "\nAdding a conflict edge between:\n";
		cout << "\tTail: "; tail->print(); cout << endl;
		cout << "\tHead: "; head->print(); cout << endl;
	    }
	    tail->node()->csuccs()->append(head->node().ptr());
	    head->node()->cpreds()->append(tail->node().ptr());
	}
    }

    D_SELFTEST_FOOTER(420);
}
