/**
$Id: findrefs.cc,v 2.4 1999/03/13 19:12:47 diego Exp $
Free functions to find all the memory references made by each procedure of the
program.
*/
#include <suif_copyright.h>

#define _MODULE_ "CSSAME"

#include <suif.h>
#include <useful.h>

#include "ccfg.h"

/* Private functions */
static void _find_refs(tree_node *tn, void *x);


/**
Find variable definitions and uses in the procedure. Variable reference
objects are stored in collections associated to the instruction trees that
make them. Returns the set of all references made in the given tree_proc.

If a reference set is already attached to the procedure (as reported by
::get_varrefs), that set is returned unchanged. Otherwise a new set is
allocated, attached to the procedure with ::set_varrefs and filled by
mapping _find_refs over the procedure's body.
*/
set_varref *
find_memory_refs(tree_proc *tp)
{
    D_STACK(find_memory_refs);
    D_SELFTEST_HEADER(70, "find_memory_refs");

    D_SELFTEST(70) {
        cout << "Looking for references in procedure '" << tp->proc()->name()
            << "':\n";
    }

    set_varref *refs = ::get_varrefs(tp, false);
    if (refs) {
        /* If references have been gathered already, return. */
        D_SELFTEST(70) {
            cout << "References for function '" << tp->proc()->name()
                << "' have been computed already. Re-using.\n";
        }

        /* Emit the footer on this early exit too, so that the header
         * above is always matched by a footer (the same convention
         * _find_vars_ref_by follows on its early returns).
         */
        D_SELFTEST_FOOTER(70);
        return refs;
    }

    /* Map the reference gathering function over all the tree nodes in the
     * procedure's body. Notice that we annotate the procedure before
     * looking for references. This will avoid infinite loops in case of
     * recursive calls.
     */
    refs = new set_varref;
    ::set_varrefs(tp, refs);
    tp->body()->map(_find_refs, (void *)refs);

    D_SELFTEST(70) {
        cout << "\n\nReferences found in procedure '" << tp->proc()->name()
            << "':\n";
        copy(refs->begin(), refs->end(),ostream_iterator<p_varref>(cout, "\n"));
    }

    D_SELFTEST_FOOTER(70);

    return refs;
}


/*---------------------------------------------------------------------------
			       Private functions
---------------------------------------------------------------------------*/
static void _find_refs(instruction *in, set_varref *refs);

/**
Mapped function applied to each tree node of a procedure body. The opaque
argument 'x' is the set_varref that collects the references.

Instruction nodes are handed to the instruction-level reference finder.
Sequential tree_for nodes get explicit references for their index variable.
Every other kind of node is left alone.
*/
void
_find_refs(tree_node *tn, void *x)
{
    D_STACK(_find_refs);
    D_SELFTEST_HEADER(100, "_find_refs(tree_node)");

    set_varref *all_refs = (set_varref *)x;

    D_SELFTEST(100) {
        cout << "Analyzing tree_node:\n";
        tn->print(); fflush(stdout);
        cout << endl;
    }

    if (tn->is_instr()) {
        /* Plain instruction: let the instruction-level routine do the
         * actual work.
         */
        tree_instr *instr_node = (tree_instr *)tn;
        ::_find_refs(instr_node->instr(), all_refs);
    } else if (tn->is_for() && !is_parloop(tn)) {
        /* Sequential tree_for. The loop index variable needs two
         * definitions and two uses:
         *
         * 1- A definition for the initial assignment.
         * 2- A use for testing the upper bound of the loop.
         * 3- A use for incrementing the index at every iteration.
         * 4- A definition for storing the new index value at every
         *    iteration.
         *
         * ccfg_test::findRefs() later associates these four references
         * with the different nodes of the tree_for subgraph.
         */
        D_SELFTEST(100) {
            cout << "Tree node is a tree_for. Adding refs for index var.\n";
        }
        tree_for *loop = (tree_for *)tn;

        /* The upper-bound test use (2) is attached to the step
         * instruction as well, to make it easier for
         * ccfg_test::findRefs to find all the references.
         */
        tree_instr *lb_node = (tree_instr *)loop->lb_list()->head()->contents;
        instruction *lb_instr = lb_node->instr();
        tree_instr *step_node =
            (tree_instr *)loop->step_list()->head()->contents;
        instruction *step_instr = step_node->instr();

        all_refs->insert(new vardef(loop->index(), lb_instr, 0));
        all_refs->insert(new varuse(loop->index(), step_instr, 0));
        all_refs->insert(new varuse(loop->index(), step_instr, 0));
        all_refs->insert(new vardef(loop->index(), step_instr, 0));
    } else {
        D_SELFTEST(100) {
            cout << "Tree node is a control structure. Returning.\n";
        }
    }

    D_SELFTEST_FOOTER(100);
}



/* Local helpers for finding variable references. */
static void _find_ptr_refs(instruction *in, set_varref *refs);
static void _find_vars_ref_by(operand op, vector_var_sym *syms, set_varref *refs);
static void _find_refs_in_call(instruction *in, set_varref *refs);

/* static bool hasPointerRef(operand op); */
/* static var_sym_list *_find_aliases(instruction *in); */


/**
Collect variable reference objects for each variable referenced in the
given instruction and insert them in 'refs'. Notice that the flowgraph node
associated with the reference will be NULL if the graph has not been built
before calling this function.

A reference may be a definition or a use. Definitions may be found in the
destination operand or in pointers to variables. Uses are found in source
operands to the instruction.

The instruction is examined in this order:
  1- Function calls (references made by the call).
  2- Synchronization calls (head/tail/lock/unlock references).
  3- Pointer references in lod, str and memcpy instructions.
  4- Uses in the source operands.
  5- The definition in the destination operand.
*/
void
_find_refs(instruction *in, set_varref *refs)
{
    D_STACK(_find_refs);
    D_SELFTEST_HEADER(60, "_find_refs(instruction)");

    p_ccfg_node node = get_node(in->parent());
    if_ops op = in->opcode();

    D_SELFTEST(61) {
        cout << "Checking instruction at line " << source_line_num(in) << ":\n";
        in->print(); fflush(stdout);
        cout << endl;
    }

    /* If the instruction is a function call, analyze the arguments to see
     * if the functions might modify local and/or global variables.
     */
    if (op == io_cal) {
        ::_find_refs_in_call(in, refs);
    }

    /* Check if the instruction is one of the synchronization functions. If
     * it is, add the synch reference to the list of references.
     */
    if (is_head_sync(in) || is_tail_sync(in) || is_mutex_sync(in)) {
        in_cal *call = (in_cal *)in;
        vector_var_sym synch_vars;
        ::_find_vars_ref_by(call->argument(0), &synch_vars, refs);

        /* The first argument must name exactly one variable. */
        assert(synch_vars.size() == 1);

        p_var_sym var = synch_vars[0];

        D_SELFTEST(61) {
            cout << "Line " << source_line_num(in)
                << ". Found synchronization operation: "; in->print();
            cout << endl;
        }

        if (is_head_sync(in)) {
            head_syncref *ref = new head_syncref(var, in, node);
            refs->insert(ref);
        } else if (is_tail_sync(in)){
            tail_syncref *ref = new tail_syncref(var, in, node);
            refs->insert(ref);
        } else if (is_lock_sync(in)) {
            lockref *ref = new lockref(var, in, node);
            refs->insert(ref);
        } else if (is_unlock_sync(in)) {
            unlockref *ref = new unlockref(var, in, node);
            refs->insert(ref);
        } else {
            error_line(1, in, ("Unknown synchronization instruction\n"));
        }
    }


    /* Special case. Check store, load and memcpy instructions for pointer,
     * structures and array references.
     */
    if (op == io_lod || op == io_str || op == io_memcpy) {
        ::_find_ptr_refs(in, refs);
    }


    /* Now check the destination and source operands for the instruction.
     * Note that we must check all the source operands first because if the
     * instruction defines and uses the same variable, the use should
     * appear first so that the FUD chain algorithm doesn't get confused by
     * linking the use to the def in this instruction.
     */
    for (unsigned i = 0; i < in->num_srcs(); i++) {
        vector_var_sym used;

        ::_find_vars_ref_by(in->src_op(i), &used, refs);

        /* The iterator has its own name so it does not shadow the
         * operand index 'i' of the enclosing loop.
         */
        for (vector_var_sym::iterator vi = used.begin(); vi != used.end();
                ++vi) {
            p_var_sym var = *vi;

            D_SELFTEST(61) {
                cout << "Line " << source_line_num(in) << ". Found use "
                    << "for '" << var << "'\n";
            }

            p_varuse newref(new varuse(var, in, node));
            refs->insert(newref);
        }
    }


    /* Check the destination operand.
     *
     * [HACK] We only create a vardef object for the first symbol found.
     *        I'm almost positive that SUIF instructions are always setup
     *        so that the first symbol we find is the symbol being defined.
     *
     *        If there is more than one symbol in the destination operand,
     *        the others are just being used and they will be found in
     *        later recursions.
     */
    vector_var_sym vars;
    ::_find_vars_ref_by(in->dst_op(), &vars, refs);
    if (vars.size() >= 1) {
        p_var_sym var = vars[0];

        D_SELFTEST(61) {
            cout << "Line " << source_line_num(in)
                << ". Found definition for '" << var << "'\n";
        }

        p_vardef newref(new vardef(var, in, node));
        refs->insert(newref);
    }

    D_SELFTEST_FOOTER(60);
}



/**
Finds symbol references in memory copy instructions (str, lod and memcpy).
These instructions move data from memory addresses which can be pointers,
arrays and fields within data structures.

Every symbol found in the source address of a 'lod' or 'memcpy' becomes a
use. Only the first symbol found in the destination address of a 'str' or
'memcpy' becomes a definition. Uses are inserted before the definition.
*/
void
_find_ptr_refs(instruction *in, set_varref *refs)
{
    D_SELFTEST_HEADER(60, "_find_ptr_refs");

    p_ccfg_node node = get_node(in->parent());

    if_ops op = in->opcode();
    assert(op == io_lod || op == io_str || op == io_memcpy);

    D_SELFTEST(60) {
        cout << "Checking instruction for pointer references" << endl;
        in->print();
    }

    in_rrr *mem_instr = (in_rrr *)in;
    bool reads_address = (op == io_lod || op == io_memcpy);
    bool writes_address = (op == io_str || op == io_memcpy);

    /* Uses first: the source address of 'lod' and 'memcpy'. As in the
     * instruction-level finder, uses must be recorded before defs.
     */
    if (reads_address) {
        vector_var_sym sources;

        ::_find_vars_ref_by(mem_instr->src_addr_op(), &sources, refs);

        for (unsigned k = 0; k < sources.size(); k++) {
            p_var_sym var = sources[k];
            D_SELFTEST(60) { cout << "Found source address: " << var << endl; }

            p_varuse use(new varuse(var, in, node));
            refs->insert(use);
        }
    }

    /* Then the definition: the destination address of 'str' and
     * 'memcpy'. Only the first symbol found is treated as defined.
     */
    if (writes_address) {
        vector_var_sym targets;
        ::_find_vars_ref_by(mem_instr->dst_addr_op(), &targets, refs);

        if (targets.size() > 0) {
            p_var_sym var = targets[0];

            D_SELFTEST(61) {
                cout << "Line " << source_line_num(in)
                    << ". Found definition for '" << var << "'\n";
            }

            p_vardef newref(new vardef(var, in, node));
            refs->insert(newref);
        }
    }

    D_SELFTEST_FOOTER(60);
}



/**
Helper to find the symbols referenced by an operand. There are three types
of operands in SUIF: null, symbol and instruction.

  - A null operand contributes nothing; 'vars' is left untouched.
  - A symbol operand is appended to 'vars' if it is user defined.
  - An instruction operand is searched recursively through its source
    operands. Two cases are special:
      * 'ldc' of a symbol: the symbol is appended only when it is a user
        defined variable of structure or array type. The recursion stops
        here.
      * 'cal': references made by the call are gathered into 'refs'
        before recursing into its source operands.
*/
void
_find_vars_ref_by(operand op, vector_var_sym *vars, set_varref *refs)
{
    D_SELFTEST_HEADER(60, "_find_vars_ref_by");

    D_SELFTEST(60) {
        cout << endl;
        cout << "Looking for a symbol in operand:\n";
        op.print(); fflush(stdout);
        cout << endl;
    }

    /* Nothing to do for null operands. */
    if (op.is_null()) {
        D_SELFTEST(61) { cout << "Operand is null. No symbols found.\n"; }
        D_SELFTEST_FOOTER(60);
        return;
    }

    /* A symbol operand is recorded directly (user defined ones only). */
    if (op.is_symbol()) {
        p_var_sym var = op.symbol();

        D_SELFTEST(61) { cout << "Operand is a symbol: '" << var << "'.\n"; }

        if (!var->is_userdef()) {
            D_SELFTEST(61) { cout << "It is not user defined.\n"; }
        } else {
            vars->push_back(op.symbol());
        }

        D_SELFTEST_FOOTER(60);
        return;
    }


    /* Anything else must be an instruction operand. */
    assert(op.is_instr());
    instruction *nested = op.instr();

    D_SELFTEST(61) { cout << "Operand is an instruction.\n"; }

    /* Special case for 'ldc' instructions. An ldc instruction may load the
     * address of a symbol or it may load another constant. Only symbols
     * are of interest here; in every case the function returns without
     * recursing any further.
     */
    if (nested->opcode() == io_ldc) {
        D_SELFTEST(61) { cout << "Checking 'ldc' instruction for symbols.\n"; }
        in_ldc *ldc = (in_ldc *)nested;

        if (!ldc->value().is_symbol()) {
            D_SELFTEST(61) { cout << "It does not refer to any variable.\n"; }
            D_SELFTEST_FOOTER(60);
            return;
        }

        p_sym_node sym = ldc->value().symbol();

        D_SELFTEST(61) { cout << "Found symbol '" << sym << "'.\n"; }

        if (!(sym->is_var() && sym->is_userdef())) {
            D_SELFTEST(61) {
                cout << "It is not user defined or it's a function.\n";
            }
            D_SELFTEST_FOOTER(60);
            return;
        }

        p_var_sym var = sym;
        type_node *var_type = var->type();

        D_SELFTEST(62) {
            cout << "The symbol is a user defined variable.\n";

            cout << "The offset is: " << ldc->value().offset() << "\n";

            cout << "The variable type is: "; var_type->print();
            cout << ((var_type->is_struct()) ? " (structure)" :
                    " (non-structure)") << endl;
        }

        /* Only structures and arrays count as referenced. For any other
         * type this ldc is just loading the address of a regular
         * variable, which is not a reference to the variable.
         */
        if (var_type->is_struct() || var_type->is_array()) {
            vars->push_back(var);
        }

        D_SELFTEST_FOOTER(60);
        return;
    }

    /* If the instruction operand is a function call, look for references
     * made by the function.
     */
    if (nested->opcode() == io_cal) {
        ::_find_refs_in_call(nested, refs);
    }

    /* General case. Recurse into the source operands looking for symbols.
     * It's not necessary to look in the destination operand of the
     * instruction, because the destination operand is our parent
     * instruction.
     */
    unsigned src = 0;
    while (src < nested->num_srcs()) {
        D_SELFTEST(61) {
            cout << "Looking for variables in source operand " << src << ":\n";
        }
        ::_find_vars_ref_by(nested->src_op(src), vars, refs);
        src++;
    }

    D_SELFTEST_FOOTER(60);
}



/**
Looks for aliases in the given instruction. The result type of the
instruction must be a pointer (asserted). The analysis is very simple for
now: the only case recognized is an 'ldc' instruction whose value is a
variable symbol (the ampersand operator).

Returns a newly allocated list holding at most one variable. The list is
empty when no alias is recognized.

[ NOTE - 11/Jan/99 - This method is not called by anybody. Does it work? ]

*/
var_sym_list *
findAliases(instruction *in)
{
    assert(in->result_type()->is_ptr());

    var_sym_list *aliases = new var_sym_list;

    /* Only 'ldc' instructions are understood. */
    if (in->opcode() != io_ldc) {
        return aliases;
    }

    immed value = ((in_ldc *)in)->value();
    if (value.is_symbol()) {
        sym_node *target = value.symbol();
        if (target->is_var()) {
            aliases->append((var_sym *)target);
        }
    }

    return aliases;
}


static void _find_refs_in_proc(p_proc_sym proc, instruction *call_site, 
	set_varref *refs);
static type_node *_base_type(type_node *ptr);
static void _create_defs_for_scope(tree_instr *ti, type_node *type, 
				set_varref *refs);
static void _create_defs_for_globals(tree_proc *proc, set_varref *refs,
				    tree_instr *ti);

/**
Determine which variables are modified by the given function call and add
the corresponding references to 'refs'. Instructions that are not calls are
ignored.

Three sources of references are considered:
  1- If the body of the called function is in memory and the call is not
     directly recursive, the references made by the callee that are
     visible at the call site.
  2- If the call is impure, a definition for every local variable whose
     type matches the base type of a pointer, array or call-by-reference
     argument.
  3- If the call is impure and the body of the callee is not in memory, a
     definition for every global variable.
*/
void
_find_refs_in_call(instruction *in, set_varref *refs)
{
    D_STACK(_find_refs_in_call);
    D_SELFTEST_HEADER(90, "_find_refs_in_call");

    if (in->opcode() != io_cal) {
        /* Emit the footer on this early exit too, so that the header
         * above is always matched by a footer (the same convention
         * _find_vars_ref_by follows on its early returns).
         */
        D_SELFTEST_FOOTER(90);
        return;
    }
    in_cal *call = (in_cal *)in;

    tree_instr *ti = in->parent();
    tree_proc *proc = ti->proc()->block();

    D_SELFTEST(90) {
        cout << "Checking function call\n";
        call->print(); fflush(stdout);
        cout << endl;
    }

    /* If the function's body is in memory, examine it recursively and
     * store all the refs for common variables at the call site. Note that
     * this is not done if this is a recursive call.
     */
    p_proc_sym psym = ::func_symbol(in);
    if (psym->is_in_memory() && psym.ptr() != proc->proc()) {
        ::_find_refs_in_proc(psym, in, refs);
    }


    /* Check whether we need to add the local variables to the list of
     * variables modified by the call. If one of the arguments is a pointer
     * reference and the function may have side effects, we assume that all
     * the local variables of the same type as the pointer can be modified
     * by the call.
     *
     * [BUG] Functions in memory could actually be marked as pure! We are
     *       being overly conservative here.
     */
    if (instr_is_impure_call(in)) {
        D_SELFTEST(90) {
            cout << "Impure function '" << ::func_symbol(in)->name()
                << "' may have side effects.\n";
        }

        /* Base types already handled, so that each type generates its
         * definitions only once per call.
         */
        set<type_node *> processed_types;

        for (unsigned i = 0; i < call->num_args(); i++) {
            D_SELFTEST(90) {
                cout << "Checking argument " << i << " for pointer refs\n";
                cout << "\t"; call->argument(i).print(); fflush(stdout);
                cout << endl;
            }

            type_node *type = call->argument(i).type();
            if (type->is_ptr() || type->is_array() || type->is_call_by_ref()) {
                type_node *base = ::_base_type(type);
                if (processed_types.find(base) == processed_types.end()) {
                    ::_create_defs_for_scope(ti, base, refs);
                    processed_types.insert(base);
                }
            }
        }

        /* Now add global variables to the list of potential definitions.
         * Note that we only need to do this if the called function is not
         * in memory. If it is, we have already visited its body and we
         * know which globals are referenced in it.
         */
        if (!psym->is_in_memory()) {
            ::_create_defs_for_globals(proc, refs, ti);
        }
    } else {
        D_SELFTEST(90) {
            cout << "Pure function '" << ::func_symbol(in)->name()
                << "' has no side effects.\n";
        }
    }

    D_SELFTEST_FOOTER(90);
}



/**
Find all the references made in the called function and store the references
for symbols visible in this scope in the set of references for the caller.

psym       Symbol of the called procedure.
call_site  Call instruction. Visibility is checked against the scope of
           its parent, and every copied reference is cloned onto it.
refs       Set of references of the caller, where the clones are inserted.
*/
void
_find_refs_in_proc(p_proc_sym psym, instruction *call_site, set_varref *refs)
{
    D_SELFTEST(90) {
        cout << "\nCall to local function '" << psym->name() << "'.\n";
    }

    set_varref *fn_refs = ::get_varrefs(psym->block(), false);
    if (!fn_refs) {
        /* No references are attached to the procedure yet, so this is
         * the first time it is visited. Gather its references now.
         */
        D_SELFTEST(90) {
            cout << "Gathering references in function '" << psym->name()
                << "' for the first time.\n";
        }
        fn_refs = ::find_memory_refs(psym->block());
    } else {
        /* The procedure already carries a reference set. Since
         * find_memory_refs attaches the set before traversing the body,
         * this is also the case when the procedure is still being
         * analyzed further up the call chain (a recursion loop).
         */
        D_SELFTEST(90) {
            cout << "References for function '" << psym->name()
                << "' have been computed already. Re-using.\n";
        }
    }

    /* All the references from the called function that are visible at the
     * call site are added to the references made in the caller. The scope
     * of the call site does not change during the loop, so it is looked up
     * only once.
     */
    base_symtab *this_symtab = call_site->parent()->scope();

    for (set_varref::iterator i = fn_refs->begin(); i != fn_refs->end(); ++i) {
        p_varref ref = *i;
        p_var_sym var = ref->var();
        if (this_symtab->is_visible(var.ptr())) {
            D_SELFTEST(90) {
                cout << "Adding visible ref '" << ref << "' to the caller "
                    << "refs.\n";
            }
            p_varref clone = ref->clone(call_site);
            refs->insert(clone);
        }
    }
}



/**
Local helper returning the type that the given address-like type refers to:
the referenced type of a pointer, the element type of an array, or the base
of a call-by-reference type. Any other kind of type trips the assertion.
*/
type_node *
_base_type(type_node *ptr)
{
    assert(ptr->is_ptr() || ptr->is_array() || ptr->is_call_by_ref());

    if (ptr->is_ptr()) {
        ptr_type *pointer = (ptr_type *)ptr;
        return pointer->ref_type();
    }

    if (ptr->is_array()) {
        array_type *array = (array_type *)ptr;
        return array->elem_type();
    }

    /* Only the call-by-reference case is left. */
    modifier_type *by_ref = (modifier_type *)ptr;
    return by_ref->base();
}



/**
Local helper to create definitions for local variables of the given type
and store them in the given set of variable references. Local variables are
those that can be referenced from the scope of the reference point (ti).

The walk starts at the scope of 'ti' and moves up through the parent symbol
tables. It stops, without visiting it, at the symbol table that holds the
symbol of the enclosing procedure.
*/
void
_create_defs_for_scope(tree_instr *ti, type_node *type, set_varref *refs)
{
    p_ccfg_node node = get_node(ti);

    D_SELFTEST(90) {
        cout << "Argument is an address. It may modify variables of type: ";
        type->print(); fflush(stdout);
        cout << endl;
    }

    base_symtab *scope = ti->scope();
    base_symtab *stop_at = ti->proc()->parent();

    while (scope != NULL && scope != stop_at) {
        sym_node_list_iter iter(scope->symbols());
        while (!iter.is_empty()) {
            sym_node *sym = iter.step();

            /* Only user defined variables are candidates. */
            if (sym->is_var() && sym->is_userdef()) {
                p_var_sym var = (var_sym *)sym;
                if (var->type()->is_same(type)) {
                    vardef *def = new vardef(var, ti->instr(), node);
                    refs->insert(def);

                    D_SELFTEST(90) {
                        cout << "\nAssuming variable " << var << " of type ";
                        var->type()->print(); fflush(stdout);
                        cout << " is defined:\n\t" << *def;
                        cout << endl;
                    }
                }
            }
        }

        scope = scope->parent();
    }
}



/**
Local helper to create definitions for variables outside the scope of the
given procedure and store them in the given set of variable references.
Each definition is attached to the instruction of 'ti' and to its flowgraph
node.
*/
void
_create_defs_for_globals(tree_proc *proc, set_varref *refs, tree_instr *ti)
{
    p_ccfg_node node = get_node(ti);

    /* Walk every symbol table above the scope of the procedure (file and
     * external scope) and create a vardef object for each user defined
     * variable found.
     */
    for (base_symtab *table = proc->scope()->parent(); table != NULL;
            table = table->parent()) {
        sym_node_list_iter iter(table->symbols());
        while (!iter.is_empty()) {
            sym_node *sym = iter.step();

            if (sym->is_var() && sym->is_userdef()) {
                vardef *def = new vardef((var_sym *)sym, ti->instr(), node);
                refs->insert(def);
                D_SELFTEST(90) {
                    cout << "Assuming global variable is defined: " << *def;
                    cout << endl;
                }
            }
        }
    }
}
