/*
 * StringGrammar.cpp
 *
 *  Created on: 19.01.2012
 *      Author: Markus Bals
 */

#include "ignf.h"

namespace IGNF {

/*
 * Default constructor. Performs no explicit initialisation; all members are
 * left to their own default construction.
 */
StringGrammar::StringGrammar() {
}

/*
 * Creates a grammar that starts with the supplied encoding tables and an
 * empty production set.
 * @param names:   decoding table (symbol -> name)
 * @param symbols: encoding table (name -> symbol)
 */
StringGrammar::StringGrammar(map<Symbol, string> names, map<string, Symbol> symbols) {
	m_productions.clear();

	// Both parameters are already private copies (passed by value), so their
	// contents can be taken over directly.
	m_symbols.swap(symbols);
	m_names.swap(names);
}

/*
 * Destructor. Nothing is released explicitly here.
 */
StringGrammar::~StringGrammar() {
}

/*
 * Discards the current grammar (productions and both encoding tables) and
 * loads a fresh one from the given file.
 * @param filename: path handed on to appendFromFile()
 */
void StringGrammar::initFromFile(string filename) {
	m_productions.clear();
	m_names.clear();
	// The name->symbol table has to be reset together with the symbol->name
	// table. Otherwise a name known from the previous grammar would be
	// "found" by the encode functions and mapped to a symbol that can no
	// longer be decoded.
	m_symbols.clear();
	appendFromFile(filename);
}

/*
 * Reads the given grammar file and adds every production it yields to this
 * grammar. Existing productions and encoding table entries are kept.
 * @param filename: path passed to GrammarReader::readGrammarFile()
 */
void StringGrammar::appendFromFile(string filename) {
	GrammarReader reader;
	reader.readGrammarFile(filename);

	// pull productions for as long as the reader reports a good state
	for ( ; reader.good(); )
		addProduction( reader.getNextProduction() );
}

/*
 * Encodes a production given in string form and adds the encoded production
 * to the grammar. Encoding may create new entries in the encoding tables.
 */
void StringGrammar::addProduction(StringProduction strProduction) {
	const Production encoded = encodeProduction(strProduction);
	addProduction(encoded);
}

/*
 * Adds an already encoded production to the production set.
 */
void StringGrammar::addProduction(Production production) {
	m_productions.insert(production);
}


/*
 * Adds every production of the given set to the production set.
 */
void StringGrammar::addProductions(set<Production> productions) {
	for ( set<Production>::const_iterator cur = productions.begin();
			cur != productions.end(); ++cur ) {
		m_productions.insert(*cur);
	}
}

/*
 * Removes the given production from the production set. Does nothing if the
 * production is not part of the grammar.
 */
void StringGrammar::removeProduction(Production production) {
	set<Production>::iterator hit = m_productions.find(production);
	if ( hit != m_productions.end() )
		m_productions.erase(hit);
}

/*
 * Returns the productions of the grammar.
 * @param lhs: symbol whose productions are requested; 0 selects all productions
 * @return: copy of the matching productions (empty if lhs has none)
 */
set<Production> StringGrammar::getProductions(Symbol lhs) {
	if ( lhs == 0 )
		return m_productions;

	set<Production> result;

	// The pivot has an empty RHS and is therefore expected to be the smallest
	// possible production of lhs (assumes the production set uses the default
	// pair ordering). lower_bound is required here: upper_bound would skip a
	// production "lhs -> <empty>" that compares equal to the pivot.
	const Production pivot(lhs, RHS());
	for ( set<Production>::iterator it = m_productions.lower_bound(pivot);
			it != m_productions.end() && it->first == lhs; ++it ) {
		result.insert(*it);
	}
	return result;
}

/*
 * Renders the symbol -> name table as text in two sections: entries with a
 * negative code (nonterminals) followed by entries with a positive code
 * (terminals). One "  code = name" line is written per entry.
 */
string StringGrammar::getEncodingTableString() {
	stringstream out;
	map<Symbol, string>::iterator entry;

	out << "Encoding Table for Nonterminal Symbols\n";
	// the map is sorted by code, so the negative codes form a prefix
	for ( entry = m_names.begin(); entry != m_names.end() && entry->first < 0; ++entry )
		out << "  " << entry->first << " = " << entry->second << "\n";

	out << "Encoding Table for Terminal Symbols\n";
	// everything strictly greater than 0
	for ( entry = m_names.upper_bound(0); entry != m_names.end(); ++entry )
		out << "  " << entry->first << " = " << entry->second << "\n";

	return out.str();
}

/*
 * Decodes a production and renders it as "LHS -> RHS". The RHS symbol names
 * are concatenated without any separator.
 * @param rhsOnly: if true, the "LHS -> " prefix is omitted
 */
string StringGrammar::getProductionString(Production production, bool rhsOnly) {
	const StringProduction decoded = decodeProduction(production);

	stringstream out;
	if ( !rhsOnly )
		out << decoded.first << " -> ";

	vector<string>::size_type pos = 0;
	while ( pos < decoded.second.size() ) {
		out << decoded.second[pos];
		++pos;
	}
	return out.str();
}

/*
 * Renders a set of productions as text.
 * @param productions: productions to print; an empty set selects all
 *                     productions of the grammar
 * @param compact: false prints one "LHS -> RHS" line per production,
 *                 true prints one line per LHS with alternatives joined by " | "
 * @return: the rendered text, or "\n" if there is nothing to print
 */
string StringGrammar::getProductionsString(set<Production> productions, bool compact) {
	if ( productions.empty() )
		productions = m_productions;
	if ( productions.empty() )
		return "\n";

	stringstream out;
	set<Production>::iterator cursor = productions.begin();
	Symbol groupLHS = cursor->first;

	// compact mode prints the LHS once per group, starting with the first one
	if ( compact )
		out << decodeSymbol(groupLHS) << " -> ";

	while ( cursor != productions.end() ) {
		out << getProductionString(*cursor, compact);
		++cursor;

		if ( !compact ) {
			out << "\n";
			continue;
		}

		// compact mode: decide what separates this alternative from the next
		if ( cursor == productions.end() ) {
			out << "\n";
		}
		else if ( cursor->first != groupLHS ) {
			// next production belongs to another LHS: open a new line
			groupLHS = cursor->first;
			out << "\n" << decodeSymbol(groupLHS) << " -> ";
		}
		else {
			out << " | ";
		}
	}

	return out.str();
}

//***** static functions **************************************************************************

/*
 * Substitutes the first RHS symbol of a production by each of the supplied
 * right hand sides.
 * @param replaced: production whose leading RHS symbol is substituted
 * @param rhsList:  productions of that leading symbol (checked by assert)
 * @return: one production "replaced.first -> rhs rest" per entry of rhsList,
 *          where rest is the RHS of replaced without its first symbol;
 *          empty if replaced has an empty RHS
 */
set<Production> StringGrammar::getLeftDerivations(
		const Production& replaced, const set<Production>& rhsList) {
	set<Production> result;

	// an empty RHS has no leading symbol that could be substituted
	if ( replaced.second.empty() )
		return result;

	// Cut the first symbol of the replaced RHS. The tail is built straight
	// from the source: assigning a container from a range of its own elements
	// is not permitted by the standard.
	const RHS rest(replaced.second.begin() + 1, replaced.second.end());

	for ( set<Production>::const_iterator it = rhsList.begin(); it != rhsList.end(); ++it ) {
		assert(replaced.second[0] == (*it).first);
		RHS curRHS = (*it).second;
		curRHS.insert(curRHS.end(), rest.begin(), rest.end());
		result.insert( Production(replaced.first, curRHS) );
	}
	return result;
}

/*
 * Builds the productions that replace a left recursive production.
 * @param removed:      the production being removed (its first RHS symbol is cut off)
 * @param nonRecursive: the productions of a regular nonterminal that are not
 *                      left recursive; their LHS determines the right recursive symbol
 * @param rrExists:     whether the right recursive symbol already existed; if so,
 *                      the right recursive copies of nonRecursive are not generated
 * @return: the new productions; empty if removed has fewer than two RHS symbols
 *          or if nonRecursive is empty
 */
set<Production> StringGrammar::getRightRecursiveReplacement(
			const Production& removed, const set<Production>& nonRecursive, const bool& rrExists) {
	set<Production> newProductions;

	//if there is a production of the form A -> A it is just removed (no new productions)
	if ( removed.second.size() < 2 )
		return newProductions;

	//without a non recursive production there is no LHS to derive the right
	//recursive symbol from; begin() must not be dereferenced in that case
	if ( nonRecursive.empty() )
		return newProductions;

	set<Production>::const_iterator it = nonRecursive.begin();
	//rrSymbol is the right recursive symbol associated with the given regular nonterminal (e.g. B1 for A1)
	const Symbol rrSymbol = RECURSIVE_SYMBOL((*it).first);

	//cut the first symbol off (of the RHS being replaced); built directly from the
	//source because a container must not be assigned from a range of its own elements
	RHS rest(removed.second.begin() + 1, removed.second.end());

	//two new productions of right recursive symbol
	newProductions.insert( Production(rrSymbol, rest) );
	rest.push_back(rrSymbol);
	newProductions.insert( Production(rrSymbol, rest) );

	//done here if the right recursive symbol already existed
	if ( rrExists )
		return newProductions;

	//right recursive copies of original productions
	for ( ; it != nonRecursive.end(); ++it ) {
		Production rrCopy = *it;
		rrCopy.second.push_back(rrSymbol);
		newProductions.insert(rrCopy);
	}

	return newProductions;
}

/*
 * Tells whether a production is "ordered": its LHS code is not greater than
 * the code of its first RHS symbol, or its LHS is a recursive symbol.
 * @return: false for a production with an empty RHS
 */
bool StringGrammar::ordered(Production production) {
	if ( production.second.empty() )
		return false;

	const bool lhsNotAfterFirst = ( production.first <= production.second[0] );
	return (lhsNotAfterFirst || IS_RECURSIVE_SYMBOL(production.first));
}

/*
 * Tells whether the RHS of a production starts with a terminal symbol
 * (terminals are the symbols with a positive code).
 * @return: false for a production with an empty RHS, consistent with ordered()
 */
bool StringGrammar::greibach(Production production) {
	// an empty RHS has no first symbol; reading second[0] would be undefined
	if ( production.second.empty() )
		return false;
	return (production.second[0] > 0);
}

/*
 * Tells whether a production is directly left recursive, i.e. whether its
 * first RHS symbol equals its LHS.
 * @return: false for a production with an empty RHS, consistent with ordered()
 */
bool StringGrammar::recursive(Production production) {
	// an empty RHS has no first symbol; reading second[0] would be undefined
	if ( production.second.empty() )
		return false;
	return (production.first == production.second[0]);
}

/*
 * Renders a production using its numeric symbol codes: the LHS code, a
 * tab-delimited arrow, then the RHS codes written back to back without
 * any separator.
 */
string StringGrammar::getEncodedProductionString(Production production) {
	stringstream out;
	out << production.first << "\t->\t";

	vector<Symbol>::size_type pos = 0;
	while ( pos < production.second.size() ) {
		out << production.second[pos];
		++pos;
	}
	return out.str();
}

/*
 * Joins a production given in string form into "LHS -> RHS"; the RHS
 * tokens are concatenated without any separator.
 */
string StringGrammar::joinStringProduction(StringProduction strProduction) {
	stringstream out;
	out << strProduction.first << " -> ";

	size_t pos = 0;
	while ( pos < strProduction.second.size() ) {
		out << strProduction.second[pos];
		++pos;
	}
	return out.str();
}

//***** en- and decoding functions ****************************************************************

/*
 * Checks if the corresponding recursive Symbol for a supplied regular nonterminal Symbol exists
 * and creates entries in both encoding tables if it does not exist. The name of the new symbol
 * is the name of the regular symbol prefixed with "@".
 * @param regular: the regular nonterminal Symbol
 * @return: boolean value whether the right recursive symbol existed yet or not
 */
bool StringGrammar::checkRecursiveSymbol(Symbol regular) {
	const Symbol recSymbol = RECURSIVE_SYMBOL(regular);
	if ( m_names.find(recSymbol) != m_names.end() )
		return true;

	// Look the regular name up with find(): operator[] would silently insert
	// an empty name for a regular symbol that is not in the table yet.
	string name = "@";
	map<Symbol, string>::iterator regularIt = m_names.find(regular);
	if ( regularIt != m_names.end() )
		name += regularIt->second;

	m_names[recSymbol] = name;
	m_symbols[name] = recSymbol;

	return false;
}

/*
 * Translates a production given in string form into its encoded form. The
 * LHS is encoded as a nonterminal, each RHS token via encodeSymbol(); both
 * may add entries to the encoding tables.
 */
Production StringGrammar::encodeProduction(StringProduction rule) {
	// the LHS is encoded before any RHS token, as in every other place codes are handed out
	const Symbol lhs = encodeNonterminal(rule.first);

	RHS rhs;
	vector<string>::size_type pos = 0;
	while ( pos < rule.second.size() ) {
		rhs.push_back( encodeSymbol(rule.second[pos]) );
		++pos;
	}

	return Production(lhs, rhs);
}

/*
 * Returns the code of the given name, registering it as a new nonterminal
 * if the name is not in the encoding table yet.
 * NOTE(review): a name that is already registered is returned as is, whatever
 * kind of symbol it was registered as - confirm this is intended by callers.
 */
Symbol StringGrammar::encodeNonterminal(string str) {
	//a name that was encoded before keeps its code
	map<string, Symbol>::iterator known = m_symbols.find(str);
	if ( known != m_symbols.end() )
		return known->second;

	Symbol candidate = 0;
	if ( m_names.find(MAX_REG_NONTERMINAL) != m_names.end() ) {
		//nonterminals exist already: take the least entry and go one below it
		candidate = (*m_names.upper_bound(MIN_INT)).first - 1;
	}
	else {
		//very first nonterminal: start with the maximal regular one
		candidate = MAX_REG_NONTERMINAL;
	}

	//register the new code in both encoding tables
	m_names[candidate] = str;
	m_symbols[str] = candidate;

	return candidate;
}

/*
 * Returns the code of the given token. A token that is not in the encoding
 * table yet is registered as a new terminal with the next free positive code.
 */
Symbol StringGrammar::encodeSymbol(string str) {
	//a token that was encoded before keeps its code
	map<string, Symbol>::iterator known = m_symbols.find(str);
	if ( known != m_symbols.end() )
		return known->second;

	//unknown tokens become terminals: one above the largest positive code, or 1 if there is none
	Symbol candidate = 1;
	if ( !m_names.empty() ) {
		const Symbol largest = (*m_names.rbegin()).first;
		if ( largest > 0 )
			candidate = largest + 1;
	}

	m_names[candidate] = str;
	m_symbols[str] = candidate;

	return candidate;
}


/*
 * Translates an encoded production back into string form by decoding the
 * LHS and every RHS symbol with decodeSymbol().
 */
StringProduction StringGrammar::decodeProduction(Production rule) {
	StringProduction result;
	result.first = decodeSymbol(rule.first);

	vector<string>::size_type pos = 0;
	while ( pos < rule.second.size() ) {
		result.second.push_back( decodeSymbol(rule.second[pos]) );
		++pos;
	}
	return result;
}

/*
 * Returns the name registered for the given symbol code.
 * @return: the name, or "ERROR" (after printing a message to cout) if the
 *          code is not in the decoding table
 */
string StringGrammar::decodeSymbol(Symbol symbol) {
	map<Symbol, string>::iterator entry = m_names.find(symbol);
	if ( entry != m_names.end() )
		return entry->second;

	cout << "Error: unable to decode symbol " << symbol << endl;
	return "ERROR";
}

} /* namespace IGNF */
