/*
 * GrammarReader.cpp
 *
 *  Created on: 19.01.2012
 *      Author: mbals
 * Description: see GrammarReader.h
 */

#include "GrammarReader.h"

namespace IGNF {

/**
 * Creates a reader without reading any grammar.
 *
 * m_iter is positioned on m_productions.begin() so that good() always
 * compares two iterators of the same container, even when it is called
 * before readGrammarFile() has run. Comparing a default-constructed
 * (singular) iterator with end() would be undefined behaviour.
 */
GrammarReader::GrammarReader() {
	m_iter = m_productions.begin();
}

/**
 * Destructor. The body is empty: no explicit cleanup is performed here.
 */
GrammarReader::~GrammarReader() {
}

/**
 * Reads a grammar file line by line and appends every production found to
 * m_productions, then positions m_iter on the first stored production.
 *
 * Each line is handed to parseLine(); lines for which it returns no
 * productions contribute nothing. If the file cannot be opened, a message is
 * printed to stdout and the function returns without touching the stored
 * productions or m_iter.
 *
 * @param filename   path of the grammar file to read
 * @param delimiters characters that separate the symbols of a right hand side
 */
void GrammarReader::readGrammarFile(string filename, const string& delimiters) {
	ifstream ifs(filename.c_str(), ifstream::in);

	if ( !ifs.is_open() ) {
		cout << "Cannot open file \"" << filename << "\"\n";
		return;
	}

	string line;
	// Loop on the result of getline() itself so that a failed read is never
	// passed on to the parser.
	while ( getline(ifs, line) ) {
		// Files with CRLF line endings leave a trailing '\r' on each line, which
		// would otherwise end up glued to the last symbol of the production.
		if ( !line.empty() && line[line.size() - 1] == '\r' )
			line.erase(line.size() - 1);

		vector<StringProduction> lineProductions = parseLine(line, delimiters);
		m_productions.insert(m_productions.end(), lineProductions.begin(), lineProductions.end());
	}

	// Restart iteration from the first stored production; the insertions above
	// may have invalidated the previous position. The stream closes itself
	// when it goes out of scope.
	m_iter = m_productions.begin();
}

/**
 * Parses a single raw production string via parseLine().
 *
 * @param raw        text of the production, e.g. "A -> b c"
 * @param delimiters characters that separate the symbols of a right hand side
 * @return the first production parsed from raw. If nothing could be parsed,
 *         a warning is printed and a production with an empty left hand side
 *         and no symbols is returned. If raw yields more than one production,
 *         a warning is printed and only the first one is returned.
 */
StringProduction GrammarReader::readProduction(string raw, const string& delimiters) {
	const vector<StringProduction> parsed = parseLine(raw, delimiters);

	if ( !parsed.empty() ) {
		// Alternatives ("a | b") expand to several productions; only one can be returned.
		if ( parsed.size() != 1 )
			cout << "Warning: parsing resulted in multiple productions (using first one).\n";
		return parsed.front();
	}

	cout << "Warning: Could not resolve \"" << raw << "\".\n";
	return make_pair("", vector<string>());
}

/**
 * Tells whether getNextProduction() may be called.
 *
 * @return true as long as m_iter has not reached the end of m_productions
 */
bool GrammarReader::good() {
	const bool exhausted = ( m_iter == m_productions.end() );
	return !exhausted;
}

/**
 * Returns the production m_iter currently refers to and advances m_iter by one.
 *
 * Callers must check good() first; this is only enforced by an assert.
 *
 * @return a copy of the current production
 */
StringProduction GrammarReader::getNextProduction() {
	assert( good() );
	// Post-increment: dereference the current position, then step past it.
	return *m_iter++;
}

/**
 * Splits one line of the form "lhs -> alt1 | alt2 | ..." into productions.
 *
 * All spaces are removed from the left hand side. The right hand side is split
 * at '|' into alternatives, and each alternative is split into symbols using
 * the given delimiters. Every alternative becomes one production with the same
 * left hand side.
 *
 * @param line       the text to parse
 * @param delimiters characters that separate the symbols of an alternative
 * @return the productions found; empty if line does not contain "->"
 */
vector<StringProduction> GrammarReader::parseLine(string line, const string& delimiters) {
	vector<StringProduction> productions;

	// A line without the "produces" token "->" declares nothing.
	const string::size_type arrow = line.find("->");
	if ( arrow == string::npos )
		return productions;

	// Left hand side: everything before "->", with all spaces stripped.
	string head = line.substr(0, arrow);
	head.erase(remove(head.begin(), head.end(), ' '), head.end());

	// Right hand side: everything after "->", split into its '|' separated alternatives.
	vector<string> alternatives;
	tokenize(line.substr(arrow + 2), alternatives, "|");

	// One production per alternative, each broken up into its symbols.
	for ( vector<string>::const_iterator alt = alternatives.begin(); alt != alternatives.end(); ++alt ) {
		vector<string> symbols;
		tokenize(*alt, symbols, delimiters);
		productions.push_back(make_pair(head, symbols));
	}

	return productions;
}

void GrammarReader::tokenize(const string& str, vector<string>& tokens, const string& delimiters) {
    // Skip delimiters at beginning.
    string::size_type lastPos = str.find_first_not_of(delimiters, 0);
    // Find first delimiter starting from first non-delimiter.
    string::size_type pos     = str.find_first_of(delimiters, lastPos);

    while (string::npos != pos || string::npos != lastPos) {
        // Found a token, add it to the vector.
        tokens.push_back(str.substr(lastPos, pos - lastPos));
        // Skip delimiters.  Find next non-delimiter.
        lastPos = str.find_first_not_of(delimiters, pos);
        // Find next delimiter starting from last non-delimiter.
        pos = str.find_first_of(delimiters, lastPos);
    }
}


} /* namespace IGNF */
