// copyright (C) 2000 by David Cox All Rights Reserved

#include "lexan.h"
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <memory.h>
#include <iostream>



// Default constructor: the token starts with an empty string and the
// type e_none.
Token::Token()
	: m_string()
	, m_type(e_none)
{
}


// Copy constructor: duplicates both the text and the type of `t`.
Token::Token(const Token & t)
	: m_string(t.m_string)
	, m_type(t.m_type)
{
}


// Destructor: has no work of its own to do.
Token::~Token()
{
}


// Copy assignment: takes over the type and text of `t`.
// Assigning a token to itself is a no-op. Returns *this.
Token & Token::operator=(const Token & t)
{
	if (this != &t)
	{
		m_type = t.m_type;
		m_string = t.m_string;
	}

	return *this;
}


// Accessor: returns the current type of this token.
Token::TokenType Token::Type()
{
	return this->m_type;
}


// Mutator: replaces the type of this token with `t`.
void Token::Type(Token::TokenType t)
{
	this->m_type = t;
}


std::string & Token::String()
{
	return m_string;
}


void Token::String(const std::string & s)
{
	m_string = s;
}


// Two tokens compare equal when they have the same type.
// The token text is not part of the comparison.
bool Token::operator==(const Token & t)
{
	return m_type == t.m_type;
}


// Two tokens compare unequal when their types differ.
// The token text is not part of the comparison.
bool Token::operator!=(const Token & t)
{
	return m_type != t.m_type;
}

// *****************************************************************
// Lexan Class

// Constructor: position tracking starts at line 1, column 1.
Lexan::Lexan()
	: m_lineNo(1)
	, m_colNo(1)
{
}


// ***********************************
// Stream functions


// Consumes a run of blanks, tabs, line feeds and carriage returns from
// `in`. The first character that is none of these (which may be the EOF
// value returned by GetChar) is handed back through UnGetChar.
void Lexan::EatWhiteSpace(std::istream & in)
{
	int next;
	do
	{
		next = GetChar(in);
	}
	while (next == ' ' || next == '\t' || next == '\n' || next == '\r');

	UnGetChar(next, in);
}


// Reads character data from `in` up to, but not including, the next '<'.
//
// The text is appended to whatever string `t` already holds. On success
// the '<' is pushed back onto the stream, `t` has type e_charData and its
// string holds the accumulated text.
//
// If the input ends before a '<' is seen, `t` is given type e_eof and its
// string is left as it was on entry (the text read so far is dropped).
void Lexan::GetCharData(Token & t, std::istream & in)
{
	std::string buffer = t.String();

	t.Type(Token::e_charData);

	// Keep the character as an int: narrowing it to char before the
	// comparison makes EOF either undetectable (unsigned char) or
	// indistinguishable from a 0xFF byte (signed char).
	int c = GetChar(in);
	while (c != '<')
	{
		if (c == EOF)
		{
			t.Type(Token::e_eof);
			return;
		}

		buffer += static_cast<char>(c);
		c = GetChar(in);
	}

	// we have a <
	// signaling end of the character data
	UnGetChar(c, in);
	t.String(buffer);
}


// Reads the body of a comment from `in`; the opening "<!--" must already
// have been consumed by the caller.
//
// On success `t` has type e_comment and its string holds the comment
// text, without the closing "-->".
//
// `t` is given type e_error when:
//   - the input ends, or a '<' is met, before the closing "--"
//     (the string of `t` is not updated in this case);
//   - "--" is not immediately followed by '>'
//     (the text read up to the "--" is still stored in `t`).
void Lexan::GetComment(Token & t, std::istream & in)
{
	std::string comment;
	t.Type(Token::e_comment);

	// Keep the character as an int so that EOF stays distinguishable
	// from every real byte value.
	int c = GetChar(in);

	while (true)
	{
		if (c == EOF || c == '<')
		{
			t.Type(Token::e_error);
			return;
		}

		if (c != '-')
		{
			comment += static_cast<char>(c);
			c = GetChar(in);
			continue;
		}

		// c is a dash: look at the character that follows it
		int next = GetChar(in);
		if (next == '-')
		{
			// "--" ends the comment and must be followed by '>'
			if (GetChar(in) != '>')
			{
				t.Type(Token::e_error);
			}
			t.String(comment);
			return;
		}

		// A single dash is ordinary comment text. Keep it, and send the
		// following character back through the checks at the top of the
		// loop rather than appending it blindly.
		comment += '-';
		c = next;
	}
}


// Marks `t` as a keyword token (type e_keyword).
// Nothing is read from `in` and the string of `t` is left untouched.
void Lexan::GetKeyword(Token & t, std::istream & in)
{
	(void)in;	// not consulted
	t.Type(Token::e_keyword);
}


// Reads the next token from `in` into `t` and also returns a copy of it.
//
// `t` is reset to type e_none with an empty string before each attempt.
// Depending on the first character read, the result is:
//   end of input            -> e_eof
//   blank, tab, CR, LF      -> skipped; scanning continues with what follows
//   '"' or '\''             -> whatever GetQuotedString produces
//   letter, digit, '_', ':' -> whatever GetName produces
//   "</"                    -> e_endTag, string "</"
//   "<!--"                  -> whatever GetComment produces
//   "<!-" + anything else   -> e_error
//   "<!" + anything else    -> whatever GetKeyword produces
//   '<' + anything else     -> e_startTag, string "<" (the extra character
//                              is pushed back)
//   '>'                     -> e_closeTag, string ">"
//   '='                     -> e_equal, string "="
//   any other character     -> e_none with an empty string
Token Lexan::NextToken(Token & t, std::istream & in)
{
	// Need the loop in case we encounter whitespace.
	// In this case we eat the whitespace and loop back up
	// to get the next token
	while (true)
	{
		// initialize token to none
		t.Type(Token::e_none);
		t.String("");

		// get next character
		int temp = GetChar(in);

		// take action depending on character retrieved
		if (temp == EOF)
		{
			t.Type(Token::e_eof);
			return t;
		}

		else if (temp == ' ' || temp == '\t' || temp == '\n' || temp == '\r')
		{
			EatWhiteSpace(in);
		}

		else if (temp == '"' || temp == '\'')
		{
			GetQuotedString(t, temp, in);
			return t;
		}

		else if (isalnum(temp) || temp == '_' || temp == ':')
		{
			GetName(t, temp, in);
			return t;
		}

		else if (temp == '<')
		{
			// we have several choices depending on the next char
			// either <, </ or <!

			temp = GetChar(in);
			if (temp == '/')
			{
				t.Type(Token::e_endTag); // it's an end tag
				t.String("</");
			}
			else if (temp == '!')
			{
				// we have more choices depending on the next string
				// could be --, [CDATA[, DOCTYPE, ELEMENT, ATTLIST
				// if not a comment (--), we use a helper function to
				// get these keywords
				temp = GetChar(in);
				if (temp == '-')
				{
					int temp1 = GetChar(in);
					if (temp1 == '-')
					{
						// it's a comment start tag
						t.Type(Token::e_startComment);
						GetComment(t, in);
					}
					else
					{
						// "<!-" without the second dash: the characters
						// are already consumed, so report an error
						// instead of handing back an e_none token
						t.Type(Token::e_error);
					}
					return t;
				}
				else
				{
					// could be a keyword
					GetKeyword(t, in);
					return t;
				}
			}
			else
			{
				UnGetChar(temp, in); // nope, put it back
				t.Type(Token::e_startTag); // it's a start tag
				t.String("<");
			}
			return t;
		}

		else if (temp == '>')
		{
			t.Type(Token::e_closeTag);
			t.String(">");
			return t;
		}

		else if (temp == '=')
		{
			t.Type(Token::e_equal);
			t.String("=");
			return t;
		}

		else
		{
			// unrecognized character: hand back the e_none token
			return t;
		}
	}
}


// Reads one character from `in` and updates the line/column counters.
//
// Returns the value of in.get(), i.e. the character or the end-of-file
// value. A '\n' or a '\r' each advance the line counter and reset the
// column to 1 (so a "\r\n" pair advances the line counter twice); every
// other result, the end-of-file value included, advances the column.
int Lexan::GetChar(std::istream & in)
{
	const int ch = in.get();
	const bool lineBreak = (ch == '\n') || (ch == '\r');

	if (lineBreak)
	{
		m_colNo = 1;
		++m_lineNo;
	}
	else
	{
		++m_colNo;
	}

	return ch;
}


void Lexan::UnGetChar(int c, std::istream & in)
{
	m_colNo--;
	in.putback(c);
}


// Builds a string token from quoted text. The opening quote `delim` has
// already been consumed by the caller; GetString collects what follows.
// `t` ends up with type e_string and the collected text as its string.
void Lexan::GetQuotedString(Token & t, int delim, std::istream & in)
{
	std::string contents;

	// collect the text that sits between the quotes
	GetString(contents, delim, in);

	t.Type(Token::e_string);
	t.String(contents);
}


// Appends characters from `in` to `buffer` until the delimiter `delim` is
// read. The delimiter is consumed but not stored.
//
// Reading also stops when the input ends before the delimiter is found;
// `buffer` then holds everything that was read. Without that check an
// unterminated string would loop forever, growing `buffer` without bound.
void Lexan::GetString(std::string & buffer, int delim, std::istream &  in)
{
	// Keep the character as an int so that EOF stays distinguishable
	// from every real byte value.
	int c = GetChar(in);

	// get anything that isn't a delimiter
	while (c != delim && c != EOF)
	{
		buffer += static_cast<char>(c);
		c = GetChar(in);
	}
}


// Reads a name into `t`. `start` is the first character of the name,
// already consumed by the caller; further characters are taken from `in`
// for as long as IsNameChar accepts them. The first character that is
// rejected is handed back through UnGetChar.
//
// `t` ends up with type e_name and the collected name as its string.
void Lexan::GetName(Token & t, int start, std::istream & in)
{
	std::string buffer;

	t.Type(Token::e_name);

	// Keep the character as an int: a char may be negative for bytes
	// >= 0x80, and IsNameChar forwards its argument to isalnum, which is
	// not defined for such values.
	int c = start;
	while (IsNameChar(c))
	{
		buffer += static_cast<char>(c);
		c = GetChar(in);
	}
	UnGetChar(c, in);

	t.String(buffer);
}



// ***********************************
// Utility functions

// Returns true when `c` may appear inside a name: a letter or digit (as
// classified by isalnum), '.', '-', '_' or ':'.
//
// Values that are not representable as an unsigned char (the end-of-file
// value, or a negative value produced by a sign-extended char) are never
// name characters.
bool Lexan::IsNameChar(int c)
{
	// isalnum is only defined for EOF and for values in the unsigned char
	// range, so reject everything else before calling it.
	if (c != static_cast<int>(static_cast<unsigned char>(c)))
		return false;

	return (isalnum(c) ||	// letter or number
			c == '.'   ||	// period
			c == '-'   ||	// dash
			c == '_'   ||	// underscore
			c == ':');		// colon
}


int Lexan::LineNumber()
{
	return m_lineNo;
}


int Lexan::ColNumber()
{
	return m_colNo;
}


void Lexan::Error()
{
	std::cout << "error ";
	std::cout << "line " << m_lineNo << " col " << m_colNo << std::endl;
	exit(1);
}