
#include "TeX_atom.h"
#include <ctype.h>

using std::string;
typedef std::vector<string> vector;
typedef std::vector<Util::TeX_atom*> vector_atom;

// Split a line of TeX source into tokens.
//
// This function tokenizes a single line of TeX source, appending the
// resulting tokens to *tokens.  Scanning stops at the end of the
// string or at the first newline character, whichever comes first.
// Following TeX lexical conventions, it makes most characters in the
// line individual tokens, but joins a backslash followed by a single
// non-letter or by any number of letters into a single token.  It
// ignores leading and trailing whitespace, but tokenizes internal
// whitespace, except that it counts multiple spaces together as a
// single space and ignores whitespace following a backslash-letters
// token.
//
// Further details of the tokens produced:
//
//   * A backslash-letters token is stored with one trailing space
//     appended to the letters.
//   * When translate_nobreakspace is TRANSLATE_NOBREAKSPACE, the
//     sequence \nobreakspace{} (whitespace is allowed before the
//     opening brace) is replaced by a single " " token.
//   * A backslash followed by whitespace yields the token consisting
//     of a backslash and one space, however much whitespace follows.
//   * A backslash that is the last character scanned yields no token.
//
// The lexical conventions come from no authoritative source,
// unfortunately, but are merely inferred from experience in using
// LaTeX.
//
void Util::tokenize_TeX(
  const string &line,
  vector *const tokens,
  const Translate_nobreakspace translate_nobreakspace
) {
  {
    // True while p still points at a character of the line proper,
    // that is, neither at the end of the string nor at a newline.
    struct {
      bool operator()(
        const string::const_iterator p,
        const string::const_iterator end
      ) { return p != end && *p != '\n'; }
    } in_string;
    // The <ctype.h> classifiers have undefined behavior when handed a
    // negative value other than EOF, which is what a plain char
    // holding a non-ASCII byte is wherever char is signed.  These
    // wrappers convert through unsigned char first.
    struct {
      bool operator()( const char c ) {
        return isalpha( static_cast<unsigned char>(c) ) != 0;
      }
    } is_letter;
    struct {
      bool operator()( const char c ) {
        return isspace( static_cast<unsigned char>(c) ) != 0;
      }
    } is_space;
    string      ::const_iterator p   = line.begin();
    const string::const_iterator end = line.end  ();
    bool first_time = true;
    while ( in_string( p, end ) ) {
      // q marks the start of the token now being scanned.
      const string::const_iterator q = p;
      if ( *p == '\\' ) {
        if ( in_string( ++p, end ) ) {
          if ( is_letter(*p) ) {
            // Backslash-letters: gather the letters, append the single
            // trailing space, then swallow any whitespace that follows.
            do ++p; while ( in_string( p, end ) && is_letter(*p) );
            tokens->push_back( string( q, p ) );
            tokens->back() += ' ';
            while ( in_string( p, end ) && is_space(*p) ) ++p;
            if (
              translate_nobreakspace == TRANSLATE_NOBREAKSPACE &&
              tokens->back() == "\\nobreakspace "              &&
              in_string( p  , end ) && * p    == '{'           &&
              in_string( p+1, end ) && *(p+1) == '}'
            ) {
              // Consume the empty brace pair and substitute a space.
              p += 2;
              tokens->back() = ' ';
            }
          }
          else if ( is_space(*p) ) {
            // Backslash-whitespace: one control-space token.
            do ++p; while ( in_string( p, end ) && is_space(*p) );
            tokens->push_back( "\\ " );
          }
          // Backslash followed by a single non-letter.
          else tokens->push_back( string( q, ++p ) );
        }
        // Otherwise the backslash ended the line; nothing is emitted.
      }
      else if ( is_space(*p) ) {
        do ++p; while ( in_string( p, end ) && is_space(*p) );
        // Cancel leading whitespace.
        if (!first_time) tokens->push_back( " " );
      }
      else tokens->push_back( string( q, ++p ) );
      first_time = false;
    }
  }
  // Cancel trailing whitespace.
  if ( tokens->end() != tokens->begin() && tokens->back () == " " )
    tokens->pop_back ();
}

// Parse the token range [p, end) and append the resulting atoms to
// this group.
//
// Each token lying outside any braces becomes a TeX_atom_terminal.
// Each outermost brace pair becomes a TeX_atom_nonterminal built from
// the tokens strictly between the two braces, so nested groups are
// handled by the child's own construction.
//
// Throws Exc_unbalanced if a closing brace appears without a matching
// opening brace, or if an opening brace is never closed.  Should that
// or any other exception escape, every atom this call had appended is
// deleted and removed first.  Without that, the atoms would leak when
// init() runs inside a constructor, because the destructor of a
// partially constructed object never executes.
//
void Util::TeX_atom_nonterminal::init(
  vector<std::string>      ::const_iterator p,
  const vector<std::string>::const_iterator end
) {
  // Number of atoms held on entry; only atoms beyond this are ours to
  // roll back.
  const size_type n_before = size();
  try {
    int level = 0;
    // q is the first token inside the outermost open brace, if any.
    vector<std::string>::const_iterator q = end;
    for ( ; p != end; ++p ) {
      if      ( *p == "{" ) {
        if ( !level ) q = p+1;
        ++level;
      }
      else if ( *p == "}" ) {
        --level;
        if ( level < 0 ) throw Exc_unbalanced();
        if ( !level ) {
          // Reserve the slot with a null pointer before allocating, so
          // that the new atom cannot be lost if push_back() throws.
          push_back( static_cast<TeX_atom*>(0) );
          back() = new TeX_atom_nonterminal( q, p );
          q = end;
        }
      }
      else if ( !level ) {
        push_back( static_cast<TeX_atom*>(0) );
        back() = new TeX_atom_terminal(*p);
      }
    }
    if ( level ) throw Exc_unbalanced();
  }
  catch (...) {
    // Undo this call's work.  Deleting a still-null slot is harmless.
    while ( size() > n_before ) {
      delete back();
      pop_back();
    }
    throw;
  }
}

// Return the text of this group: the term() of every child atom, in
// order, enclosed in one pair of braces.
string Util::TeX_atom_nonterminal::term() const {
  string text( 1, '{' );
  const_iterator child = begin();
  while ( child != end() ) {
    text += (*child)->term();
    ++child;
  }
  text += '}';
  return text;
}

// Destroy the group, deleting each child atom it holds, front to back.
Util::TeX_atom_nonterminal::~TeX_atom_nonterminal() {
  iterator child = begin();
  while ( child != end() ) {
    delete *child;
    ++child;
  }
}

// Build a group from one line of TeX source.  The line is tokenized
// by tokenize_TeX() with TRANSLATE_NOBREAKSPACE, and the tokens are
// then parsed by init().
Util::TeX_atom_nonterminal::TeX_atom_nonterminal(
  const string &line
) {
  vector<std::string> line_tokens;
  tokenize_TeX( line, &line_tokens, TRANSLATE_NOBREAKSPACE );
  this->init( line_tokens.begin(), line_tokens.end() );
}

// Build a group from a sequence of tokens by handing the whole
// sequence to init().
Util::TeX_atom_nonterminal::TeX_atom_nonterminal(
  const vector<std::string> &tokens
) {
  const vector<std::string>::const_iterator first = tokens.begin();
  const vector<std::string>::const_iterator last  = tokens.end  ();
  init( first, last );
}

// Build a group from the token range [begin, end) by handing the
// range to init().
Util::TeX_atom_nonterminal::TeX_atom_nonterminal(
  const vector<std::string>::const_iterator begin,
  const vector<std::string>::const_iterator end
) {
  this->init( begin, end );
}

// Build a group from a sequence of atoms.  For each atom in the
// sequence, in order, the group stores the pointer returned by that
// atom's replicate() (presumably a separately owned copy; confirm
// against the declaration of replicate()).
Util::TeX_atom_nonterminal::TeX_atom_nonterminal(
  const vector_atom &atoms
) {
  vector_atom::const_iterator source = atoms.begin();
  while ( source != atoms.end() ) {
    push_back( (*source)->replicate() );
    ++source;
  }
}

// Build a group from the atom range [p, end).  For each atom in the
// range, in order, the group stores the pointer returned by that
// atom's replicate().
Util::TeX_atom_nonterminal::TeX_atom_nonterminal(
  vector_atom      ::const_iterator p,
  const vector_atom::const_iterator end
) {
  while ( p != end ) {
    push_back( (*p)->replicate() );
    ++p;
  }
}

// Copy constructor.  The new group starts empty and then stores, for
// each child of the source group in order, the pointer returned by
// that child's replicate(), rather than the source's own pointers.
Util::TeX_atom_nonterminal::TeX_atom_nonterminal(
  const TeX_atom_nonterminal &atom
) : TeX_atom(), vector_atom() {
  const_iterator source = atom.begin();
  while ( source != atom.end() ) {
    push_back( (*source)->replicate() );
    ++source;
  }
}

// Copy assignment.  Afterward this group holds replicas of the
// children of the source group, and the children it held before have
// been deleted.  Self-assignment is a no-op.
//
// The previous children must be deleted here, not merely dropped:
// the destructor deletes every child, so the group owns them, and
// clearing the pointer list alone would leak them.
//
Util::TeX_atom_nonterminal &Util::TeX_atom_nonterminal::operator=(
  const TeX_atom_nonterminal &atom
) {
  if ( &atom != this ) {
    // Replicate the source before touching our own children.  This
    // keeps the source intact even if it happens to be one of our own
    // descendants, and leaves this group unchanged should the
    // replication throw.
    TeX_atom_nonterminal replacement( atom );
    // Trade child lists.  The old children now belong to replacement,
    // whose destructor deletes them on leaving this scope.
    vector_atom::swap( replacement );
  }
  return *this;
}

// Copy assignment.  Takes over the text of the source atom, as
// reported by its term(); self-assignment changes nothing.
Util::TeX_atom_terminal &Util::TeX_atom_terminal::operator=(
  const TeX_atom_terminal &atom
) {
  if ( this == &atom ) return *this;
  term1 = atom.term();
  return *this;
}

// Write the atom's term() to the stream and return the stream.
std::ostream &Util::operator<<(
  std::ostream &os,
  const TeX_atom &atom
) {
  os << atom.term();
  return os;
}

