tempest/resources/3rdparty/cpphoafparser-0.99.2/include/cpphoafparser/parser/hoa_lexer.hh


								//==============================================================================

								//

								//  Copyright (c) 2015-

								//  Authors:

								//  * Joachim Klein <klein@tcs.inf.tu-dresden.de>

								//  * David Mueller <david.mueller@tcs.inf.tu-dresden.de>

								//

								//------------------------------------------------------------------------------

								//

								//  This file is part of the cpphoafparser library,

								//      http://automata.tools/hoa/cpphoafparser/

								//

								//  The cpphoafparser library is free software; you can redistribute it and/or

								//  modify it under the terms of the GNU Lesser General Public

								//  License as published by the Free Software Foundation; either

								//  version 2.1 of the License, or (at your option) any later version.

								//

								//  The cpphoafparser library is distributed in the hope that it will be useful,

								//  but WITHOUT ANY WARRANTY; without even the implied warranty of

								//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

								//  Lesser General Public License for more details.

								//

								//  You should have received a copy of the GNU Lesser General Public

								//  License along with this library; if not, write to the Free Software

								//  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

								//

								//==============================================================================


								#ifndef CPPHOAFPARSER_HOALEXER_H

								#define CPPHOAFPARSER_HOALEXER_H


								#include <map>

								#include <string>

								#include <stdexcept>


								#include "cpphoafparser/parser/hoa_parser_exception.hh"


								namespace cpphoafparser {


								/** Lexer for tokenizing a HOA stream (used internally by HOAParser). */

								class HOALexer {

								public:

								  /**
								   * The type of the tokens in a HOA stream.
								   *
								   * The enumerator order is part of the interface (the numeric values
								   * follow from it), so new kinds must only ever be appended.
								   */
								  enum TokenType {
								    // Tokens carrying a payload (integer value or text)
								    TOKEN_INT,
								    TOKEN_IDENT,
								    TOKEN_STRING,
								    TOKEN_HEADER_NAME,
								    TOKEN_ALIAS_NAME,

								    // End of the input
								    TOKEN_EOF,

								    // Section markers (--BODY--, --END--, --ABORT--) and the known headers
								    TOKEN_BODY,
								    TOKEN_END,
								    TOKEN_ABORT,
								    TOKEN_HOA,
								    TOKEN_STATE,
								    TOKEN_STATES,
								    TOKEN_START,
								    TOKEN_AP,
								    TOKEN_ALIAS,
								    TOKEN_ACCEPTANCE,
								    TOKEN_ACCNAME,
								    TOKEN_TOOL,
								    TOKEN_NAME,
								    TOKEN_PROPERTIES,

								    // Punctuation, etc.
								    TOKEN_NOT,
								    TOKEN_AND,
								    TOKEN_OR,
								    TOKEN_LPARENTH,
								    TOKEN_RPARENTH,
								    TOKEN_LBRACKET,
								    TOKEN_RBRACKET,
								    TOKEN_LCURLY,
								    TOKEN_RCURLY,
								    TOKEN_TRUE,
								    TOKEN_FALSE
								  };

								  /** A token in the HOA stream. */
								  struct Token {
								    /** The kind of the token. */
								    TokenType kind;
								    /** The string representation of this token (if applicable, empty otherwise) */
								    std::string vString;
								    /** The integer representation of this token (if applicable, 0 otherwise) */
								    unsigned int vInteger;

								    /** The line where this token started */
								    unsigned int line;
								    /** The column where this token started */
								    unsigned int col;

								    /** EOF (end-of-file) constructor. */
								    Token()
								      : kind(TOKEN_EOF), vString(), vInteger(0), line(0), col(0) {}

								    /** Constructor for a syntactic element (no string or integer payload). */
								    Token(TokenType kind, unsigned int line, unsigned int col)
								      : kind(kind), vString(), vInteger(0), line(line), col(col) {}

								    /** Constructor for a token having variable string content (e.g., TOKEN_IDENT, TOKEN_ALIAS_NAME, TOKEN_STRING, ...) */
								    Token(TokenType kind, const std::string& vString, unsigned int line, unsigned int col)
								      : kind(kind), vString(vString), vInteger(0), line(line), col(col) {}

								    /** Constructor for an unsigned integer token (kind is TOKEN_INT). */
								    Token(unsigned int vInteger, unsigned int line, unsigned int col)
								      : kind(TOKEN_INT), vString(), vInteger(vInteger), line(line), col(col) {}

								    /** Returns true if this token represents the end-of-file. */
								    bool isEOF() const {return kind == TOKEN_EOF;}

								    /**
								     * Returns a string name for the given token type
								     * (the enumerator name without the TOKEN_ prefix).
								     * @throws std::logic_error if `kind` is not a known enumerator
								     */
								    static std::string typeAsString(TokenType kind) {
								      switch (kind) {
								      case TOKEN_INT:         return "INT";
								      case TOKEN_IDENT:       return "IDENT";
								      case TOKEN_STRING:      return "STRING";
								      case TOKEN_HEADER_NAME: return "HEADER_NAME";
								      case TOKEN_ALIAS_NAME:  return "ALIAS_NAME";

								      case TOKEN_EOF:         return "EOF";

								      case TOKEN_BODY:        return "BODY";
								      case TOKEN_END:         return "END";
								      case TOKEN_ABORT:       return "ABORT";
								      case TOKEN_HOA:         return "HOA";
								      case TOKEN_STATE:       return "STATE";
								      case TOKEN_STATES:      return "STATES";
								      case TOKEN_START:       return "START";
								      case TOKEN_AP:          return "AP";
								      case TOKEN_ALIAS:       return "ALIAS";
								      case TOKEN_ACCEPTANCE:  return "ACCEPTANCE";
								      case TOKEN_ACCNAME:     return "ACCNAME";
								      case TOKEN_TOOL:        return "TOOL";
								      case TOKEN_NAME:        return "NAME";
								      case TOKEN_PROPERTIES:  return "PROPERTIES";

								      // Punctuation, etc.
								      case TOKEN_NOT:         return "NOT";
								      case TOKEN_AND:         return "AND";
								      case TOKEN_OR:          return "OR";
								      case TOKEN_LPARENTH:    return "LPARENTH";
								      case TOKEN_RPARENTH:    return "RPARENTH";
								      case TOKEN_LBRACKET:    return "LBRACKET";
								      case TOKEN_RBRACKET:    return "RBRACKET";
								      case TOKEN_LCURLY:      return "LCURLY";
								      case TOKEN_RCURLY:      return "RCURLY";
								      case TOKEN_TRUE:        return "TRUE";
								      case TOKEN_FALSE:       return "FALSE";
								      }
								      // only reachable for a value outside the enumeration
								      throw std::logic_error("Unhandled token type");
								    }

								    /**
								     * Returns a string name for the given token type (for use in error messages).
								     * Fixed-text tokens are rendered the way they are written in a HOA file.
								     * @throws std::logic_error if `kind` is not a known enumerator
								     */
								    static std::string forErrorMessage(TokenType kind) {
								      switch (kind) {
								      case TOKEN_INT:         return "INTEGER";
								      case TOKEN_IDENT:       return "IDENTIFIER";
								      case TOKEN_STRING:      return "STRING";
								      case TOKEN_HEADER_NAME: return "HEADER_NAME";
								      case TOKEN_ALIAS_NAME:  return "ALIAS_NAME";

								      // spelled exactly like in the Token overload below
								      case TOKEN_EOF:         return "END-OF-FILE";

								      case TOKEN_BODY:        return "--BODY--";
								      case TOKEN_END:         return "--END--";
								      case TOKEN_ABORT:       return "--ABORT--";
								      case TOKEN_HOA:         return "HOA:";
								      case TOKEN_STATE:       return "State:";
								      case TOKEN_STATES:      return "States:";
								      case TOKEN_START:       return "Start:";
								      case TOKEN_AP:          return "AP:";
								      case TOKEN_ALIAS:       return "Alias:";
								      case TOKEN_ACCEPTANCE:  return "Acceptance:";
								      case TOKEN_ACCNAME:     return "acc-name:";
								      case TOKEN_TOOL:        return "tool:";
								      case TOKEN_NAME:        return "name:";
								      case TOKEN_PROPERTIES:  return "properties:";

								      // Punctuation, etc.
								      case TOKEN_NOT:         return "!";
								      case TOKEN_AND:         return "&";
								      case TOKEN_OR:          return "|";
								      case TOKEN_LPARENTH:    return "(";
								      case TOKEN_RPARENTH:    return ")";
								      case TOKEN_LBRACKET:    return "[";
								      case TOKEN_RBRACKET:    return "]";
								      case TOKEN_LCURLY:      return "{";
								      case TOKEN_RCURLY:      return "}";
								      case TOKEN_TRUE:        return "t";
								      case TOKEN_FALSE:       return "f";
								      }
								      // only reachable for a value outside the enumeration
								      throw std::logic_error("Unhandled token type");
								    }

								    /**
								     * Returns a string representation of a given token (for error messages).
								     * Tokens with a payload include it (e.g. "INTEGER 42", "IDENTIFIER foo").
								     * @throws std::logic_error if `token.kind` is not a known enumerator
								     */
								    static std::string forErrorMessage(const Token& token) {
								      switch (token.kind) {
								      case TOKEN_INT:         return "INTEGER " + std::to_string(token.vInteger);
								      case TOKEN_IDENT:       return "IDENTIFIER " + token.vString;
								      case TOKEN_STRING:      return "STRING " + token.vString;
								      case TOKEN_HEADER_NAME: return "HEADER " + token.vString;
								      case TOKEN_ALIAS_NAME:  return "ALIAS " + token.vString;

								      case TOKEN_EOF:         return "END-OF-FILE";

								      case TOKEN_BODY:        return "--BODY--";
								      case TOKEN_END:         return "--END--";
								      case TOKEN_ABORT:       return "--ABORT--";
								      case TOKEN_HOA:         return "HEADER HOA";
								      case TOKEN_STATES:      return "HEADER States";
								      case TOKEN_START:       return "HEADER Start";
								      case TOKEN_AP:          return "HEADER AP";
								      case TOKEN_ALIAS:       return "HEADER Alias";
								      case TOKEN_ACCEPTANCE:  return "HEADER Acceptance";
								      case TOKEN_ACCNAME:     return "HEADER acc-name";
								      case TOKEN_TOOL:        return "HEADER tool";
								      case TOKEN_NAME:        return "HEADER name";
								      case TOKEN_PROPERTIES:  return "HEADER properties";

								      // "State:" is reported as a definition, not as a header
								      case TOKEN_STATE:       return "DEFINITION State";

								      // Punctuation, etc.
								      case TOKEN_NOT:         return "!";
								      case TOKEN_AND:         return "&";
								      case TOKEN_OR:          return "|";
								      case TOKEN_LPARENTH:    return "(";
								      case TOKEN_RPARENTH:    return ")";
								      case TOKEN_LBRACKET:    return "[";
								      case TOKEN_RBRACKET:    return "]";
								      case TOKEN_LCURLY:      return "{";
								      case TOKEN_RCURLY:      return "}";
								      case TOKEN_TRUE:        return "TRUE t";
								      case TOKEN_FALSE:       return "FALSE f";
								      }
								      // only reachable for a value outside the enumeration
								      throw std::logic_error("Unhandled token type");
								    }

								    /**
								     * Output function for a given token.
								     * Format: "<KIND> payload     (line,col)", where the payload is the
								     * integer value for TOKEN_INT and vString for every other kind.
								     */
								    friend std::ostream& operator<<(std::ostream& out, const Token& token) {
								      out << "<" << Token::typeAsString(token.kind) << "> ";
								      if (token.kind == TOKEN_INT) {
								        out << token.vInteger;
								      } else {
								        out << token.vString;
								      }
								      out << "     (" << token.line << "," << token.col << ")";
								      return out;
								    }
								  };


								  /**
								   * Constructor for a lexer, reading from the given input stream.
								   *
								   * The stream is held by reference. Position tracking starts at
								   * line 1, column 0, and the table of known header names is
								   * populated here.
								   */
								  HOALexer(std::istream& in)
								  : in(in), line(1), col(0), ch(0),
								    // The headers we know (text including the trailing ':')
								    knownHeaders{
								      {"HOA:",        TOKEN_HOA},
								      {"State:",      TOKEN_STATE},
								      {"States:",     TOKEN_STATES},
								      {"Start:",      TOKEN_START},
								      {"AP:",         TOKEN_AP},
								      {"Alias:",      TOKEN_ALIAS},
								      {"Acceptance:", TOKEN_ACCEPTANCE},
								      {"acc-name:",   TOKEN_ACCNAME},
								      {"tool:",       TOKEN_TOOL},
								      {"name:",       TOKEN_NAME},
								      {"properties:", TOKEN_PROPERTIES}
								    } {
								  }


								  /**
								   * Get the next token from the input stream.
								   *
								   * Skips whitespace and comments via skip(), then reads exactly one
								   * token. The returned token records the line/column at which it
								   * started.
								   *
								   * @return the next token; a TOKEN_EOF token once the input is exhausted
								   * @throws HOAParserException (as built by error()) on an illegal
								   *         character, a malformed `--XYZ--` marker, an unterminated
								   *         string, an integer with a leading zero, or an integer that
								   *         does not fit into an unsigned int
								   */
								  Token nextToken() {
								    // first, skip any whitespace; afterwards `ch` holds the first character of the token
								    skip();
								    if (ch == EOF) return Token(TOKEN_EOF, line, col);

								    // handle the simple syntactic elements
								    switch (ch) {
								    case '!': return Token(TOKEN_NOT, line, col);
								    case '&': return Token(TOKEN_AND, line, col);
								    case '|': return Token(TOKEN_OR, line, col);
								    case '(': return Token(TOKEN_LPARENTH, line, col);
								    case ')': return Token(TOKEN_RPARENTH, line, col);
								    case '[': return Token(TOKEN_LBRACKET, line, col);
								    case ']': return Token(TOKEN_RBRACKET, line, col);
								    case '{': return Token(TOKEN_LCURLY, line, col);
								    case '}': return Token(TOKEN_RCURLY, line, col);
								    }

								    // remember where the token began
								    const unsigned int lineStart = line;
								    const unsigned int colStart = col;

								    // handle --XYZ-- style markers
								    if (ch == '-') {
								      // The leading '-' is already consumed; the remaining text of all
								      // three markers is matched in parallel, character by character.
								      const std::string abort("-ABORT--");
								      const std::string body("-BODY--");
								      const std::string end("-END--");
								      bool canBeAbort = true;
								      bool canBeBody  = true;
								      bool canBeEnd   = true;
								      unsigned int index = 0;

								      while (canBeAbort || canBeBody || canBeEnd) {
								        nextChar();
								        if (ch == EOF) {throw error("Premature end-of-file inside token", lineStart, colStart);}
								        if (canBeAbort && ch == abort.at(index)) {
								          if (index == abort.length()-1) {
								            return Token(TOKEN_ABORT, lineStart, colStart);
								          }
								        } else {
								          canBeAbort = false;
								        }
								        if (canBeBody && ch == body.at(index)) {
								          if (index == body.length()-1) {
								            return Token(TOKEN_BODY, lineStart, colStart);
								          }
								        } else {
								          canBeBody = false;
								        }
								        if (canBeEnd && ch == end.at(index)) {
								          if (index == end.length()-1) {
								            return Token(TOKEN_END, lineStart, colStart);
								          }
								        } else {
								          canBeEnd = false;
								        }

								        index++;
								        // a candidate that has run out of characters can no longer match
								        // (also keeps the .at(index) calls above in range)
								        if (index >= abort.length()) canBeAbort = false;
								        if (index >= body.length()) canBeBody = false;
								        if (index >= end.length()) canBeEnd = false;
								      }
								      throw error("Lexical error: For token starting with '-', expected either '--BODY--', '--END--' or '--ABORT--'", lineStart, colStart);
								    }

								    // handle quoted strings (the token text keeps the surrounding quotes and all escapes)
								    if (ch == '"') {
								      std::string text(1, static_cast<char>(ch));
								      bool last_was_escape = false;  // previous char was an unescaped backslash
								      while (true) {
								        nextChar();
								        if (ch == EOF) {throw error("Premature end-of-file in quoted string", lineStart, colStart);}
								        text += static_cast<char>(ch);

								        // Keep the position tracking correct across line breaks inside
								        // the string; the '\r' of a "\r\n" pair is not counted, its
								        // '\n' is.
								        if (ch == '\n' || (ch == '\r' && peekChar() != '\n')) {
								          line++;
								          col = 0;
								        }

								        if (ch == '"' && !last_was_escape) break;
								        last_was_escape = (ch == '\\' && !last_was_escape);
								      }

								      return Token(TOKEN_STRING, text, lineStart, colStart);
								    }

								    // handle integers
								    if (ch >= '0' && ch <= '9') {
								      std::string text(1, static_cast<char>(ch));
								      while (true) {
								        const int next = peekChar();
								        if (next < '0' || next > '9') break;
								        nextChar();
								        text += static_cast<char>(ch);
								      }

								      if (text.length() > 1 && text.at(0) == '0') {
								        throw error("Syntax error parsing integer, starts with 0: "+text, lineStart, colStart);
								      }

								      const std::string tooBig = "Syntax error: integer "+text+" is too big to represent as an unsigned int";

								      // Parse as unsigned long and range-check explicitly: std::stoi would
								      // reject values in (INT_MAX, UINT_MAX] although they fit the token.
								      unsigned long parsed = 0;
								      try {
								        parsed = std::stoul(text);
								      } catch (const std::invalid_argument&) {
								        throw error("Syntax error: "+text+" is not an integer", lineStart, colStart);
								      } catch (const std::out_of_range&) {
								        throw error(tooBig, lineStart, colStart);
								      }

								      const unsigned int vInteger = static_cast<unsigned int>(parsed);
								      if (vInteger != parsed) {
								        // value was truncated by the cast, i.e. exceeds unsigned int
								        throw error(tooBig, lineStart, colStart);
								      }
								      return Token(vInteger, lineStart, colStart);
								    }

								    if (ch == '@' || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
								      // handle identifiers, @alias-names, headers, t and f
								      std::string text(1, static_cast<char>(ch));

								      const bool alias = (ch == '@');
								      while (true) {
								        const int next = peekChar();
								        if (next == EOF) break;
								        if (next == ':') {
								          // a ':' never belongs to an alias name; otherwise it
								          // terminates (and is part of) a header name
								          if (alias) break;
								          // consume ':'
								          nextChar();
								          text += ':';
								          break;
								        }
								        const bool identChar =
								            next == '_' ||
								            next == '-' ||
								            (next >= 'a' && next <= 'z') ||
								            (next >= 'A' && next <= 'Z') ||
								            (next >= '0' && next <= '9');
								        if (!identChar) break;
								        nextChar();
								        text += static_cast<char>(ch);
								      }

								      if (alias) {
								        return Token(TOKEN_ALIAS_NAME, text, lineStart, colStart);
								      }

								      if (text.back() == ':') {
								        // header: use the dedicated token type if it is a known one
								        auto it = knownHeaders.find(text);
								        if (it != knownHeaders.end()) {
								          return Token(it->second, text, lineStart, colStart);
								        }
								        return Token(TOKEN_HEADER_NAME, text, lineStart, colStart);
								      }

								      if (text == "t") {
								        return Token(TOKEN_TRUE, text, lineStart, colStart);
								      }
								      if (text == "f") {
								        return Token(TOKEN_FALSE, text, lineStart, colStart);
								      }
								      return Token(TOKEN_IDENT, text, lineStart, colStart);
								    }

								    throw error("Syntax error, illegal character '"+std::string(1, static_cast<char>(ch))+"'", lineStart, colStart);
								  }


								private:


								  /** Skip whitespace. */

								  void skip() {

								    while (true) {

								      nextChar();

								      if (ch == EOF) { // EOF

								        return;

								      }

								      if (ch == '/') {

								        skipComment();

								        continue;

								      }

								      if (ch == ' ' || ch == '\t') {

								        continue;

								      }

								      if (ch == '\n' || ch == '\r') {

								        line++;

								        col=0;

								        continue;

								      }

								      break;

								    }

								  }


								  /** Skip a comment */

								  void skipComment() {

								    nextChar();

								    if (ch != '*') {

								      throw error("Malformed start of comment", line, col);

								    }

								    bool last_was_slash = false;

								    bool last_was_star = false;

								    unsigned int nesting = 0;

								    while (true) {

								      nextChar();

								      if (ch == EOF) {throw error("End-of-file inside comment", line, col);}

								      if (ch == '\n' || ch == '\r') {

								        line++;

								        col=0;

								        last_was_slash = false;

								        last_was_star = false;

								        continue;

								      }

								      if (ch == '/') {

								        if (last_was_star) {

								          if (nesting == 0) {

								            return;

								          } else {

								            nesting--;

								          }

								        } else {

								          last_was_slash = true;

								        }

								        continue;

								      }

								      if (ch == '*') {

								        if (last_was_slash) {

								          nesting++;

								        } else {

								          last_was_star = true;

								          continue;

								        }

								      }

								      last_was_slash = false;

								      last_was_star = false;

								    }

								  }


								  /**
								   * Consume one character from the input stream and store it in `ch`
								   * (EOF at the end of the input). The column counter advances only
								   * when a character was actually read.
								   */
								  void nextChar() {
								    ch = in.get();
								    if (ch == EOF) {
								      return;
								    }
								    ++col;
								  }


								  /**
								   * Look at the upcoming character of the input stream without
								   * consuming it; `ch` and `col` are left untouched.
								   * @return the result of peek() on the input stream
								   */
								  int peekChar() {
								    const int upcoming = in.peek();
								    return upcoming;
								  }


								  /**
								   * Construct a HOAParserException for a lexer error.
								   * The position is appended to the message as
								   * " (at line L, col C)" and is also handed to the exception.
								   * @param msg the error message
								   * @param errLine the line number where the error occurred
								   * @param errCol the column number where the error occurred
								   * @return the exception object (the caller is expected to throw it)
								   */
								  HOAParserException error(const std::string& msg, unsigned int errLine, unsigned int errCol) {
								    const std::string lineText = std::to_string(errLine);
								    const std::string colText = std::to_string(errCol);
								    const std::string position = " (at line " + lineText + ", col " + colText + ")";
								    return HOAParserException(msg + position, errLine, errCol);
								  }


								private:

								  /** The input stream (held by reference; consumed via nextChar(), inspected via peekChar()) */

								  std::istream& in;

								  /** The current line number (starts at 1, incremented when a line break is skipped) */

								  unsigned int line;

								  /** The current column number (incremented by nextChar() for every character read, reset to 0 at a line break) */

								  unsigned int col;

								  /** The current character, i.e. the one most recently read by nextChar() (or EOF) */

								  int ch;


								  /** A map for mapping the known header names (including the trailing ':') to the corresponding token types */

								  std::map<std::string, TokenType> knownHeaders;

								};


								}


								#endif