diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 21dcdbcbf..760e96b52 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,15 +14,17 @@ set (task_SRCS A3.cpp A3.h DOM.cpp DOM.h Date.cpp Date.h Directory.cpp Directory.h - OldDuration.cpp OldDuration.h + Duration.cpp Duration.h E9.cpp E9.h File.cpp File.h Hooks.cpp Hooks.h ISO8601.cpp ISO8601.h JSON.cpp JSON.h + Lexer.cpp Lexer.h LRParser.cpp LRParser.h Msg.cpp Msg.h Nibbler.cpp Nibbler.h + OldDuration.cpp OldDuration.h Parser.cpp Parser.h Path.cpp Path.h RX.cpp RX.h diff --git a/src/Duration.cpp b/src/Duration.cpp new file mode 100644 index 000000000..af1360ff3 --- /dev/null +++ b/src/Duration.cpp @@ -0,0 +1,167 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright 2006 - 2014, Paul Beckingham, Federico Hernandez. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include + +#define DAY 86400 +#define HOUR 3600 +#define MINUTE 60 +#define SECOND 1 + +static struct +{ + std::string unit; + int seconds; +} durations[] = +{ + // These are sorted by first character, then length, so that Nibbler::getOneOf + // returns a maximal match. + {"annual", 365 * DAY}, + {"biannual", 730 * DAY}, + {"bimonthly", 61 * DAY}, + {"biweekly", 14 * DAY}, + {"biyearly", 730 * DAY}, + {"daily", 1 * DAY}, + {"days", 1 * DAY}, + {"day", 1 * DAY}, + {"d", 1 * DAY}, + {"fortnight", 14 * DAY}, + {"hours", 1 * HOUR}, + {"hour", 1 * HOUR}, + {"hrs", 1 * HOUR}, // Deprecate + {"hr", 1 * HOUR}, // Deprecate + {"h", 1 * HOUR}, + {"minutes", 1 * MINUTE}, + {"minute", 1 * MINUTE}, + {"mins", 1 * MINUTE}, // Deprecate + {"min", 1 * MINUTE}, + {"monthly", 30 * DAY}, + {"months", 30 * DAY}, + {"month", 30 * DAY}, + {"mnths", 30 * DAY}, // Deprecate + {"mths", 30 * DAY}, // Deprecate + {"mth", 30 * DAY}, // Deprecate + {"mos", 30 * DAY}, // Deprecate + {"mo", 30 * DAY}, + {"quarterly", 91 * DAY}, + {"quarters", 91 * DAY}, + {"quarter", 91 * DAY}, + {"qrtrs", 91 * DAY}, // Deprecate + {"qtrs", 91 * DAY}, // Deprecate + {"qtr", 91 * DAY}, // Deprecate + {"q", 91 * DAY}, + {"semiannual", 183 * DAY}, + {"sennight", 14 * DAY}, + {"seconds", 1 * SECOND}, + {"second", 1 * SECOND}, + {"secs", 1 * SECOND}, // Deprecate + {"sec", 1 * SECOND}, // Deprecate + {"s", 1 * SECOND}, + {"weekdays", DAY}, + {"weekly", 7 * DAY}, + {"weeks", 7 * DAY}, + {"week", 7 * DAY}, + {"wks", 7 * DAY}, // Deprecate + {"wk", 7 * DAY}, // Deprecate + {"w", 7 * DAY}, + {"yearly", 365 * DAY}, + {"years", 365 * DAY}, + {"year", 365 * DAY}, + {"yrs", 365 * DAY}, // Deprecate + {"yr", 365 * DAY}, // Deprecate + {"y", 365 * DAY}, +}; + +#define NUM_DURATIONS (sizeof (durations) / sizeof (durations[0])) + 
+//////////////////////////////////////////////////////////////////////////////// +Duration::Duration () +: _secs (0) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +Duration::~Duration () +{ +} + +//////////////////////////////////////////////////////////////////////////////// +Duration::operator time_t () const +{ + return _secs; +} + +//////////////////////////////////////////////////////////////////////////////// +bool Duration::parse (const std::string& input, std::string::size_type& start) +{ + std::string::size_type original_start = start; + Nibbler n (input.substr (start)); + + std::vector units; + for (int i = 0; i < NUM_DURATIONS; i++) + units.push_back (durations[i].unit); + + std::string number; + std::string unit; + if ((n.getNumber (number) && n.skipWS () && n.getOneOf (units, unit)) || + n.getOneOf (units, unit)) + { + if (n.depleted () || + Lexer::is_ws (n.next ())) + { + start = original_start + n.cursor (); + double quantity = (number == "") + ? 1.0 + : strtod (number.c_str (), NULL); + + // Linear lookup - should be logarithmic. + double seconds = 1; + for (int i = 0; i < NUM_DURATIONS; i++) + { + if (durations[i].unit == unit) + { + seconds = durations[i].seconds; + _secs = static_cast (quantity * static_cast (seconds)); + return true; + } + } + } + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +void Duration::clear () +{ + _secs = 0; +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/Duration.h b/src/Duration.h new file mode 100644 index 000000000..1b14ae9ce --- /dev/null +++ b/src/Duration.h @@ -0,0 +1,49 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright 2006 - 2014, Paul Beckingham, Federico Hernandez. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////

#ifndef INCLUDED_DURATION
#define INCLUDED_DURATION

// NOTE(review): both include targets were stripped when this patch was
// flattened; these two are reconstructed from the types used below - confirm.
#include <string>
#include <time.h>

// A span of time held as a whole number of seconds.
class Duration
{
public:
  // Construction and destruction.
  Duration ();
  ~Duration ();

  // Copying is declared but deliberately left unimplemented.
  Duration (const Duration&);
  Duration& operator= (const Duration&);

  // The duration, in seconds.
  operator time_t () const;

  // Reads a duration from the given string, starting at the given offset;
  // returns true on success.
  bool parse (const std::string&, std::string::size_type&);

  // Resets the duration to zero.
  void clear ();

protected:
  time_t _secs;   // Length of the duration, in seconds.
};

#endif
//////////////////////////////////////////////////////////////////////////////// diff --git a/src/Lexer.cpp b/src/Lexer.cpp new file mode 100644 index 000000000..506208984 --- /dev/null +++ b/src/Lexer.cpp @@ -0,0 +1,600 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright 2013 - 2014, Paul Beckingham, Federico Hernandez. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +Lexer::Lexer (const std::string& input) +: _input (input) +, _i (0) +, _n0 (32) +, _n1 (32) +, _n2 (32) +, _n3 (32) +, _ambiguity (true) +{ + // Read 4 chars in preparation. Even if there are < 4. Take a deep breath. + shift (); + shift (); + shift (); + shift (); +} + +//////////////////////////////////////////////////////////////////////////////// +Lexer::~Lexer () +{ +} + +//////////////////////////////////////////////////////////////////////////////// +// Walk the input string, looking for transitions. +bool Lexer::token (std::string& token, Type& type) +{ + // Start with nothing. + token = ""; + + // Different types of matching quote: ', ". + int quote = 0; + + type = typeNone; + while (_n0) + { + switch (type) + { + case typeNone: + if (is_ws (_n0)) + shift (); + else if (_n0 == '"' || _n0 == '\'') + { + type = typeString; + quote = _n0; + shift (); + } + else if (_n0 == '0' && + _n1 == 'x' && + is_hex_digit (_n2)) + { + type = typeHex; + token += utf8_character (_n0); + shift (); + token += utf8_character (_n0); + shift (); + token += utf8_character (_n0); + shift (); + } + else if (is_dec_digit (_n0)) + { + // Speculatively try a date and duration parse. Longest wins. + std::string::size_type iso_i = 0; + std::string iso_token; + ISO8601d iso; + iso.ambiguity (_ambiguity); + if (iso.parse (_input.substr (_i < 4 ? 0 : _i - 4), iso_i)) + iso_token = _input.substr ((_i < 4 ? 
0 : _i - 4), iso_i); + + std::string::size_type dur_i = 0; + std::string dur_token; + Duration dur; + if (dur.parse (_input.substr (_i < 4 ? 0 : _i - 4), dur_i)) + dur_token = _input.substr ((_i < 4 ? 0 : _i - 4), dur_i); + + if (iso_token.length () > dur_token.length ()) + { + while (iso_i--) shift (); + token = iso_token; + type = typeDate; + return true; + } + else if (dur_token.length () > iso_token.length ()) + { + while (dur_i--) shift (); + token = dur_token; + type = typeDuration; + return true; + } + + type = typeNumber; + token += utf8_character (_n0); + shift (); + } + else if (_n0 == '.' && is_dec_digit (_n1)) + { + type = typeDecimal; + token += utf8_character (_n0); + shift (); + } + else if (is_triple_op (_n0, _n1, _n2)) + { + type = typeOperator; + token += utf8_character (_n0); + shift (); + token += utf8_character (_n0); + shift (); + token += utf8_character (_n0); + shift (); + return true; + } + else if (is_double_op (_n0, _n1)) + { + type = typeOperator; + token += utf8_character (_n0); + shift (); + token += utf8_character (_n0); + shift (); + return true; + } + else if (is_single_op (_n0)) + { + type = typeOperator; + token += utf8_character (_n0); + shift (); + return true; + } + else if (_n0 == '\\') + { + type = typeIdentifierEscape; + shift (); + } + else if (is_ident_start (_n0)) + { + // Speculatively try a date and duration parse. Longest wins. + std::string::size_type iso_i = 0; + std::string iso_token; + ISO8601p iso; + if (iso.parse (_input.substr (_i < 4 ? 0 : _i - 4), iso_i)) + iso_token = _input.substr ((_i < 4 ? 0 : _i - 4), iso_i); + + std::string::size_type dur_i = 0; + std::string dur_token; + Duration dur; + if (dur.parse (_input.substr (_i < 4 ? 0 : _i - 4), dur_i)) + dur_token = _input.substr ((_i < 4 ? 
0 : _i - 4), dur_i); + + if (iso_token.length () > dur_token.length ()) + { + while (iso_i--) shift (); + token = iso_token; + type = typeDuration; + return true; + } + else if (dur_token.length () > iso_token.length ()) + { + while (dur_i--) shift (); + token = dur_token; + type = typeDuration; + return true; + } + + type = typeIdentifier; + token += utf8_character (_n0); + shift (); + } + else + throw std::string ("Unexpected error 1"); + break; + + case typeString: + if (_n0 == quote) + { + shift (); + quote = 0; + return true; + } + else if (_n0 == '\\') + { + type = typeEscape; + shift (); + } + else + { + token += utf8_character (_n0); + shift (); + } + break; + + case typeIdentifier: + if (is_ident (_n0)) + { + token += utf8_character (_n0); + shift (); + } + else + { + return true; + } + break; + + case typeIdentifierEscape: + if (_n0 == 'u') + { + type = typeEscapeUnicode; + shift (); + } + break; + + case typeEscape: + if (_n0 == 'x') + { + type = typeEscapeHex; + shift (); + } + else if (_n0 == 'u') + { + type = typeEscapeUnicode; + shift (); + } + else + { + token += decode_escape (_n0); + type = quote ? typeString : typeIdentifier; + shift (); + } + break; + + case typeEscapeHex: + if (is_hex_digit (_n0) && is_hex_digit (_n1)) + { + token += utf8_character (hex_to_int (_n0, _n1)); + type = quote ? typeString : typeIdentifier; + shift (); + shift (); + } + else + { + type = quote ? typeString : typeIdentifier; + shift (); + quote = 0; + return true; + } + break; + + case typeEscapeUnicode: + if (is_hex_digit (_n0) && + is_hex_digit (_n1) && + is_hex_digit (_n2) && + is_hex_digit (_n3)) + { + token += utf8_character (hex_to_int (_n0, _n1, _n2, _n3)); + shift (); + shift (); + shift (); + shift (); + type = quote ? 
typeString : typeIdentifier; + } + else if (_n0 == quote) + { + type = typeString; + shift (); + quote = 0; + return true; + } + + case typeNumber: + if (is_dec_digit (_n0)) + { + token += utf8_character (_n0); + shift (); + } + else if (_n0 == '.') + { + type = typeDecimal; + token += utf8_character (_n0); + shift (); + } + else if (_n0 == 'e' || _n0 == 'E') + { + type = typeExponentIndicator; + token += utf8_character (_n0); + shift (); + } + else + { + return true; + } + break; + + case typeDecimal: + if (is_dec_digit (_n0)) + { + token += utf8_character (_n0); + shift (); + } + else if (_n0 == 'e' || _n0 == 'E') + { + type = typeExponentIndicator; + token += utf8_character (_n0); + shift (); + } + else + { + return true; + } + break; + + case typeExponentIndicator: + if (_n0 == '+' || _n0 == '-') + { + token += utf8_character (_n0); + shift (); + } + else if (is_dec_digit (_n0)) + { + type = typeExponent; + token += utf8_character (_n0); + shift (); + } + break; + + case typeExponent: + if (is_dec_digit (_n0)) + { + token += utf8_character (_n0); + shift (); + } + else if (_n0 == '.') + { + token += utf8_character (_n0); + shift (); + } + else + { + type = typeDecimal; + return true; + } + break; + + case typeHex: + if (is_hex_digit (_n0)) + { + token += utf8_character (_n0); + shift (); + } + else + { + return true; + } + break; + + default: + throw std::string ("Unexpected error 2"); + break; + } + + // Fence post. 
+ if (!_n0 && token != "") + return true; + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::ambiguity (bool value) +{ + _ambiguity = value; +} + +//////////////////////////////////////////////////////////////////////////////// +const std::string Lexer::type_name (const Type& type) +{ + switch (type) + { + case Lexer::typeNone: return "None"; + case Lexer::typeString: return "String"; + case Lexer::typeIdentifier: return "Identifier"; + case Lexer::typeIdentifierEscape: return "IdentifierEscape"; + case Lexer::typeNumber: return "Number"; + case Lexer::typeDecimal: return "Decimal"; + case Lexer::typeExponentIndicator: return "ExponentIndicator"; + case Lexer::typeExponent: return "Exponent"; + case Lexer::typeHex: return "Hex"; + case Lexer::typeOperator: return "Operator"; + case Lexer::typeEscape: return "Escape"; + case Lexer::typeEscapeHex: return "EscapeHex"; + case Lexer::typeEscapeUnicode: return "EscapeUnicode"; + case Lexer::typeDate: return "Date"; + case Lexer::typeDuration: return "Duration"; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Complete Unicode whitespace list. 
+// +// http://en.wikipedia.org/wiki/Whitespace_character +// Updated 2013-11-18 +bool Lexer::is_ws (int c) +{ + return (c == 0x0020 || // space Common Separator, space + c == 0x0009 || // Common Other, control HT, Horizontal Tab + c == 0x000A || // Common Other, control LF, Line feed + c == 0x000B || // Common Other, control VT, Vertical Tab + c == 0x000C || // Common Other, control FF, Form feed + c == 0x000D || // Common Other, control CR, Carriage return + c == 0x0085 || // Common Other, control NEL, Next line + c == 0x00A0 || // no-break space Common Separator, space + c == 0x1680 || // ogham space mark Ogham Separator, space + c == 0x180E || // mongolian vowel separator Mongolian Separator, space + c == 0x2000 || // en quad Common Separator, space + c == 0x2001 || // em quad Common Separator, space + c == 0x2002 || // en space Common Separator, space + c == 0x2003 || // em space Common Separator, space + c == 0x2004 || // three-per-em space Common Separator, space + c == 0x2005 || // four-per-em space Common Separator, space + c == 0x2006 || // six-per-em space Common Separator, space + c == 0x2007 || // figure space Common Separator, space + c == 0x2008 || // punctuation space Common Separator, space + c == 0x2009 || // thin space Common Separator, space + c == 0x200A || // hair space Common Separator, space + c == 0x2028 || // line separator Common Separator, line + c == 0x2029 || // paragraph separator Common Separator, paragraph + c == 0x202F || // narrow no-break space Common Separator, space + c == 0x205F || // medium mathematical space Common Separator, space + c == 0x3000); // ideographic space Common Separator, space +} + +//////////////////////////////////////////////////////////////////////////////// +bool Lexer::is_punct (int c) const +{ + if (c == ',' || + c == '.') // Tab + return true; + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +bool Lexer::is_num (int c) const +{ + if ((c >= '0' && 
c <= '9') || + c == '.') + return true; + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +bool Lexer::is_ident_start (int c) const +{ + return c && // Include null character check. + ! is_ws (c) && + ! is_dec_digit (c); +} + +//////////////////////////////////////////////////////////////////////////////// +bool Lexer::is_ident (int c) const +{ + return c && // Include null character check. + ! is_ws (c) && + ! is_single_op (c); +} + +//////////////////////////////////////////////////////////////////////////////// +bool Lexer::is_triple_op (int c0, int c1, int c2) const +{ + return (c0 == 'a' && c1 == 'n' && c2 == 'd') || + (c0 == 'x' && c1 == 'o' && c2 == 'r'); +} + +//////////////////////////////////////////////////////////////////////////////// +bool Lexer::is_double_op (int c0, int c1) const +{ + return (c0 == '=' && c1 == '=') || + (c0 == '!' && c1 == '=') || + (c0 == '<' && c1 == '=') || + (c0 == '>' && c1 == '=') || + (c0 == 'o' && c1 == 'r') || + (c0 == '|' && c1 == '|') || + (c0 == '&' && c1 == '&') || + (c0 == '!' && c1 == '~'); +} + +//////////////////////////////////////////////////////////////////////////////// +bool Lexer::is_single_op (int c) const +{ + return c == '+' || + c == '-' || + c == '*' || + c == '/' || + c == '(' || + c == ')' || + c == '<' || + c == '>' || + c == '^' || + c == '!' 
|| + c == '%' || + c == '=' || + c == '~'; +} + +//////////////////////////////////////////////////////////////////////////////// +bool Lexer::is_dec_digit (int c) const +{ + return c >= '0' && c <= '9'; +} + +//////////////////////////////////////////////////////////////////////////////// +bool Lexer::is_hex_digit (int c) const +{ + return (c >= '0' && c <= '9') || + (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F'); +} + +//////////////////////////////////////////////////////////////////////////////// +int Lexer::decode_escape (int c) const +{ + switch (c) + { + case 'b': return 0x08; + case 'f': return 0x0C; + case 'n': return 0x0A; + case 'r': return 0x0D; + case 't': return 0x09; + case 'v': return 0x0B; + case '\'': return 0x27; + case '"': return 0x22; + case '\\': return 0x5C; + default: return c; + } +} + +//////////////////////////////////////////////////////////////////////////////// +int Lexer::hex_to_int (int c) const +{ + if (c >= '0' && c <= '9') return (c - '0'); + else if (c >= 'a' && c <= 'f') return (c - 'a' + 10); + else return (c - 'A' + 10); +} + +//////////////////////////////////////////////////////////////////////////////// +int Lexer::hex_to_int (int c0, int c1) const +{ + return (hex_to_int (c0) << 4) + hex_to_int (c1); +} + +//////////////////////////////////////////////////////////////////////////////// +int Lexer::hex_to_int (int c0, int c1, int c2, int c3) const +{ + return (hex_to_int (c0) << 12) + + (hex_to_int (c1) << 8) + + (hex_to_int (c2) << 4) + + hex_to_int (c3); +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::shift () +{ + _n0 = _n1; + _n1 = _n2; + _n2 = _n3; + _n3 = utf8_next_char (_input, _i); + + //std::cout << "# shift [" << (char) _n0 << (char) _n1 << (char) _n2 << (char) _n3 << "]\n"; +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/Lexer.h b/src/Lexer.h new file mode 100644 index 000000000..9010b6ea8 --- 
/dev/null +++ b/src/Lexer.h @@ -0,0 +1,94 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright 2013 - 2014, Paul Beckingham, Federico Hernandez. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef INCLUDED_LEXER +#define INCLUDED_LEXER + +#include +#include + +class Lexer +{ +public: + enum Type + { + typeNone = 0, + typeString, + typeIdentifier, + typeIdentifierEscape, // Intermediate + typeEscape, // Intermediate + typeEscapeHex, // Intermediate + typeEscapeUnicode, // Intermediate + typeNumber, + typeDecimal, + typeExponentIndicator, // Intermediate + typeExponent, // Intermediate + typeHex, + typeOperator, + typeDate, + typeDuration, + }; + + Lexer (const std::string&); + virtual ~Lexer (); + Lexer (const Lexer&); // Not implemented. 
+ Lexer& operator= (const Lexer&); // Not implemented. + bool operator== (const Lexer&); // Not implemented. + bool token (std::string&, Type&); + void ambiguity (bool); + + static const std::string type_name (const Type&); + static bool is_ws (int); + +private: + bool is_punct (int) const; + bool is_num (int) const; + bool is_ident_start (int) const; + bool is_ident (int) const; + bool is_triple_op (int, int, int) const; + bool is_double_op (int, int) const; + bool is_single_op (int) const; + bool is_dec_digit (int) const; + bool is_hex_digit (int) const; + int decode_escape (int) const; + int hex_to_int (int) const; + int hex_to_int (int, int) const; + int hex_to_int (int, int, int, int) const; + void shift (); + +private: + const std::string _input; + std::string::size_type _i; + int _n0; + int _n1; + int _n2; + int _n3; + bool _ambiguity; +}; + +#endif + +////////////////////////////////////////////////////////////////////////////////