- Extended Lexer to consider forms like '123.abc' as identifiers. This is
  essential for recognizing and extending DOM support.
This commit is contained in:
Paul Beckingham
2014-06-02 22:19:38 -04:00
parent 43718cbce5
commit 870e45be0d

View File

@@ -363,6 +363,12 @@ bool Lexer::token (std::string& result, Type& type)
result += utf8_character (_n0); result += utf8_character (_n0);
shift (); shift ();
} }
else if (is_ident_start (_n0))
{
type = typeIdentifier;
result += utf8_character (_n0);
shift ();
}
else else
{ {
return true; return true;
@@ -381,15 +387,16 @@ bool Lexer::token (std::string& result, Type& type)
result += utf8_character (_n0); result += utf8_character (_n0);
shift (); shift ();
} }
break; else if (is_ident_start (_n0))
case typeExponent:
if (is_dec_digit (_n0))
{ {
type = typeIdentifier;
result += utf8_character (_n0); result += utf8_character (_n0);
shift (); shift ();
} }
else if (_n0 == '.') break;
case typeExponent:
if (is_dec_digit (_n0) || _n0 == '.')
{ {
result += utf8_character (_n0); result += utf8_character (_n0);
shift (); shift ();
@@ -427,7 +434,7 @@ bool Lexer::token (std::string& result, Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Just like Lexer::token, but no operators, dates or durations. // Just like Lexer::token, but no operators, numbers, dates or durations.
bool Lexer::word (std::string& token, Type& type) bool Lexer::word (std::string& token, Type& type)
{ {
// Start with nothing. // Start with nothing.
@@ -668,7 +675,8 @@ bool Lexer::is_ident_start (int c) const
{ {
return c && // Include null character check. return c && // Include null character check.
! is_ws (c) && ! is_ws (c) &&
! is_dec_digit (c); ! is_dec_digit (c) &&
! is_single_op (c);
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////