Lexer2
- Integrated Lexer2 in place of Lexer. Tests fail.
This commit is contained in:
40
src/CLI.cpp
40
src/CLI.cpp
@@ -29,7 +29,6 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <Context.h>
|
#include <Context.h>
|
||||||
#include <Nibbler.h>
|
#include <Nibbler.h>
|
||||||
#include <Lexer.h>
|
|
||||||
#include <Lexer2.h>
|
#include <Lexer2.h>
|
||||||
#include <CLI.h>
|
#include <CLI.h>
|
||||||
#include <Color.h>
|
#include <Color.h>
|
||||||
@@ -662,13 +661,13 @@ void CLI::addArg (const std::string& arg)
|
|||||||
// that cause the lexemes to be ignored, and the original argument used
|
// that cause the lexemes to be ignored, and the original argument used
|
||||||
// intact.
|
// intact.
|
||||||
std::string lexeme;
|
std::string lexeme;
|
||||||
Lexer::Type type;
|
Lexer2::Type type;
|
||||||
Lexer lex (raw);
|
Lexer2 lex (raw);
|
||||||
lex.ambiguity (false);
|
lex.ambiguity (false);
|
||||||
|
|
||||||
std::vector <std::pair <std::string, Lexer::Type> > lexemes;
|
std::vector <std::pair <std::string, Lexer2::Type> > lexemes;
|
||||||
while (lex.token (lexeme, type))
|
while (lex.token (lexeme, type))
|
||||||
lexemes.push_back (std::pair <std::string, Lexer::Type> (lexeme, type));
|
lexemes.push_back (std::pair <std::string, Lexer2::Type> (lexeme, type));
|
||||||
|
|
||||||
if (disqualifyInsufficientTerms (lexemes) ||
|
if (disqualifyInsufficientTerms (lexemes) ||
|
||||||
disqualifyNoOps (lexemes) ||
|
disqualifyNoOps (lexemes) ||
|
||||||
@@ -682,7 +681,7 @@ void CLI::addArg (const std::string& arg)
|
|||||||
{
|
{
|
||||||
// How often have I said to you that when you have eliminated the
|
// How often have I said to you that when you have eliminated the
|
||||||
// impossible, whatever remains, however improbable, must be the truth?
|
// impossible, whatever remains, however improbable, must be the truth?
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >::iterator l;
|
std::vector <std::pair <std::string, Lexer2::Type> >::iterator l;
|
||||||
for (l = lexemes.begin (); l != lexemes.end (); ++l)
|
for (l = lexemes.begin (); l != lexemes.end (); ++l)
|
||||||
_original_args.push_back (l->first);
|
_original_args.push_back (l->first);
|
||||||
}
|
}
|
||||||
@@ -714,9 +713,7 @@ void CLI::aliasExpansion ()
|
|||||||
{
|
{
|
||||||
if (_aliases.find (raw) != _aliases.end ())
|
if (_aliases.find (raw) != _aliases.end ())
|
||||||
{
|
{
|
||||||
std::vector <std::string> lexed;
|
std::vector <std::string> lexed = Lexer2::split (_aliases[raw]);
|
||||||
Lexer::token_split (lexed, _aliases[raw]);
|
|
||||||
|
|
||||||
std::vector <std::string>::iterator l;
|
std::vector <std::string>::iterator l;
|
||||||
for (l = lexed.begin (); l != lexed.end (); ++l)
|
for (l = lexed.begin (); l != lexed.end (); ++l)
|
||||||
{
|
{
|
||||||
@@ -1815,8 +1812,7 @@ void CLI::injectDefaults ()
|
|||||||
if (defaultCommand != "")
|
if (defaultCommand != "")
|
||||||
{
|
{
|
||||||
// Split the defaultCommand into separate args.
|
// Split the defaultCommand into separate args.
|
||||||
std::vector <std::string> tokens;
|
std::vector <std::string> tokens = Lexer2::split (defaultCommand);
|
||||||
Lexer::token_split (tokens, defaultCommand);
|
|
||||||
|
|
||||||
// Modify _args to be: <args0> [<def0> ...] <args1> [...]
|
// Modify _args to be: <args0> [<def0> ...] <args1> [...]
|
||||||
std::vector <A> reconstructed;
|
std::vector <A> reconstructed;
|
||||||
@@ -2306,9 +2302,9 @@ bool CLI::isName (const std::string& raw) const
|
|||||||
{
|
{
|
||||||
for (int i = 0; i < raw.length (); ++i)
|
for (int i = 0; i < raw.length (); ++i)
|
||||||
{
|
{
|
||||||
if (i == 0 && ! Lexer::is_ident_start (raw[i]))
|
if (i == 0 && ! Lexer2::isIdentifierStart (raw[i]))
|
||||||
return false;
|
return false;
|
||||||
else if (! Lexer::is_ident (raw[i]))
|
else if (! Lexer2::isIdentifierNext (raw[i]))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2320,19 +2316,19 @@ bool CLI::isName (const std::string& raw) const
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
bool CLI::disqualifyInsufficientTerms (
|
bool CLI::disqualifyInsufficientTerms (
|
||||||
const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
|
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
|
||||||
{
|
{
|
||||||
return lexemes.size () < 3 ? true : false;
|
return lexemes.size () < 3 ? true : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
bool CLI::disqualifyNoOps (
|
bool CLI::disqualifyNoOps (
|
||||||
const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
|
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
|
||||||
{
|
{
|
||||||
bool foundOP = false;
|
bool foundOP = false;
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >::const_iterator l;
|
std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator l;
|
||||||
for (l = lexemes.begin (); l != lexemes.end (); ++l)
|
for (l = lexemes.begin (); l != lexemes.end (); ++l)
|
||||||
if (l->second == Lexer::typeOperator)
|
if (l->second == Lexer2::Type::op)
|
||||||
foundOP = true;
|
foundOP = true;
|
||||||
|
|
||||||
return ! foundOP;
|
return ! foundOP;
|
||||||
@@ -2340,16 +2336,16 @@ bool CLI::disqualifyNoOps (
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
bool CLI::disqualifyOnlyParenOps (
|
bool CLI::disqualifyOnlyParenOps (
|
||||||
const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
|
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
|
||||||
{
|
{
|
||||||
int opCount = 0;
|
int opCount = 0;
|
||||||
int opSugarCount = 0;
|
int opSugarCount = 0;
|
||||||
int opParenCount = 0;
|
int opParenCount = 0;
|
||||||
|
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >::const_iterator l;
|
std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator l;
|
||||||
for (l = lexemes.begin (); l != lexemes.end (); ++l)
|
for (l = lexemes.begin (); l != lexemes.end (); ++l)
|
||||||
{
|
{
|
||||||
if (l->second == Lexer::typeOperator)
|
if (l->second == Lexer2::Type::op)
|
||||||
{
|
{
|
||||||
++opCount;
|
++opCount;
|
||||||
|
|
||||||
@@ -2376,7 +2372,7 @@ bool CLI::disqualifyOnlyParenOps (
|
|||||||
// as there are no operators in between, which includes syntactic sugar that
|
// as there are no operators in between, which includes syntactic sugar that
|
||||||
// hides operators.
|
// hides operators.
|
||||||
bool CLI::disqualifyFirstLastBinary (
|
bool CLI::disqualifyFirstLastBinary (
|
||||||
const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
|
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
|
||||||
{
|
{
|
||||||
bool firstBinary = false;
|
bool firstBinary = false;
|
||||||
bool lastBinary = false;
|
bool lastBinary = false;
|
||||||
@@ -2395,7 +2391,7 @@ bool CLI::disqualifyFirstLastBinary (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Disqualify terms when there are operators hidden by syntactic sugar.
|
// Disqualify terms when there are operators hidden by syntactic sugar.
|
||||||
bool CLI::disqualifySugarFree (
|
bool CLI::disqualifySugarFree (
|
||||||
const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
|
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
|
||||||
{
|
{
|
||||||
bool sugared = true;
|
bool sugared = true;
|
||||||
for (unsigned int i = 1; i < lexemes.size () - 1; ++i)
|
for (unsigned int i = 1; i < lexemes.size () - 1; ++i)
|
||||||
|
|||||||
12
src/CLI.h
12
src/CLI.h
@@ -29,7 +29,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <Lexer.h>
|
#include <Lexer2.h>
|
||||||
#include <Path.h>
|
#include <Path.h>
|
||||||
#include <File.h>
|
#include <File.h>
|
||||||
|
|
||||||
@@ -126,11 +126,11 @@ private:
|
|||||||
bool isOperator (const std::string&) const;
|
bool isOperator (const std::string&) const;
|
||||||
bool isName (const std::string&) const;
|
bool isName (const std::string&) const;
|
||||||
|
|
||||||
bool disqualifyInsufficientTerms (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
|
bool disqualifyInsufficientTerms (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;
|
||||||
bool disqualifyNoOps (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
|
bool disqualifyNoOps (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;
|
||||||
bool disqualifyOnlyParenOps (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
|
bool disqualifyOnlyParenOps (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;
|
||||||
bool disqualifyFirstLastBinary (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
|
bool disqualifyFirstLastBinary (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;
|
||||||
bool disqualifySugarFree (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
|
bool disqualifySugarFree (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
std::multimap <std::string, std::string> _entities;
|
std::multimap <std::string, std::string> _entities;
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ set (task_SRCS CLI.cpp CLI.h
|
|||||||
Hooks.cpp Hooks.h
|
Hooks.cpp Hooks.h
|
||||||
ISO8601.cpp ISO8601.h
|
ISO8601.cpp ISO8601.h
|
||||||
JSON.cpp JSON.h
|
JSON.cpp JSON.h
|
||||||
Lexer.cpp Lexer.h
|
|
||||||
Lexer2.cpp Lexer2.h
|
Lexer2.cpp Lexer2.h
|
||||||
Msg.cpp Msg.h
|
Msg.cpp Msg.h
|
||||||
Nibbler.cpp Nibbler.h
|
Nibbler.cpp Nibbler.h
|
||||||
|
|||||||
@@ -657,8 +657,8 @@ void Context::staticInitialization ()
|
|||||||
|
|
||||||
Task::searchCaseSensitive = Variant::searchCaseSensitive = config.getBoolean ("search.case.sensitive");
|
Task::searchCaseSensitive = Variant::searchCaseSensitive = config.getBoolean ("search.case.sensitive");
|
||||||
Task::regex = Variant::searchUsingRegex = config.getBoolean ("regex");
|
Task::regex = Variant::searchUsingRegex = config.getBoolean ("regex");
|
||||||
Lexer::dateFormat = Variant::dateFormat = config.get ("dateformat");
|
Lexer2::dateFormat = Variant::dateFormat = config.get ("dateformat");
|
||||||
Lexer::isoEnabled = Variant::isoEnabled = config.getBoolean ("date.iso");
|
Lexer2::isoEnabled = Variant::isoEnabled = config.getBoolean ("date.iso");
|
||||||
|
|
||||||
std::map <std::string, Column*>::iterator i;
|
std::map <std::string, Column*>::iterator i;
|
||||||
for (i = columns.begin (); i != columns.end (); ++i)
|
for (i = columns.begin (); i != columns.end (); ++i)
|
||||||
|
|||||||
176
src/Eval.cpp
176
src/Eval.cpp
@@ -125,13 +125,13 @@ void Eval::addSource (bool (*source)(const std::string&, Variant&))
|
|||||||
void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const
|
void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const
|
||||||
{
|
{
|
||||||
// Reduce e to a vector of tokens.
|
// Reduce e to a vector of tokens.
|
||||||
Lexer l (e);
|
Lexer2 l (e);
|
||||||
l.ambiguity (_ambiguity);
|
l.ambiguity (_ambiguity);
|
||||||
std::vector <std::pair <std::string, Lexer::Type> > tokens;
|
std::vector <std::pair <std::string, Lexer2::Type> > tokens;
|
||||||
std::string token;
|
std::string token;
|
||||||
Lexer::Type type;
|
Lexer2::Type type;
|
||||||
while (l.token (token, type))
|
while (l.token (token, type))
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
|
|
||||||
// Parse for syntax checking and operator replacement.
|
// Parse for syntax checking and operator replacement.
|
||||||
if (_debug)
|
if (_debug)
|
||||||
@@ -153,13 +153,13 @@ void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const
|
|||||||
void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const
|
void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const
|
||||||
{
|
{
|
||||||
// Reduce e to a vector of tokens.
|
// Reduce e to a vector of tokens.
|
||||||
Lexer l (e);
|
Lexer2 l (e);
|
||||||
l.ambiguity (_ambiguity);
|
l.ambiguity (_ambiguity);
|
||||||
std::vector <std::pair <std::string, Lexer::Type> > tokens;
|
std::vector <std::pair <std::string, Lexer2::Type> > tokens;
|
||||||
std::string token;
|
std::string token;
|
||||||
Lexer::Type type;
|
Lexer2::Type type;
|
||||||
while (l.token (token, type))
|
while (l.token (token, type))
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
|
|
||||||
if (_debug)
|
if (_debug)
|
||||||
context.debug ("[1;37;42mFILTER[0m Postfix " + dump (tokens));
|
context.debug ("[1;37;42mFILTER[0m Postfix " + dump (tokens));
|
||||||
@@ -172,15 +172,15 @@ void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const
|
|||||||
void Eval::compileExpression (const std::string& e)
|
void Eval::compileExpression (const std::string& e)
|
||||||
{
|
{
|
||||||
// Reduce e to a vector of tokens.
|
// Reduce e to a vector of tokens.
|
||||||
Lexer l (e);
|
Lexer2 l (e);
|
||||||
l.ambiguity (_ambiguity);
|
l.ambiguity (_ambiguity);
|
||||||
std::string token;
|
std::string token;
|
||||||
Lexer::Type type;
|
Lexer2::Type type;
|
||||||
while (l.token (token, type))
|
while (l.token (token, type))
|
||||||
{
|
{
|
||||||
if (_debug)
|
if (_debug)
|
||||||
context.debug ("Lexer '" + token + "' " + Lexer::type_name (type));
|
context.debug ("Lexer '" + token + "' " + Lexer2::typeToString (type));
|
||||||
_compiled.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
_compiled.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse for syntax checking and operator replacement.
|
// Parse for syntax checking and operator replacement.
|
||||||
@@ -236,7 +236,7 @@ void Eval::getBinaryOperators (std::vector <std::string>& all)
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
void Eval::evaluatePostfixStack (
|
void Eval::evaluatePostfixStack (
|
||||||
const std::vector <std::pair <std::string, Lexer::Type> >& tokens,
|
const std::vector <std::pair <std::string, Lexer2::Type> >& tokens,
|
||||||
Variant& result) const
|
Variant& result) const
|
||||||
{
|
{
|
||||||
if (tokens.size () == 0)
|
if (tokens.size () == 0)
|
||||||
@@ -245,11 +245,11 @@ void Eval::evaluatePostfixStack (
|
|||||||
// This is the stack used by the postfix evaluator.
|
// This is the stack used by the postfix evaluator.
|
||||||
std::vector <Variant> values;
|
std::vector <Variant> values;
|
||||||
|
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >::const_iterator token;
|
std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator token;
|
||||||
for (token = tokens.begin (); token != tokens.end (); ++token)
|
for (token = tokens.begin (); token != tokens.end (); ++token)
|
||||||
{
|
{
|
||||||
// Unary operators.
|
// Unary operators.
|
||||||
if (token->second == Lexer::typeOperator &&
|
if (token->second == Lexer2::Type::op &&
|
||||||
token->first == "!")
|
token->first == "!")
|
||||||
{
|
{
|
||||||
if (values.size () < 1)
|
if (values.size () < 1)
|
||||||
@@ -262,7 +262,7 @@ void Eval::evaluatePostfixStack (
|
|||||||
if (_debug)
|
if (_debug)
|
||||||
context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result));
|
context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result));
|
||||||
}
|
}
|
||||||
else if (token->second == Lexer::typeOperator &&
|
else if (token->second == Lexer2::Type::op &&
|
||||||
token->first == "_neg_")
|
token->first == "_neg_")
|
||||||
{
|
{
|
||||||
if (values.size () < 1)
|
if (values.size () < 1)
|
||||||
@@ -278,7 +278,7 @@ void Eval::evaluatePostfixStack (
|
|||||||
if (_debug)
|
if (_debug)
|
||||||
context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result));
|
context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result));
|
||||||
}
|
}
|
||||||
else if (token->second == Lexer::typeOperator &&
|
else if (token->second == Lexer2::Type::op &&
|
||||||
token->first == "_pos_")
|
token->first == "_pos_")
|
||||||
{
|
{
|
||||||
// The _pos_ operator is a NOP.
|
// The _pos_ operator is a NOP.
|
||||||
@@ -287,7 +287,7 @@ void Eval::evaluatePostfixStack (
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Binary operators.
|
// Binary operators.
|
||||||
else if (token->second == Lexer::typeOperator)
|
else if (token->second == Lexer2::Type::op)
|
||||||
{
|
{
|
||||||
if (values.size () < 2)
|
if (values.size () < 2)
|
||||||
throw std::string (STRING_EVAL_NO_EVAL);
|
throw std::string (STRING_EVAL_NO_EVAL);
|
||||||
@@ -338,24 +338,27 @@ void Eval::evaluatePostfixStack (
|
|||||||
Variant v (token->first);
|
Variant v (token->first);
|
||||||
switch (token->second)
|
switch (token->second)
|
||||||
{
|
{
|
||||||
case Lexer::typeNumber:
|
case Lexer2::Type::number:
|
||||||
case Lexer::typeHex:
|
if (Lexer2::isAllDigits (token->first))
|
||||||
v.cast (Variant::type_integer);
|
{
|
||||||
if (_debug)
|
v.cast (Variant::type_integer);
|
||||||
context.debug (format ("Eval literal number ↑'{1}'", (std::string) v));
|
if (_debug)
|
||||||
|
context.debug (format ("Eval literal number ↑'{1}'", (std::string) v));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
v.cast (Variant::type_real);
|
||||||
|
if (_debug)
|
||||||
|
context.debug (format ("Eval literal decimal ↑'{1}'", (std::string) v));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Lexer::typeDecimal:
|
|
||||||
v.cast (Variant::type_real);
|
|
||||||
if (_debug)
|
|
||||||
context.debug (format ("Eval literal decimal ↑'{1}'", (std::string) v));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case Lexer::typeOperator:
|
case Lexer2::Type::op:
|
||||||
throw std::string (STRING_EVAL_OP_EXPECTED);
|
throw std::string (STRING_EVAL_OP_EXPECTED);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Lexer::typeIdentifier:
|
case Lexer2::Type::identifier:
|
||||||
{
|
{
|
||||||
bool found = false;
|
bool found = false;
|
||||||
std::vector <bool (*)(const std::string&, Variant&)>::const_iterator source;
|
std::vector <bool (*)(const std::string&, Variant&)>::const_iterator source;
|
||||||
@@ -380,20 +383,33 @@ void Eval::evaluatePostfixStack (
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Lexer::typeDate:
|
case Lexer2::Type::date:
|
||||||
v.cast (Variant::type_date);
|
v.cast (Variant::type_date);
|
||||||
if (_debug)
|
if (_debug)
|
||||||
context.debug (format ("Eval literal date ↑'{1}'", (std::string) v));
|
context.debug (format ("Eval literal date ↑'{1}'", (std::string) v));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Lexer::typeDuration:
|
case Lexer2::Type::duration:
|
||||||
v.cast (Variant::type_duration);
|
v.cast (Variant::type_duration);
|
||||||
if (_debug)
|
if (_debug)
|
||||||
context.debug (format ("Eval literal duration ↑'{1}'", (std::string) v));
|
context.debug (format ("Eval literal duration ↑'{1}'", (std::string) v));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Nothing to do.
|
// Nothing to do.
|
||||||
case Lexer::typeString:
|
/*
|
||||||
|
case Lexer2::Type::uuid:
|
||||||
|
case Lexer2::Type::hex:
|
||||||
|
case Lexer2::Type::list:
|
||||||
|
case Lexer2::Type::url:
|
||||||
|
case Lexer2::Type::pair:
|
||||||
|
case Lexer2::Type::separator:
|
||||||
|
case Lexer2::Type::tag:
|
||||||
|
case Lexer2::Type::path:
|
||||||
|
case Lexer2::Type::substitution:
|
||||||
|
case Lexer2::Type::pattern:
|
||||||
|
case Lexer2::Type::word:
|
||||||
|
*/
|
||||||
|
case Lexer2::Type::string:
|
||||||
default:
|
default:
|
||||||
if (_debug)
|
if (_debug)
|
||||||
context.debug (format ("Eval literal string ↑'{1}'", (std::string) v));
|
context.debug (format ("Eval literal string ↑'{1}'", (std::string) v));
|
||||||
@@ -427,7 +443,7 @@ void Eval::evaluatePostfixStack (
|
|||||||
// Primitive --> "(" Logical ")" | Variant
|
// Primitive --> "(" Logical ")" | Variant
|
||||||
//
|
//
|
||||||
void Eval::infixParse (
|
void Eval::infixParse (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix) const
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix) const
|
||||||
{
|
{
|
||||||
int i = 0;
|
int i = 0;
|
||||||
parseLogical (infix, i);
|
parseLogical (infix, i);
|
||||||
@@ -436,17 +452,17 @@ void Eval::infixParse (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Logical --> Regex {( "and" | "or" | "xor" ) Regex}
|
// Logical --> Regex {( "and" | "or" | "xor" ) Regex}
|
||||||
bool Eval::parseLogical (
|
bool Eval::parseLogical (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size () &&
|
if (i < infix.size () &&
|
||||||
parseRegex (infix, i))
|
parseRegex (infix, i))
|
||||||
{
|
{
|
||||||
while (i < infix.size () &&
|
while (i < infix.size () &&
|
||||||
|
infix[i].second == Lexer2::Type::op &&
|
||||||
(infix[i].first == "and" ||
|
(infix[i].first == "and" ||
|
||||||
infix[i].first == "or" ||
|
infix[i].first == "or" ||
|
||||||
infix[i].first == "xor") &&
|
infix[i].first == "xor"))
|
||||||
infix[i].second == Lexer::typeOperator)
|
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
if (! parseRegex (infix, i))
|
if (! parseRegex (infix, i))
|
||||||
@@ -462,16 +478,16 @@ bool Eval::parseLogical (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Regex --> Equality {( "~" | "!~" ) Equality}
|
// Regex --> Equality {( "~" | "!~" ) Equality}
|
||||||
bool Eval::parseRegex (
|
bool Eval::parseRegex (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size () &&
|
if (i < infix.size () &&
|
||||||
parseEquality (infix, i))
|
parseEquality (infix, i))
|
||||||
{
|
{
|
||||||
while (i < infix.size () &&
|
while (i < infix.size () &&
|
||||||
|
infix[i].second == Lexer2::Type::op &&
|
||||||
(infix[i].first == "~" ||
|
(infix[i].first == "~" ||
|
||||||
infix[i].first == "!~") &&
|
infix[i].first == "!~"))
|
||||||
infix[i].second == Lexer::typeOperator)
|
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
if (! parseEquality (infix, i))
|
if (! parseEquality (infix, i))
|
||||||
@@ -487,18 +503,18 @@ bool Eval::parseRegex (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Equality --> Comparative {( "==" | "=" | "!==" | "!=" ) Comparative}
|
// Equality --> Comparative {( "==" | "=" | "!==" | "!=" ) Comparative}
|
||||||
bool Eval::parseEquality (
|
bool Eval::parseEquality (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size () &&
|
if (i < infix.size () &&
|
||||||
parseComparative (infix, i))
|
parseComparative (infix, i))
|
||||||
{
|
{
|
||||||
while (i < infix.size () &&
|
while (i < infix.size () &&
|
||||||
|
infix[i].second == Lexer2::Type::op &&
|
||||||
(infix[i].first == "==" ||
|
(infix[i].first == "==" ||
|
||||||
infix[i].first == "=" ||
|
infix[i].first == "=" ||
|
||||||
infix[i].first == "!==" ||
|
infix[i].first == "!==" ||
|
||||||
infix[i].first == "!=") &&
|
infix[i].first == "!="))
|
||||||
infix[i].second == Lexer::typeOperator)
|
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
if (! parseComparative (infix, i))
|
if (! parseComparative (infix, i))
|
||||||
@@ -514,18 +530,18 @@ bool Eval::parseEquality (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Comparative --> Arithmetic {( "<=" | "<" | ">=" | ">" ) Arithmetic}
|
// Comparative --> Arithmetic {( "<=" | "<" | ">=" | ">" ) Arithmetic}
|
||||||
bool Eval::parseComparative (
|
bool Eval::parseComparative (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size () &&
|
if (i < infix.size () &&
|
||||||
parseArithmetic (infix, i))
|
parseArithmetic (infix, i))
|
||||||
{
|
{
|
||||||
while (i < infix.size () &&
|
while (i < infix.size () &&
|
||||||
|
infix[i].second == Lexer2::Type::op &&
|
||||||
(infix[i].first == "<=" ||
|
(infix[i].first == "<=" ||
|
||||||
infix[i].first == "<" ||
|
infix[i].first == "<" ||
|
||||||
infix[i].first == ">=" ||
|
infix[i].first == ">=" ||
|
||||||
infix[i].first == ">") &&
|
infix[i].first == ">"))
|
||||||
infix[i].second == Lexer::typeOperator)
|
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
if (! parseArithmetic (infix, i))
|
if (! parseArithmetic (infix, i))
|
||||||
@@ -541,16 +557,16 @@ bool Eval::parseComparative (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Arithmetic --> Geometric {( "+" | "-" ) Geometric}
|
// Arithmetic --> Geometric {( "+" | "-" ) Geometric}
|
||||||
bool Eval::parseArithmetic (
|
bool Eval::parseArithmetic (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size () &&
|
if (i < infix.size () &&
|
||||||
parseGeometric (infix, i))
|
parseGeometric (infix, i))
|
||||||
{
|
{
|
||||||
while (i < infix.size () &&
|
while (i < infix.size () &&
|
||||||
|
infix[i].second == Lexer2::Type::op &&
|
||||||
(infix[i].first == "+" ||
|
(infix[i].first == "+" ||
|
||||||
infix[i].first == "-") &&
|
infix[i].first == "-"))
|
||||||
infix[i].second == Lexer::typeOperator)
|
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
if (! parseGeometric (infix, i))
|
if (! parseGeometric (infix, i))
|
||||||
@@ -566,17 +582,17 @@ bool Eval::parseArithmetic (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Geometric --> Tag {( "*" | "/" | "%" ) Tag}
|
// Geometric --> Tag {( "*" | "/" | "%" ) Tag}
|
||||||
bool Eval::parseGeometric (
|
bool Eval::parseGeometric (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size () &&
|
if (i < infix.size () &&
|
||||||
parseTag (infix, i))
|
parseTag (infix, i))
|
||||||
{
|
{
|
||||||
while (i < infix.size () &&
|
while (i < infix.size () &&
|
||||||
|
infix[i].second == Lexer2::Type::op &&
|
||||||
(infix[i].first == "*" ||
|
(infix[i].first == "*" ||
|
||||||
infix[i].first == "/" ||
|
infix[i].first == "/" ||
|
||||||
infix[i].first == "%") &&
|
infix[i].first == "%"))
|
||||||
infix[i].second == Lexer::typeOperator)
|
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
if (! parseTag (infix, i))
|
if (! parseTag (infix, i))
|
||||||
@@ -592,16 +608,16 @@ bool Eval::parseGeometric (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Tag --> Unary {( "_hastag_" | "_notag_" ) Unary}
|
// Tag --> Unary {( "_hastag_" | "_notag_" ) Unary}
|
||||||
bool Eval::parseTag (
|
bool Eval::parseTag (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size () &&
|
if (i < infix.size () &&
|
||||||
parseUnary (infix, i))
|
parseUnary (infix, i))
|
||||||
{
|
{
|
||||||
while (i < infix.size () &&
|
while (i < infix.size () &&
|
||||||
|
infix[i].second == Lexer2::Type::op &&
|
||||||
(infix[i].first == "_hastag_" ||
|
(infix[i].first == "_hastag_" ||
|
||||||
infix[i].first == "_notag_") &&
|
infix[i].first == "_notag_"))
|
||||||
infix[i].second == Lexer::typeOperator)
|
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
if (! parseUnary (infix, i))
|
if (! parseUnary (infix, i))
|
||||||
@@ -617,7 +633,7 @@ bool Eval::parseTag (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Unary --> [( "-" | "+" | "!" )] Exponent
|
// Unary --> [( "-" | "+" | "!" )] Exponent
|
||||||
bool Eval::parseUnary (
|
bool Eval::parseUnary (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size ())
|
if (i < infix.size ())
|
||||||
@@ -644,15 +660,15 @@ bool Eval::parseUnary (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Exponent --> Primitive ["^" Primitive]
|
// Exponent --> Primitive ["^" Primitive]
|
||||||
bool Eval::parseExponent (
|
bool Eval::parseExponent (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size () &&
|
if (i < infix.size () &&
|
||||||
parsePrimitive (infix, i))
|
parsePrimitive (infix, i))
|
||||||
{
|
{
|
||||||
while (i < infix.size () &&
|
while (i < infix.size () &&
|
||||||
infix[i].first == "^" &&
|
infix[i].second == Lexer2::Type::op &&
|
||||||
infix[i].second == Lexer::typeOperator)
|
infix[i].first == "^")
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
if (! parsePrimitive (infix, i))
|
if (! parsePrimitive (infix, i))
|
||||||
@@ -668,7 +684,7 @@ bool Eval::parseExponent (
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Primitive --> "(" Logical ")" | Variant
|
// Primitive --> "(" Logical ")" | Variant
|
||||||
bool Eval::parsePrimitive (
|
bool Eval::parsePrimitive (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix,
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix,
|
||||||
int &i) const
|
int &i) const
|
||||||
{
|
{
|
||||||
if (i < infix.size ())
|
if (i < infix.size ())
|
||||||
@@ -706,7 +722,7 @@ bool Eval::parsePrimitive (
|
|||||||
++i;
|
++i;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if (infix[i].second != Lexer::typeOperator)
|
else if (infix[i].second != Lexer2::Type::op)
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
return true;
|
return true;
|
||||||
@@ -750,32 +766,32 @@ bool Eval::parsePrimitive (
|
|||||||
// Exit.
|
// Exit.
|
||||||
//
|
//
|
||||||
void Eval::infixToPostfix (
|
void Eval::infixToPostfix (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& infix) const
|
std::vector <std::pair <std::string, Lexer2::Type> >& infix) const
|
||||||
{
|
{
|
||||||
// Short circuit.
|
// Short circuit.
|
||||||
if (infix.size () == 1)
|
if (infix.size () == 1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Result.
|
// Result.
|
||||||
std::vector <std::pair <std::string, Lexer::Type> > postfix;
|
std::vector <std::pair <std::string, Lexer2::Type> > postfix;
|
||||||
|
|
||||||
// Shunting yard.
|
// Shunting yard.
|
||||||
std::vector <std::pair <std::string, Lexer::Type> > op_stack;
|
std::vector <std::pair <std::string, Lexer2::Type> > op_stack;
|
||||||
|
|
||||||
// Operator characteristics.
|
// Operator characteristics.
|
||||||
char type;
|
char type;
|
||||||
int precedence;
|
int precedence;
|
||||||
char associativity;
|
char associativity;
|
||||||
|
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >::iterator token;
|
std::vector <std::pair <std::string, Lexer2::Type> >::iterator token;
|
||||||
for (token = infix.begin (); token != infix.end (); ++token)
|
for (token = infix.begin (); token != infix.end (); ++token)
|
||||||
{
|
{
|
||||||
if (token->second == Lexer::typeOperator &&
|
if (token->second == Lexer2::Type::op &&
|
||||||
token->first == "(")
|
token->first == "(")
|
||||||
{
|
{
|
||||||
op_stack.push_back (*token);
|
op_stack.push_back (*token);
|
||||||
}
|
}
|
||||||
else if (token->second == Lexer::typeOperator &&
|
else if (token->second == Lexer2::Type::op &&
|
||||||
token->first == ")")
|
token->first == ")")
|
||||||
{
|
{
|
||||||
while (op_stack.size () &&
|
while (op_stack.size () &&
|
||||||
@@ -790,7 +806,7 @@ void Eval::infixToPostfix (
|
|||||||
else
|
else
|
||||||
throw std::string ("Mismatched parentheses in expression");
|
throw std::string ("Mismatched parentheses in expression");
|
||||||
}
|
}
|
||||||
else if (token->second == Lexer::typeOperator &&
|
else if (token->second == Lexer2::Type::op &&
|
||||||
identifyOperator (token->first, type, precedence, associativity))
|
identifyOperator (token->first, type, precedence, associativity))
|
||||||
{
|
{
|
||||||
char type2;
|
char type2;
|
||||||
@@ -849,22 +865,20 @@ bool Eval::identifyOperator (
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
std::string Eval::dump (
|
std::string Eval::dump (
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >& tokens) const
|
std::vector <std::pair <std::string, Lexer2::Type> >& tokens) const
|
||||||
{
|
{
|
||||||
// Set up a color mapping.
|
// Set up a color mapping.
|
||||||
std::map <Lexer::Type, Color> color_map;
|
std::map <Lexer2::Type, Color> color_map;
|
||||||
color_map[Lexer::typeNone] = Color ("rgb000 on gray6");
|
color_map[Lexer2::Type::op] = Color ("gray14 on gray6");
|
||||||
color_map[Lexer::typeOperator] = Color ("gray14 on gray6");
|
color_map[Lexer2::Type::number] = Color ("rgb530 on gray6");
|
||||||
color_map[Lexer::typeNumber] = Color ("rgb530 on gray6");
|
color_map[Lexer2::Type::hex] = Color ("rgb303 on gray6");
|
||||||
color_map[Lexer::typeHex] = Color ("rgb303 on gray6");
|
color_map[Lexer2::Type::string] = Color ("rgb550 on gray6");
|
||||||
color_map[Lexer::typeDecimal] = Color ("rgb530 on gray6");
|
color_map[Lexer2::Type::identifier] = Color ("rgb035 on gray6");
|
||||||
color_map[Lexer::typeString] = Color ("rgb550 on gray6");
|
color_map[Lexer2::Type::date] = Color ("rgb150 on gray6");
|
||||||
color_map[Lexer::typeIdentifier] = Color ("rgb035 on gray6");
|
color_map[Lexer2::Type::duration] = Color ("rgb531 on gray6");
|
||||||
color_map[Lexer::typeDate] = Color ("rgb150 on gray6");
|
|
||||||
color_map[Lexer::typeDuration] = Color ("rgb531 on gray6");
|
|
||||||
|
|
||||||
std::string output;
|
std::string output;
|
||||||
std::vector <std::pair <std::string, Lexer::Type> >::const_iterator i;
|
std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator i;
|
||||||
for (i = tokens.begin (); i != tokens.end (); ++i)
|
for (i = tokens.begin (); i != tokens.end (); ++i)
|
||||||
{
|
{
|
||||||
if (i != tokens.begin ())
|
if (i != tokens.begin ())
|
||||||
@@ -874,7 +888,7 @@ std::string Eval::dump (
|
|||||||
if (color_map[i->second].nontrivial ())
|
if (color_map[i->second].nontrivial ())
|
||||||
c = color_map[i->second];
|
c = color_map[i->second];
|
||||||
else
|
else
|
||||||
c = color_map[Lexer::typeNone];
|
c = Color ("rgb000 on gray6");
|
||||||
|
|
||||||
output += c.colorize (i->first);
|
output += c.colorize (i->first);
|
||||||
}
|
}
|
||||||
|
|||||||
32
src/Eval.h
32
src/Eval.h
@@ -29,7 +29,7 @@
|
|||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <Lexer.h>
|
#include <Lexer2.h>
|
||||||
#include <Variant.h>
|
#include <Variant.h>
|
||||||
|
|
||||||
class Eval
|
class Eval
|
||||||
@@ -53,28 +53,28 @@ public:
|
|||||||
static void getBinaryOperators (std::vector <std::string>&);
|
static void getBinaryOperators (std::vector <std::string>&);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void evaluatePostfixStack (const std::vector <std::pair <std::string, Lexer::Type> >&, Variant&) const;
|
void evaluatePostfixStack (const std::vector <std::pair <std::string, Lexer2::Type> >&, Variant&) const;
|
||||||
void infixToPostfix (std::vector <std::pair <std::string, Lexer::Type> >&) const;
|
void infixToPostfix (std::vector <std::pair <std::string, Lexer2::Type> >&) const;
|
||||||
void infixParse (std::vector <std::pair <std::string, Lexer::Type> >&) const;
|
void infixParse (std::vector <std::pair <std::string, Lexer2::Type> >&) const;
|
||||||
bool parseLogical (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parseLogical (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool parseRegex (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parseRegex (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool parseEquality (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parseEquality (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool parseComparative (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parseComparative (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool parseArithmetic (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parseArithmetic (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool parseGeometric (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parseGeometric (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool parseTag (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parseTag (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool parseUnary (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parseUnary (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool parseExponent (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parseExponent (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool parsePrimitive (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
|
bool parsePrimitive (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
|
||||||
bool identifyOperator (const std::string&, char&, int&, char&) const;
|
bool identifyOperator (const std::string&, char&, int&, char&) const;
|
||||||
|
|
||||||
std::string dump (std::vector <std::pair <std::string, Lexer::Type> >&) const;
|
std::string dump (std::vector <std::pair <std::string, Lexer2::Type> >&) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector <bool (*)(const std::string&, Variant&)> _sources;
|
std::vector <bool (*)(const std::string&, Variant&)> _sources;
|
||||||
bool _ambiguity;
|
bool _ambiguity;
|
||||||
bool _debug;
|
bool _debug;
|
||||||
std::vector <std::pair <std::string, Lexer::Type> > _compiled;
|
std::vector <std::pair <std::string, Lexer2::Type> > _compiled;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
898
src/Lexer.cpp
898
src/Lexer.cpp
@@ -1,898 +0,0 @@
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// Copyright 2013 - 2015, Paul Beckingham, Federico Hernandez.
|
|
||||||
//
|
|
||||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
// of this software and associated documentation files (the "Software"), to deal
|
|
||||||
// in the Software without restriction, including without limitation the rights
|
|
||||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
// copies of the Software, and to permit persons to whom the Software is
|
|
||||||
// furnished to do so, subject to the following conditions:
|
|
||||||
//
|
|
||||||
// The above copyright notice and this permission notice shall be included
|
|
||||||
// in all copies or substantial portions of the Software.
|
|
||||||
//
|
|
||||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
||||||
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
// SOFTWARE.
|
|
||||||
//
|
|
||||||
// http://www.opensource.org/licenses/mit-license.php
|
|
||||||
//
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#include <cmake.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <utf8.h>
|
|
||||||
#include <ISO8601.h>
|
|
||||||
#include <Date.h>
|
|
||||||
#include <Duration.h>
|
|
||||||
#include <Lexer.h>
|
|
||||||
#include <i18n.h>
|
|
||||||
|
|
||||||
std::string Lexer::dateFormat = "";
|
|
||||||
bool Lexer::isoEnabled = true;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
Lexer::Lexer (const std::string& input)
|
|
||||||
: _input (input)
|
|
||||||
, _i (0)
|
|
||||||
, _shift_counter (0)
|
|
||||||
, _n0 (32)
|
|
||||||
, _n1 (32)
|
|
||||||
, _n2 (32)
|
|
||||||
, _n3 (32)
|
|
||||||
, _boundary01 (false)
|
|
||||||
, _boundary12 (false)
|
|
||||||
, _boundary23 (false)
|
|
||||||
, _ambiguity (true)
|
|
||||||
{
|
|
||||||
// Read 4 chars in preparation. Even if there are < 4. Take a deep breath.
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
|
|
||||||
// Reset because the four shifts above do not represent advancement into the
|
|
||||||
// _input. All subsequent shifts do though.
|
|
||||||
_shift_counter = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
Lexer::~Lexer ()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Walk the input string, looking for transitions.
|
|
||||||
bool Lexer::token (std::string& result, Type& type)
|
|
||||||
{
|
|
||||||
// Start with nothing.
|
|
||||||
result = "";
|
|
||||||
|
|
||||||
// Different types of matching quote: ', ".
|
|
||||||
int quote = 0;
|
|
||||||
|
|
||||||
type = typeNone;
|
|
||||||
while (_n0)
|
|
||||||
{
|
|
||||||
switch (type)
|
|
||||||
{
|
|
||||||
case typeNone:
|
|
||||||
if (is_ws (_n0))
|
|
||||||
shift ();
|
|
||||||
else if (_n0 == '"' || _n0 == '\'')
|
|
||||||
{
|
|
||||||
type = typeString;
|
|
||||||
quote = _n0;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (_n0 == '0' &&
|
|
||||||
_n1 == 'x' &&
|
|
||||||
is_hex_digit (_n2))
|
|
||||||
{
|
|
||||||
type = typeHex;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (is_dec_digit (_n0))
|
|
||||||
{
|
|
||||||
// Speculatively try a date and duration parse. Longest wins.
|
|
||||||
if (is_date (result))
|
|
||||||
{
|
|
||||||
type = typeDate;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_duration (result))
|
|
||||||
{
|
|
||||||
type = typeDuration;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
type = typeNumber;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (_n0 == '.' && is_dec_digit (_n1))
|
|
||||||
{
|
|
||||||
type = typeDecimal;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if ((_n0 == '+' || _n0 == '-') && is_ident_start (_n1))
|
|
||||||
{
|
|
||||||
type = typeTag;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (is_triple_op (_n0, _n1, _n2))
|
|
||||||
{
|
|
||||||
type = typeOperator;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (is_double_op (_n0, _n1, _n2))
|
|
||||||
{
|
|
||||||
type = typeOperator;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (is_single_op (_n0))
|
|
||||||
{
|
|
||||||
type = typeOperator;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (_n0 == '\\')
|
|
||||||
{
|
|
||||||
type = typeIdentifierEscape;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (is_ident_start (_n0))
|
|
||||||
{
|
|
||||||
if (is_date (result))
|
|
||||||
{
|
|
||||||
type = typeDate;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_duration (result))
|
|
||||||
{
|
|
||||||
type = typeDuration;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
type = typeIdentifier;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw std::string (STRING_LEX_IMMEDIATE_UNK);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeString:
|
|
||||||
if (_n0 == quote)
|
|
||||||
{
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
quote = 0;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (_n0 == '\\')
|
|
||||||
{
|
|
||||||
type = typeEscape;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeTag:
|
|
||||||
if (is_ident_start (_n0))
|
|
||||||
{
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeIdentifier:
|
|
||||||
if (is_ident (_n0))
|
|
||||||
{
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// typeIdentifier is a catch-all type. Anything word-like becomes an
|
|
||||||
// identifier. At this point in the processing, an identifier is found,
|
|
||||||
// and can be matched against a list of potential upgrades.
|
|
||||||
if (result == "_hastag_" ||
|
|
||||||
result == "_notag_" ||
|
|
||||||
result == "_neg_" ||
|
|
||||||
result == "_pos_")
|
|
||||||
type = typeOperator;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeIdentifierEscape:
|
|
||||||
if (_n0 == 'u')
|
|
||||||
{
|
|
||||||
type = typeEscapeUnicode;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
type = quote ? typeString : typeIdentifier;
|
|
||||||
result += utf8_character (quote);
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeEscape:
|
|
||||||
if (_n0 == 'x')
|
|
||||||
{
|
|
||||||
type = typeEscapeHex;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (_n0 == 'u')
|
|
||||||
{
|
|
||||||
type = typeEscapeUnicode;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result += '\\';
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
type = quote ? typeString : typeIdentifier;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeEscapeHex:
|
|
||||||
if (is_hex_digit (_n0) && is_hex_digit (_n1))
|
|
||||||
{
|
|
||||||
result += utf8_character (hex_to_int (_n0, _n1));
|
|
||||||
type = quote ? typeString : typeIdentifier;
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
type = quote ? typeString : typeIdentifier;
|
|
||||||
shift ();
|
|
||||||
quote = 0;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeEscapeUnicode:
|
|
||||||
if (is_hex_digit (_n0) &&
|
|
||||||
is_hex_digit (_n1) &&
|
|
||||||
is_hex_digit (_n2) &&
|
|
||||||
is_hex_digit (_n3))
|
|
||||||
{
|
|
||||||
result += utf8_character (hex_to_int (_n0, _n1, _n2, _n3));
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
type = quote ? typeString : typeIdentifier;
|
|
||||||
}
|
|
||||||
else if (_n0 == quote)
|
|
||||||
{
|
|
||||||
type = typeString;
|
|
||||||
shift ();
|
|
||||||
quote = 0;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeNumber:
|
|
||||||
if (is_dec_digit (_n0))
|
|
||||||
{
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (_n0 == '.')
|
|
||||||
{
|
|
||||||
type = typeDecimal;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (_n0 == 'e' || _n0 == 'E')
|
|
||||||
{
|
|
||||||
type = typeExponentIndicator;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (is_ident_start (_n0))
|
|
||||||
{
|
|
||||||
type = typeIdentifier;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeDecimal:
|
|
||||||
if (is_dec_digit (_n0))
|
|
||||||
{
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (_n0 == 'e' || _n0 == 'E')
|
|
||||||
{
|
|
||||||
type = typeExponentIndicator;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (is_ident_start (_n0))
|
|
||||||
{
|
|
||||||
type = typeIdentifier;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeExponentIndicator:
|
|
||||||
if (_n0 == '+' || _n0 == '-')
|
|
||||||
{
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (is_dec_digit (_n0))
|
|
||||||
{
|
|
||||||
type = typeExponent;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (is_ident_start (_n0))
|
|
||||||
{
|
|
||||||
type = typeIdentifier;
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeExponent:
|
|
||||||
if (is_dec_digit (_n0) || _n0 == '.')
|
|
||||||
{
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
type = typeDecimal;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeHex:
|
|
||||||
if (is_hex_digit (_n0))
|
|
||||||
{
|
|
||||||
result += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
throw std::string (STRING_LEX_TYPE_UNK);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fence post.
|
|
||||||
if (!_n0 && result != "")
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Just like Lexer::token, but no operators, numbers, dates or durations.
|
|
||||||
bool Lexer::word (std::string& token, Type& type)
|
|
||||||
{
|
|
||||||
// Start with nothing.
|
|
||||||
token = "";
|
|
||||||
|
|
||||||
// Different types of matching quote: ', ".
|
|
||||||
int quote = 0;
|
|
||||||
|
|
||||||
type = typeNone;
|
|
||||||
while (_n0)
|
|
||||||
{
|
|
||||||
switch (type)
|
|
||||||
{
|
|
||||||
case typeNone:
|
|
||||||
if (is_ws (_n0))
|
|
||||||
shift ();
|
|
||||||
else if (_n0 == '"' || _n0 == '\'')
|
|
||||||
{
|
|
||||||
type = typeString;
|
|
||||||
quote = _n0;
|
|
||||||
token += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
type = typeString;
|
|
||||||
token += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeString:
|
|
||||||
if (_n0 == quote)
|
|
||||||
{
|
|
||||||
token += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
quote = 0;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (_n0 == '\\')
|
|
||||||
{
|
|
||||||
type = typeEscape;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (! quote && is_ws (_n0))
|
|
||||||
{
|
|
||||||
shift ();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
token += utf8_character (_n0);
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeEscape:
|
|
||||||
if (_n0 == 'x')
|
|
||||||
{
|
|
||||||
type = typeEscapeHex;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else if (_n0 == 'u')
|
|
||||||
{
|
|
||||||
type = typeEscapeUnicode;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
token += '\\';
|
|
||||||
token += utf8_character (_n0);
|
|
||||||
type = typeString;
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeEscapeHex:
|
|
||||||
if (is_hex_digit (_n0) && is_hex_digit (_n1))
|
|
||||||
{
|
|
||||||
token += utf8_character (hex_to_int (_n0, _n1));
|
|
||||||
type = typeString;
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
type = typeString;
|
|
||||||
shift ();
|
|
||||||
quote = 0;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case typeEscapeUnicode:
|
|
||||||
if (is_hex_digit (_n0) &&
|
|
||||||
is_hex_digit (_n1) &&
|
|
||||||
is_hex_digit (_n2) &&
|
|
||||||
is_hex_digit (_n3))
|
|
||||||
{
|
|
||||||
token += utf8_character (hex_to_int (_n0, _n1, _n2, _n3));
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
shift ();
|
|
||||||
type = typeString;
|
|
||||||
}
|
|
||||||
else if (_n0 == quote)
|
|
||||||
{
|
|
||||||
type = typeString;
|
|
||||||
shift ();
|
|
||||||
quote = 0;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
throw std::string (STRING_LEX_TYPE_UNK);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fence post.
|
|
||||||
if (!_n0 && token != "")
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
void Lexer::ambiguity (bool value)
|
|
||||||
{
|
|
||||||
_ambiguity = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// No L10N - these are for internal purposes.
|
|
||||||
const std::string Lexer::type_name (const Type& type)
|
|
||||||
{
|
|
||||||
switch (type)
|
|
||||||
{
|
|
||||||
case Lexer::typeNone: return "None";
|
|
||||||
case Lexer::typeString: return "String";
|
|
||||||
case Lexer::typeIdentifier: return "Identifier";
|
|
||||||
case Lexer::typeIdentifierEscape: return "IdentifierEscape";
|
|
||||||
case Lexer::typeNumber: return "Number";
|
|
||||||
case Lexer::typeDecimal: return "Decimal";
|
|
||||||
case Lexer::typeExponentIndicator: return "ExponentIndicator";
|
|
||||||
case Lexer::typeExponent: return "Exponent";
|
|
||||||
case Lexer::typeHex: return "Hex";
|
|
||||||
case Lexer::typeOperator: return "Operator";
|
|
||||||
case Lexer::typeEscape: return "Escape";
|
|
||||||
case Lexer::typeEscapeHex: return "EscapeHex";
|
|
||||||
case Lexer::typeEscapeUnicode: return "EscapeUnicode";
|
|
||||||
case Lexer::typeDate: return "Date";
|
|
||||||
case Lexer::typeDuration: return "Duration";
|
|
||||||
case Lexer::typeTag: return "Tag";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Complete Unicode whitespace list.
|
|
||||||
//
|
|
||||||
// http://en.wikipedia.org/wiki/Whitespace_character
|
|
||||||
// Updated 2013-11-18
|
|
||||||
bool Lexer::is_ws (int c)
|
|
||||||
{
|
|
||||||
return (c == 0x0020 || // space Common Separator, space
|
|
||||||
c == 0x0009 || // Common Other, control HT, Horizontal Tab
|
|
||||||
c == 0x000A || // Common Other, control LF, Line feed
|
|
||||||
c == 0x000B || // Common Other, control VT, Vertical Tab
|
|
||||||
c == 0x000C || // Common Other, control FF, Form feed
|
|
||||||
c == 0x000D || // Common Other, control CR, Carriage return
|
|
||||||
c == 0x0085 || // Common Other, control NEL, Next line
|
|
||||||
c == 0x00A0 || // no-break space Common Separator, space
|
|
||||||
c == 0x1680 || // ogham space mark Ogham Separator, space
|
|
||||||
c == 0x180E || // mongolian vowel separator Mongolian Separator, space
|
|
||||||
c == 0x2000 || // en quad Common Separator, space
|
|
||||||
c == 0x2001 || // em quad Common Separator, space
|
|
||||||
c == 0x2002 || // en space Common Separator, space
|
|
||||||
c == 0x2003 || // em space Common Separator, space
|
|
||||||
c == 0x2004 || // three-per-em space Common Separator, space
|
|
||||||
c == 0x2005 || // four-per-em space Common Separator, space
|
|
||||||
c == 0x2006 || // six-per-em space Common Separator, space
|
|
||||||
c == 0x2007 || // figure space Common Separator, space
|
|
||||||
c == 0x2008 || // punctuation space Common Separator, space
|
|
||||||
c == 0x2009 || // thin space Common Separator, space
|
|
||||||
c == 0x200A || // hair space Common Separator, space
|
|
||||||
c == 0x2028 || // line separator Common Separator, line
|
|
||||||
c == 0x2029 || // paragraph separator Common Separator, paragraph
|
|
||||||
c == 0x202F || // narrow no-break space Common Separator, space
|
|
||||||
c == 0x205F || // medium mathematical space Common Separator, space
|
|
||||||
c == 0x3000); // ideographic space Common Separator, space
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_ident_start (int c)
|
|
||||||
{
|
|
||||||
return c && // Include null character check.
|
|
||||||
! is_ws (c) &&
|
|
||||||
! is_dec_digit (c) &&
|
|
||||||
! is_single_op (c);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_ident (int c)
|
|
||||||
{
|
|
||||||
return c && // Include null character check.
|
|
||||||
! is_ws (c) &&
|
|
||||||
! is_single_op (c);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_single_op (int c)
|
|
||||||
{
|
|
||||||
return c == '+' ||
|
|
||||||
c == '-' ||
|
|
||||||
c == '*' ||
|
|
||||||
c == '/' ||
|
|
||||||
c == '(' ||
|
|
||||||
c == ')' ||
|
|
||||||
c == '<' ||
|
|
||||||
c == '>' ||
|
|
||||||
c == '^' ||
|
|
||||||
c == '!' ||
|
|
||||||
c == '%' ||
|
|
||||||
c == '=' ||
|
|
||||||
c == '~';
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_dec_digit (int c)
|
|
||||||
{
|
|
||||||
return c >= '0' && c <= '9';
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::boundary (int left, int right)
|
|
||||||
{
|
|
||||||
// XOR
|
|
||||||
if (isalpha (left) != isalpha (right)) return true;
|
|
||||||
if (isdigit (left) != isdigit (right)) return true;
|
|
||||||
if (isspace (left) != isspace (right)) return true;
|
|
||||||
|
|
||||||
// OR
|
|
||||||
if (ispunct (left) || ispunct (right)) return true;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Split 'input' into 'words' on Lexer::is_ws boundaries, observing quotes.
|
|
||||||
void Lexer::word_split (std::vector <std::string>& words, const std::string& input)
|
|
||||||
{
|
|
||||||
words.clear ();
|
|
||||||
|
|
||||||
std::string word;
|
|
||||||
Lexer::Type type;
|
|
||||||
Lexer lex (input);
|
|
||||||
while (lex.word (word, type))
|
|
||||||
words.push_back (word);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Split 'input' into 'tokens'.
|
|
||||||
void Lexer::token_split (std::vector <std::string>& words, const std::string& input)
|
|
||||||
{
|
|
||||||
words.clear ();
|
|
||||||
|
|
||||||
std::string word;
|
|
||||||
Lexer::Type type;
|
|
||||||
Lexer lex (input);
|
|
||||||
while (lex.token (word, type))
|
|
||||||
words.push_back (word);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Split 'input' into 'tokens', preserving type.
|
|
||||||
void Lexer::token_split (std::vector <std::pair <std::string, Lexer::Type> >& lexemes, const std::string& input)
|
|
||||||
{
|
|
||||||
lexemes.clear ();
|
|
||||||
|
|
||||||
std::string word;
|
|
||||||
Lexer::Type type;
|
|
||||||
Lexer lex (input);
|
|
||||||
while (lex.token (word, type))
|
|
||||||
lexemes.push_back (std::pair <std::string, Lexer::Type>(word, type));
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
void Lexer::dequote (std::string& input)
|
|
||||||
{
|
|
||||||
int quote = input[0];
|
|
||||||
size_t len = input.length ();
|
|
||||||
if ((quote == '\'' || quote == '"') &&
|
|
||||||
quote == input[len - 1])
|
|
||||||
{
|
|
||||||
input = input.substr (1, len - 2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_date (std::string& result)
|
|
||||||
{
|
|
||||||
// Try an ISO date parse.
|
|
||||||
if (isoEnabled)
|
|
||||||
{
|
|
||||||
std::string::size_type iso_i = 0;
|
|
||||||
std::string iso_result;
|
|
||||||
ISO8601d iso;
|
|
||||||
iso.ambiguity (_ambiguity);
|
|
||||||
if (iso.parse (_input.substr (_shift_counter), iso_i))
|
|
||||||
{
|
|
||||||
result = _input.substr (_shift_counter, iso_i);
|
|
||||||
while (iso_i--) shift ();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try a legacy rc.dateformat parse here.
|
|
||||||
if (Lexer::dateFormat != "")
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
|
||||||
std::string::size_type legacy_i = 0;
|
|
||||||
Date legacyDate (_input.substr (_shift_counter), legacy_i, Lexer::dateFormat, false, false);
|
|
||||||
result = _input.substr (_shift_counter, legacy_i);
|
|
||||||
while (legacy_i--) shift ();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
catch (...) { /* Never mind. */ }
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_duration (std::string& result)
|
|
||||||
{
|
|
||||||
std::string::size_type iso_i = 0;
|
|
||||||
std::string iso_result;
|
|
||||||
ISO8601p iso;
|
|
||||||
if (iso.parse (_input.substr (_shift_counter), iso_i))
|
|
||||||
{
|
|
||||||
result = _input.substr (_shift_counter, iso_i);
|
|
||||||
while (iso_i--) shift ();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string::size_type dur_i = 0;
|
|
||||||
std::string dur_result;
|
|
||||||
Duration dur;
|
|
||||||
if (dur.parse (_input.substr (_shift_counter), dur_i))
|
|
||||||
{
|
|
||||||
result = _input.substr (_shift_counter, dur_i);
|
|
||||||
while (dur_i--) shift ();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_punct (int c) const
|
|
||||||
{
|
|
||||||
if (c == ',' ||
|
|
||||||
c == '.') // Tab
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_num (int c) const
|
|
||||||
{
|
|
||||||
if ((c >= '0' && c <= '9') ||
|
|
||||||
c == '.')
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_triple_op (int c0, int c1, int c2) const
|
|
||||||
{
|
|
||||||
return (c0 == 'a' && c1 == 'n' && c2 == 'd' && _boundary23) ||
|
|
||||||
(c0 == 'x' && c1 == 'o' && c2 == 'r' && _boundary23) ||
|
|
||||||
(c0 == '!' && c1 == '=' && c2 == '=');
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_double_op (int c0, int c1, int c2) const
|
|
||||||
{
|
|
||||||
return (c0 == '=' && c1 == '=') ||
|
|
||||||
(c0 == '!' && c1 == '=') ||
|
|
||||||
(c0 == '<' && c1 == '=') ||
|
|
||||||
(c0 == '>' && c1 == '=') ||
|
|
||||||
(c0 == 'o' && c1 == 'r' && _boundary12) ||
|
|
||||||
(c0 == '|' && c1 == '|') ||
|
|
||||||
(c0 == '&' && c1 == '&') ||
|
|
||||||
(c0 == '!' && c1 == '~');
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::is_hex_digit (int c) const
|
|
||||||
{
|
|
||||||
return (c >= '0' && c <= '9') ||
|
|
||||||
(c >= 'a' && c <= 'f') ||
|
|
||||||
(c >= 'A' && c <= 'F');
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
int Lexer::decode_escape (int c) const
|
|
||||||
{
|
|
||||||
switch (c)
|
|
||||||
{
|
|
||||||
case 'b': return 0x08;
|
|
||||||
case 'f': return 0x0C;
|
|
||||||
case 'n': return 0x0A;
|
|
||||||
case 'r': return 0x0D;
|
|
||||||
case 't': return 0x09;
|
|
||||||
case 'v': return 0x0B;
|
|
||||||
case '\'': return 0x27;
|
|
||||||
case '"': return 0x22;
|
|
||||||
default: return c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
int Lexer::hex_to_int (int c) const
|
|
||||||
{
|
|
||||||
if (c >= '0' && c <= '9') return (c - '0');
|
|
||||||
else if (c >= 'a' && c <= 'f') return (c - 'a' + 10);
|
|
||||||
else return (c - 'A' + 10);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
int Lexer::hex_to_int (int c0, int c1) const
|
|
||||||
{
|
|
||||||
return (hex_to_int (c0) << 4) + hex_to_int (c1);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
int Lexer::hex_to_int (int c0, int c1, int c2, int c3) const
|
|
||||||
{
|
|
||||||
return (hex_to_int (c0) << 12) +
|
|
||||||
(hex_to_int (c1) << 8) +
|
|
||||||
(hex_to_int (c2) << 4) +
|
|
||||||
hex_to_int (c3);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
void Lexer::shift ()
|
|
||||||
{
|
|
||||||
_n0 = _n1;
|
|
||||||
_n1 = _n2;
|
|
||||||
_n2 = _n3;
|
|
||||||
_n3 = utf8_next_char (_input, _i);
|
|
||||||
++_shift_counter;
|
|
||||||
|
|
||||||
// Detect type boundaries between characters.
|
|
||||||
_boundary01 = boundary (_n0, _n1);
|
|
||||||
_boundary12 = boundary (_n1, _n2);
|
|
||||||
_boundary23 = boundary (_n2, _n3);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
120
src/Lexer.h
120
src/Lexer.h
@@ -1,120 +0,0 @@
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// Copyright 2013 - 2015, Paul Beckingham, Federico Hernandez.
|
|
||||||
//
|
|
||||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
// of this software and associated documentation files (the "Software"), to deal
|
|
||||||
// in the Software without restriction, including without limitation the rights
|
|
||||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
// copies of the Software, and to permit persons to whom the Software is
|
|
||||||
// furnished to do so, subject to the following conditions:
|
|
||||||
//
|
|
||||||
// The above copyright notice and this permission notice shall be included
|
|
||||||
// in all copies or substantial portions of the Software.
|
|
||||||
//
|
|
||||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
||||||
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
// SOFTWARE.
|
|
||||||
//
|
|
||||||
// http://www.opensource.org/licenses/mit-license.php
|
|
||||||
//
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#ifndef INCLUDED_LEXER
|
|
||||||
#define INCLUDED_LEXER
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
// Lexer breaks a string into typed tokens (see Lexer::Type).
//
// Instances are not copyable or comparable: those operations are explicitly
// deleted so that accidental use fails at compile time instead of at link
// time.
class Lexer
{
public:
  // Settings shared by all Lexer instances.
  static std::string dateFormat;  // Legacy date format; "" disables it.
  static bool isoEnabled;         // Whether ISO-8601 date parsing is tried.

  // Token classifications. Entries marked 'Intermediate' are listed
  // separately from the others by the original author; this header does not
  // show how they are used.
  enum Type
  {
    typeNone = 0,
    typeString,
    typeIdentifier,
    typeIdentifierEscape,  // Intermediate
    typeEscape,            // Intermediate
    typeEscapeHex,         // Intermediate
    typeEscapeUnicode,     // Intermediate
    typeNumber,
    typeDecimal,
    typeExponentIndicator, // Intermediate
    typeExponent,          // Intermediate
    typeHex,
    typeOperator,
    typeDate,
    typeDuration,
    typeTag,
/*
  Recognizing more types means that Lexer::*_split and Lexer::token approach
  the ideal form, whereby the command line becomes just one string that is
  lexed into tokens.  Those tokens are then simply dissected by type.

    typeUUID,
    typePattern,
    typeSubstitution,
    typeNameValue,
*/
  };

  Lexer (const std::string&);
  virtual ~Lexer ();

  // Not supported.
  Lexer (const Lexer&) = delete;
  Lexer& operator= (const Lexer&) = delete;
  bool operator== (const Lexer&) = delete;

  // Each call yields the next token or word and its type, returning false
  // when nothing more can be read.
  bool token (std::string&, Type&);
  bool word (std::string&, Type&);

  // Sets the ambiguity flag that is handed to the ISO-8601 date parser.
  void ambiguity (bool);

  // Character and type helpers.
  static const std::string type_name (const Type&);
  static bool is_ws (int);
  static bool is_ident_start (int);
  static bool is_ident (int);
  static bool is_single_op (int);
  static bool is_dec_digit (int);
  static bool boundary (int, int);

  // Whole-string conveniences: split into words or tokens, or strip quotes.
  static void word_split (std::vector <std::string>&, const std::string&);
  static void token_split (std::vector <std::string>&, const std::string&);
  static void token_split (std::vector <std::pair <std::string, Lexer::Type> >&, const std::string&);
  static void dequote (std::string&);

private:
  bool is_date (std::string&);
  bool is_duration (std::string&);
  bool is_punct (int) const;
  bool is_num (int) const;
  bool is_triple_op (int, int, int) const;
  bool is_double_op (int, int, int) const;
  bool is_hex_digit (int) const;
  int decode_escape (int) const;
  int hex_to_int (int) const;
  int hex_to_int (int, int) const;
  int hex_to_int (int, int, int, int) const;
  void shift ();

  const std::string _input;               // Text being lexed.
  std::string::size_type _i;              // Read position used to refill _n3.
  std::string::size_type _shift_counter;  // Number of shift () calls so far.
  int _n0;                                // Look-ahead window, oldest first.
  int _n1;
  int _n2;
  int _n3;
  bool _boundary01;                       // boundary (_n0, _n1)
  bool _boundary12;                       // boundary (_n1, _n2)
  bool _boundary23;                       // boundary (_n2, _n3)
  bool _ambiguity;                        // Passed on to the ISO date parser.
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
@@ -37,13 +37,13 @@ static const int uuid_min_length = 8;
|
|||||||
|
|
||||||
std::string Lexer2::dateFormat = "";
|
std::string Lexer2::dateFormat = "";
|
||||||
bool Lexer2::isoEnabled = true;
|
bool Lexer2::isoEnabled = true;
|
||||||
bool Lexer2::ambiguity = true;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
Lexer2::Lexer2 (const std::string& text)
|
Lexer2::Lexer2 (const std::string& text)
|
||||||
: _text (text)
|
: _text (text)
|
||||||
, _cursor (0)
|
, _cursor (0)
|
||||||
, _eos (text.size ())
|
, _eos (text.size ())
|
||||||
|
, _ambiguity (false)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -52,6 +52,12 @@ Lexer2::~Lexer2 ()
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer2::ambiguity (bool value)
|
||||||
|
{
|
||||||
|
_ambiguity = value;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// When a Lexer2 object is constructed with a string, this method walks through
|
// When a Lexer2 object is constructed with a string, this method walks through
|
||||||
// the stream of low-level tokens.
|
// the stream of low-level tokens.
|
||||||
@@ -417,7 +423,7 @@ bool Lexer2::isDate (std::string& token, Lexer2::Type& type)
|
|||||||
{
|
{
|
||||||
std::size_t iso_i = 0;
|
std::size_t iso_i = 0;
|
||||||
ISO8601d iso;
|
ISO8601d iso;
|
||||||
iso.ambiguity (Lexer2::ambiguity);
|
iso.ambiguity (_ambiguity);
|
||||||
if (iso.parse (_text.substr (_cursor), iso_i))
|
if (iso.parse (_text.substr (_cursor), iso_i))
|
||||||
{
|
{
|
||||||
type = Lexer2::Type::date;
|
type = Lexer2::Type::date;
|
||||||
@@ -504,10 +510,13 @@ bool Lexer2::isUUID (std::string& token, Lexer2::Type& type)
|
|||||||
|
|
||||||
if (i >= uuid_min_length)
|
if (i >= uuid_min_length)
|
||||||
{
|
{
|
||||||
token = _text.substr (_cursor, i + 1);
|
token = _text.substr (_cursor, i);
|
||||||
type = Lexer2::Type::uuid;
|
if (! isAllDigits (token))
|
||||||
_cursor += i;
|
{
|
||||||
return true;
|
type = Lexer2::Type::uuid;
|
||||||
|
_cursor += i;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@@ -545,7 +554,7 @@ bool Lexer2::isHexNumber (std::string& token, Lexer2::Type& type)
|
|||||||
// Lexer2::Type::number
|
// Lexer2::Type::number
|
||||||
// \d+
|
// \d+
|
||||||
// [ . \d+ ]
|
// [ . \d+ ]
|
||||||
// [ e|E [ +|- ] \d+ ]
|
// [ e|E [ +|- ] \d+ [ . \d+ ] ]
|
||||||
bool Lexer2::isNumber (std::string& token, Lexer2::Type& type)
|
bool Lexer2::isNumber (std::string& token, Lexer2::Type& type)
|
||||||
{
|
{
|
||||||
std::size_t marker = _cursor;
|
std::size_t marker = _cursor;
|
||||||
@@ -581,6 +590,17 @@ bool Lexer2::isNumber (std::string& token, Lexer2::Type& type)
|
|||||||
++marker;
|
++marker;
|
||||||
while (isDigit (_text[marker]))
|
while (isDigit (_text[marker]))
|
||||||
utf8_next_char (_text, marker);
|
utf8_next_char (_text, marker);
|
||||||
|
|
||||||
|
if (_text[marker] == '.')
|
||||||
|
{
|
||||||
|
++marker;
|
||||||
|
if (isDigit (_text[marker]))
|
||||||
|
{
|
||||||
|
++marker;
|
||||||
|
while (isDigit (_text[marker]))
|
||||||
|
utf8_next_char (_text, marker);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -667,7 +687,7 @@ bool Lexer2::isURL (std::string& token, Lexer2::Type& type)
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Lexer2::Type::pair
|
// Lexer2::Type::pair
|
||||||
// <identifier> : [ <string> | <word> ]
|
// <identifier> :|= [ <string> | <word> ]
|
||||||
bool Lexer2::isPair (std::string& token, Lexer2::Type& type)
|
bool Lexer2::isPair (std::string& token, Lexer2::Type& type)
|
||||||
{
|
{
|
||||||
std::size_t marker = _cursor;
|
std::size_t marker = _cursor;
|
||||||
@@ -698,11 +718,18 @@ bool Lexer2::isPair (std::string& token, Lexer2::Type& type)
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Lexer2::Type::tag
|
// Lexer2::Type::tag
|
||||||
// [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]*
|
// ^ | <isWhiteSpace> [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]*
|
||||||
bool Lexer2::isTag (std::string& token, Lexer2::Type& type)
|
bool Lexer2::isTag (std::string& token, Lexer2::Type& type)
|
||||||
{
|
{
|
||||||
std::size_t marker = _cursor;
|
std::size_t marker = _cursor;
|
||||||
|
|
||||||
|
// This test requires a tag to have a preceding space or start a string.
|
||||||
|
// bad: 'a+b' --> identifier tag
|
||||||
|
// good: 'a+b' --> identifier op identifier
|
||||||
|
if (marker > 0 &&
|
||||||
|
! isWhitespace (_text[marker - 1]))
|
||||||
|
return false;
|
||||||
|
|
||||||
if (_text[marker] == '+' ||
|
if (_text[marker] == '+' ||
|
||||||
_text[marker] == '-')
|
_text[marker] == '-')
|
||||||
{
|
{
|
||||||
@@ -926,7 +953,7 @@ bool Lexer2::isWord (std::string& token, Lexer2::Type& type)
|
|||||||
{
|
{
|
||||||
std::size_t marker = _cursor;
|
std::size_t marker = _cursor;
|
||||||
|
|
||||||
while (! isWhitespace (_text[marker]))
|
while (_text[marker] && ! isWhitespace (_text[marker]))
|
||||||
utf8_next_char (_text, marker);
|
utf8_next_char (_text, marker);
|
||||||
|
|
||||||
if (marker > _cursor)
|
if (marker > _cursor)
|
||||||
|
|||||||
@@ -40,7 +40,6 @@ public:
|
|||||||
// These are overridable.
|
// These are overridable.
|
||||||
static std::string dateFormat;
|
static std::string dateFormat;
|
||||||
static bool isoEnabled;
|
static bool isoEnabled;
|
||||||
static bool ambiguity;
|
|
||||||
|
|
||||||
enum class Type { uuid, number, hex,
|
enum class Type { uuid, number, hex,
|
||||||
string,
|
string,
|
||||||
@@ -54,6 +53,7 @@ public:
|
|||||||
|
|
||||||
Lexer2 (const std::string&);
|
Lexer2 (const std::string&);
|
||||||
~Lexer2 ();
|
~Lexer2 ();
|
||||||
|
void ambiguity (bool);
|
||||||
bool token (std::string&, Lexer2::Type&);
|
bool token (std::string&, Lexer2::Type&);
|
||||||
static std::vector <std::pair <std::string, Lexer2::Type>> tokens (const std::string&);
|
static std::vector <std::pair <std::string, Lexer2::Type>> tokens (const std::string&);
|
||||||
static std::vector <std::string> split (const std::string&);
|
static std::vector <std::string> split (const std::string&);
|
||||||
@@ -101,8 +101,9 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
std::string _text;
|
std::string _text;
|
||||||
std::size_t _cursor = 0;
|
std::size_t _cursor;
|
||||||
std::size_t _eos = 0;
|
std::size_t _eos;
|
||||||
|
bool _ambiguity;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -32,7 +32,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <Context.h>
|
#include <Context.h>
|
||||||
#include <Filter.h>
|
#include <Filter.h>
|
||||||
#include <Lexer.h>
|
#include <Lexer2.h>
|
||||||
#include <ViewTask.h>
|
#include <ViewTask.h>
|
||||||
#include <i18n.h>
|
#include <i18n.h>
|
||||||
#include <text.h>
|
#include <text.h>
|
||||||
@@ -83,8 +83,8 @@ int CmdCustom::execute (std::string& output)
|
|||||||
|
|
||||||
// Prepend the argument list with those from the report filter.
|
// Prepend the argument list with those from the report filter.
|
||||||
std::string lexeme;
|
std::string lexeme;
|
||||||
Lexer::Type type;
|
Lexer2::Type type;
|
||||||
Lexer lex (reportFilter);
|
Lexer2 lex (reportFilter);
|
||||||
lex.ambiguity (false);
|
lex.ambiguity (false);
|
||||||
while (lex.token (lexeme, type))
|
while (lex.token (lexeme, type))
|
||||||
context.cli.add (lexeme);
|
context.cli.add (lexeme);
|
||||||
|
|||||||
481
test/lexer.t.cpp
481
test/lexer.t.cpp
@@ -28,7 +28,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <test.h>
|
#include <test.h>
|
||||||
#include <Lexer.h>
|
#include <Lexer2.h>
|
||||||
#include <Context.h>
|
#include <Context.h>
|
||||||
|
|
||||||
Context context;
|
Context context;
|
||||||
@@ -36,360 +36,349 @@ Context context;
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
int main (int argc, char** argv)
|
int main (int argc, char** argv)
|
||||||
{
|
{
|
||||||
UnitTest t (212);
|
UnitTest t (211);
|
||||||
|
|
||||||
std::vector <std::pair <std::string, Lexer::Type> > tokens;
|
std::vector <std::pair <std::string, Lexer2::Type> > tokens;
|
||||||
std::string token;
|
std::string token;
|
||||||
Lexer::Type type;
|
Lexer2::Type type;
|
||||||
|
|
||||||
// White space detection.
|
// White space detection.
|
||||||
t.notok (Lexer::is_ws (0x0041), "U+0041 (A) is not ws");
|
t.notok (Lexer2::isWhitespace (0x0041), "U+0041 (A) ! isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x0020), "U+0020 is_ws");
|
t.ok (Lexer2::isWhitespace (0x0020), "U+0020 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x0009), "U+0009 is_ws");
|
t.ok (Lexer2::isWhitespace (0x0009), "U+0009 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x000A), "U+000A is_ws");
|
t.ok (Lexer2::isWhitespace (0x000A), "U+000A isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x000B), "U+000B is_ws");
|
t.ok (Lexer2::isWhitespace (0x000B), "U+000B isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x000C), "U+000C is_ws");
|
t.ok (Lexer2::isWhitespace (0x000C), "U+000C isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x000D), "U+000D is_ws");
|
t.ok (Lexer2::isWhitespace (0x000D), "U+000D isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x0085), "U+0085 is_ws");
|
t.ok (Lexer2::isWhitespace (0x0085), "U+0085 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x00A0), "U+00A0 is_ws");
|
t.ok (Lexer2::isWhitespace (0x00A0), "U+00A0 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x1680), "U+1680 is_ws"); // 10
|
t.ok (Lexer2::isWhitespace (0x1680), "U+1680 isWhitespace"); // 10
|
||||||
t.ok (Lexer::is_ws (0x180E), "U+180E is_ws");
|
t.ok (Lexer2::isWhitespace (0x180E), "U+180E isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2000), "U+2000 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2000), "U+2000 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2001), "U+2001 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2001), "U+2001 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2002), "U+2002 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2002), "U+2002 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2003), "U+2003 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2003), "U+2003 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2004), "U+2004 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2004), "U+2004 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2005), "U+2005 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2005), "U+2005 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2006), "U+2006 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2006), "U+2006 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2007), "U+2007 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2007), "U+2007 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2008), "U+2008 is_ws"); // 20
|
t.ok (Lexer2::isWhitespace (0x2008), "U+2008 isWhitespace"); // 20
|
||||||
t.ok (Lexer::is_ws (0x2009), "U+2009 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2009), "U+2009 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x200A), "U+200A is_ws");
|
t.ok (Lexer2::isWhitespace (0x200A), "U+200A isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2028), "U+2028 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2028), "U+2028 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x2029), "U+2029 is_ws");
|
t.ok (Lexer2::isWhitespace (0x2029), "U+2029 isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x202F), "U+202F is_ws");
|
t.ok (Lexer2::isWhitespace (0x202F), "U+202F isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x205F), "U+205F is_ws");
|
t.ok (Lexer2::isWhitespace (0x205F), "U+205F isWhitespace");
|
||||||
t.ok (Lexer::is_ws (0x3000), "U+3000 is_ws");
|
t.ok (Lexer2::isWhitespace (0x3000), "U+3000 isWhitespace");
|
||||||
|
|
||||||
// static bool Lexer::boundary(int, int);
|
// static bool Lexer2::isBoundary(int, int);
|
||||||
t.ok (Lexer::boundary (' ', 'a'), "' ' --> 'a' = boundary");
|
t.ok (Lexer2::isBoundary (' ', 'a'), "' ' --> 'a' = isBoundary");
|
||||||
t.ok (Lexer::boundary ('a', ' '), "'a' --> ' ' = boundary");
|
t.ok (Lexer2::isBoundary ('a', ' '), "'a' --> ' ' = isBoundary");
|
||||||
t.ok (Lexer::boundary (' ', '+'), "' ' --> '+' = boundary");
|
t.ok (Lexer2::isBoundary (' ', '+'), "' ' --> '+' = isBoundary");
|
||||||
t.ok (Lexer::boundary (' ', ','), "' ' --> ',' = boundary");
|
t.ok (Lexer2::isBoundary (' ', ','), "' ' --> ',' = isBoundary");
|
||||||
t.notok (Lexer::boundary ('3', '4'), "'3' --> '4' = boundary");
|
t.notok (Lexer2::isBoundary ('3', '4'), "'3' --> '4' = isBoundary");
|
||||||
t.ok (Lexer::boundary ('(', '('), "'(' --> '(' = boundary");
|
t.ok (Lexer2::isBoundary ('(', '('), "'(' --> '(' = isBoundary");
|
||||||
t.notok (Lexer::boundary ('r', 'd'), "'r' --> 'd' = boundary");
|
t.notok (Lexer2::isBoundary ('r', 'd'), "'r' --> 'd' = isBoundary");
|
||||||
|
|
||||||
// Should result in no tokens.
|
// Should result in no tokens.
|
||||||
Lexer l0 ("");
|
Lexer2 l0 ("");
|
||||||
t.notok (l0.token (token, type), "'' --> no tokens");
|
t.notok (l0.token (token, type), "'' --> no tokens");
|
||||||
|
|
||||||
// Should result in no tokens.
|
// Should result in no tokens.
|
||||||
Lexer l1 (" \t ");
|
Lexer2 l1 (" \t ");
|
||||||
t.notok (l1.token (token, type), "' \\t ' --> no tokens");
|
t.notok (l1.token (token, type), "' \\t ' --> no tokens");
|
||||||
|
|
||||||
// \u20ac = Euro symbol.
|
// \u20ac = Euro symbol.
|
||||||
Lexer l2 (" one 'two \\'three\\''+456-(1.3*2 - 0x12) \\u0041 1.2e-3.4 foo.bar and '\\u20ac'");
|
Lexer2 l2 (" one 'two \\'three\\''+456-(1.3*2 - 0x12) 1.2e-3.4 foo.bar and '\\u20ac'");
|
||||||
|
|
||||||
tokens.clear ();
|
tokens.clear ();
|
||||||
while (l2.token (token, type))
|
while (l2.token (token, type))
|
||||||
{
|
{
|
||||||
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
|
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n";
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
t.is (tokens[0].first, "one", "tokens[0] = 'left'"); // 30
|
t.is (tokens[0].first, "one", "tokens[0] = 'left'"); // 30
|
||||||
t.is (Lexer::type_name (tokens[0].second), "Identifier", "tokens[0] = Identifier");
|
t.is (Lexer2::typeName (tokens[0].second), "identifier", "tokens[0] = identifier");
|
||||||
|
|
||||||
t.is (tokens[1].first, "'two \\'three\\''", "tokens[1] = 'two \\'three\\''");
|
t.is (tokens[1].first, "two 'three'", "tokens[1] = 'two 'three''");
|
||||||
t.is (Lexer::type_name (tokens[1].second), "String", "tokens[1] = String");
|
t.is (Lexer2::typeName (tokens[1].second), "string", "tokens[1] = string");
|
||||||
|
|
||||||
t.is (tokens[2].first, "+", "tokens[2] = '+'");
|
t.is (tokens[2].first, "+", "tokens[2] = '+'");
|
||||||
t.is (Lexer::type_name (tokens[2].second), "Operator", "tokens[2] = Operator");
|
t.is (Lexer2::typeName (tokens[2].second), "op", "tokens[2] = op");
|
||||||
|
|
||||||
t.is (tokens[3].first, "456", "tokens[3] = '456'");
|
t.is (tokens[3].first, "456", "tokens[3] = '456'");
|
||||||
t.is (Lexer::type_name (tokens[3].second), "Number", "tokens[3] = Number");
|
t.is (Lexer2::typeName (tokens[3].second), "number", "tokens[3] = number");
|
||||||
|
|
||||||
t.is (tokens[4].first, "-", "tokens[4] = '-'");
|
t.is (tokens[4].first, "-", "tokens[4] = '-'");
|
||||||
t.is (Lexer::type_name (tokens[4].second), "Operator", "tokens[4] = Operator");
|
t.is (Lexer2::typeName (tokens[4].second), "op", "tokens[4] = op");
|
||||||
|
|
||||||
t.is (tokens[5].first, "(", "tokens[5] = '('"); // 40
|
t.is (tokens[5].first, "(", "tokens[5] = '('"); // 40
|
||||||
t.is (Lexer::type_name (tokens[5].second), "Operator", "tokens[5] = Operator");
|
t.is (Lexer2::typeName (tokens[5].second), "op", "tokens[5] = op");
|
||||||
|
|
||||||
t.is (tokens[6].first, "1.3", "tokens[6] = '1.3'");
|
t.is (tokens[6].first, "1.3", "tokens[6] = '1.3'");
|
||||||
t.is (Lexer::type_name (tokens[6].second), "Decimal", "tokens[6] = Decimal");
|
t.is (Lexer2::typeName (tokens[6].second), "number", "tokens[6] = number");
|
||||||
|
|
||||||
t.is (tokens[7].first, "*", "tokens[7] = '*'");
|
t.is (tokens[7].first, "*", "tokens[7] = '*'");
|
||||||
t.is (Lexer::type_name (tokens[7].second), "Operator", "tokens[7] = Operator");
|
t.is (Lexer2::typeName (tokens[7].second), "op", "tokens[7] = op");
|
||||||
|
|
||||||
t.is (tokens[8].first, "2", "tokens[8] = '2'");
|
t.is (tokens[8].first, "2", "tokens[8] = '2'");
|
||||||
t.is (Lexer::type_name (tokens[8].second), "Number", "tokens[8] = Number");
|
t.is (Lexer2::typeName (tokens[8].second), "number", "tokens[8] = number");
|
||||||
|
|
||||||
t.is (tokens[9].first, "-", "tokens[9] = '-'");
|
t.is (tokens[9].first, "-", "tokens[9] = '-'");
|
||||||
t.is (Lexer::type_name (tokens[9].second), "Operator", "tokens[9] = Operator");
|
t.is (Lexer2::typeName (tokens[9].second), "op", "tokens[9] = op");
|
||||||
|
|
||||||
t.is (tokens[10].first, "0x12", "tokens[10] = '0x12'"); // 50
|
t.is (tokens[10].first, "0x12", "tokens[10] = '0x12'"); // 50
|
||||||
t.is (Lexer::type_name (tokens[10].second), "Hex", "tokens[10] = Hex");
|
t.is (Lexer2::typeName (tokens[10].second), "hex", "tokens[10] = hex");
|
||||||
|
|
||||||
t.is (tokens[11].first, ")", "tokens[11] = ')'");
|
t.is (tokens[11].first, ")", "tokens[11] = ')'");
|
||||||
t.is (Lexer::type_name (tokens[11].second), "Operator", "tokens[11] = Operator");
|
t.is (Lexer2::typeName (tokens[11].second), "op", "tokens[11] = op");
|
||||||
|
|
||||||
t.is (tokens[12].first, "A", "tokens[12] = \\u0041 --> 'A'");
|
t.is (tokens[12].first, "1.2e-3.4", "tokens[12] = '1.2e-3.4'");
|
||||||
t.is (Lexer::type_name (tokens[12].second), "Identifier", "tokens[12] = Identifier");
|
t.is (Lexer2::typeName (tokens[12].second), "number", "tokens[12] = number");
|
||||||
|
|
||||||
t.is (tokens[13].first, "1.2e-3.4", "tokens[13] = '1.2e-3.4'");
|
t.is (tokens[13].first, "foo.bar", "tokens[13] = 'foo.bar'");
|
||||||
t.is (Lexer::type_name (tokens[13].second), "Decimal", "tokens[13] = Decimal");
|
t.is (Lexer2::typeName (tokens[13].second), "identifier", "tokens[13] = identifier");
|
||||||
|
|
||||||
t.is (tokens[14].first, "foo.bar", "tokens[14] = 'foo.bar'");
|
t.is (tokens[14].first, "and", "tokens[14] = 'and'"); // 60
|
||||||
t.is (Lexer::type_name (tokens[14].second), "Identifier", "tokens[14] = Identifier");
|
t.is (Lexer2::typeName (tokens[14].second), "op", "tokens[14] = op");
|
||||||
|
|
||||||
t.is (tokens[15].first, "and", "tokens[15] = 'and'"); // 60
|
t.is (tokens[15].first, "€", "tokens[15] = \\u20ac --> '€'");
|
||||||
t.is (Lexer::type_name (tokens[15].second), "Operator", "tokens[15] = Operator");
|
t.is (Lexer2::typeName (tokens[15].second), "string", "tokens[15] = string");
|
||||||
|
|
||||||
t.is (tokens[16].first, "'€'", "tokens[16] = \\u20ac --> '€'");
|
|
||||||
t.is (Lexer::type_name (tokens[16].second), "String", "tokens[16] = String");
|
|
||||||
|
|
||||||
// Test for ISO-8601 dates (favoring dates in ambiguous cases).
|
// Test for ISO-8601 dates (favoring dates in ambiguous cases).
|
||||||
Lexer l3 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z");
|
Lexer2 l3 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z");
|
||||||
l3.ambiguity (true);
|
l3.ambiguity (true);
|
||||||
tokens.clear ();
|
tokens.clear ();
|
||||||
while (l3.token (token, type))
|
while (l3.token (token, type))
|
||||||
{
|
{
|
||||||
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
|
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n";
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
t.is ((int)tokens.size (), 10, "10 tokens");
|
t.is ((int)tokens.size (), 10, "10 tokens");
|
||||||
t.is (tokens[0].first, "1", "tokens[0] == '1'");
|
t.is (tokens[0].first, "1", "tokens[0] == '1'");
|
||||||
t.is (tokens[0].second, Lexer::typeNumber, "tokens[0] == typeNumber");
|
t.is ((int) tokens[0].second, (int) Lexer2::Type::number, "tokens[0] == Type::number");
|
||||||
t.is (tokens[1].first, "12", "tokens[1] == '12'");
|
t.is (tokens[1].first, "12", "tokens[1] == '12'");
|
||||||
t.is (tokens[1].second, Lexer::typeDate, "tokens[1] == typeDate");
|
t.is ((int) tokens[1].second, (int) Lexer2::Type::date, "tokens[1] == Type::date");
|
||||||
t.is (tokens[2].first, "123", "tokens[2] == '123'");
|
t.is (tokens[2].first, "123", "tokens[2] == '123'");
|
||||||
t.is (tokens[2].second, Lexer::typeNumber, "tokens[2] == typeNumber"); // 70
|
t.is ((int) tokens[2].second, (int) Lexer2::Type::number, "tokens[2] == Type::number"); // 70
|
||||||
t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
|
t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
|
||||||
t.is (tokens[3].second, Lexer::typeDate, "tokens[3] == typeDate");
|
t.is ((int) tokens[3].second, (int) Lexer2::Type::date, "tokens[3] == Type::date");
|
||||||
t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
|
t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
|
||||||
t.is (tokens[4].second, Lexer::typeNumber, "tokens[4] == typeNumber");
|
t.is ((int) tokens[4].second, (int) Lexer2::Type::number, "tokens[4] == Type::number");
|
||||||
t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
|
t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
|
||||||
t.is (tokens[5].second, Lexer::typeDate, "tokens[5] == typeDate");
|
t.is ((int) tokens[5].second, (int) Lexer2::Type::date, "tokens[5] == Type::date");
|
||||||
t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
|
t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
|
||||||
t.is (tokens[6].second, Lexer::typeNumber, "tokens[6] == typeNumber");
|
t.is ((int) tokens[6].second, (int) Lexer2::Type::number, "tokens[6] == Type::number");
|
||||||
t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'");
|
t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'");
|
||||||
t.is (tokens[7].second, Lexer::typeNumber, "tokens[7] == typeNumber"); // 80
|
t.is ((int) tokens[7].second, (int) Lexer2::Type::number, "tokens[7] == Type::number"); // 80
|
||||||
t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'");
|
t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'");
|
||||||
t.is (tokens[8].second, Lexer::typeDate, "tokens[8] == typeDate");
|
t.is ((int) tokens[8].second, (int) Lexer2::Type::date, "tokens[8] == Type::date");
|
||||||
t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'");
|
t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'");
|
||||||
t.is (tokens[9].second, Lexer::typeDate, "tokens[9] == typeDate");
|
t.is ((int) tokens[9].second, (int) Lexer2::Type::date, "tokens[9] == Type::date");
|
||||||
|
|
||||||
// Test for ISO-8601 dates (favoring numbers in ambiguous cases).
|
// Test for ISO-8601 dates (favoring numbers in ambiguous cases).
|
||||||
Lexer l4 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z");
|
Lexer2 l4 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z");
|
||||||
l4.ambiguity (false);
|
l4.ambiguity (false);
|
||||||
tokens.clear ();
|
tokens.clear ();
|
||||||
while (l4.token (token, type))
|
while (l4.token (token, type))
|
||||||
{
|
{
|
||||||
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
|
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n";
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
t.is ((int)tokens.size (), 10, "10 tokens");
|
t.is ((int)tokens.size (), 10, "10 tokens");
|
||||||
t.is (tokens[0].first, "1", "tokens[0] == '1'");
|
t.is (tokens[0].first, "1", "tokens[0] == '1'");
|
||||||
t.is (tokens[0].second, Lexer::typeNumber, "tokens[0] == typeNumber");
|
t.is ((int) tokens[0].second, (int) Lexer2::Type::number, "tokens[0] == Type::number");
|
||||||
t.is (tokens[1].first, "12", "tokens[1] == '12'");
|
t.is (tokens[1].first, "12", "tokens[1] == '12'");
|
||||||
t.is (tokens[1].second, Lexer::typeNumber, "tokens[1] == typeNumber");
|
t.is ((int) tokens[1].second, (int) Lexer2::Type::number, "tokens[1] == Type::number");
|
||||||
t.is (tokens[2].first, "123", "tokens[2] == '123'"); // 90
|
t.is (tokens[2].first, "123", "tokens[2] == '123'"); // 90
|
||||||
t.is (tokens[2].second, Lexer::typeNumber, "tokens[2] == typeNumber");
|
t.is ((int) tokens[2].second, (int) Lexer2::Type::number, "tokens[2] == Type::number");
|
||||||
t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
|
t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
|
||||||
t.is (tokens[3].second, Lexer::typeNumber, "tokens[3] == typeNumber");
|
t.is ((int) tokens[3].second, (int) Lexer2::Type::number, "tokens[3] == Type::number");
|
||||||
t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
|
t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
|
||||||
t.is (tokens[4].second, Lexer::typeNumber, "tokens[4] == typeNumber");
|
t.is ((int) tokens[4].second, (int) Lexer2::Type::number, "tokens[4] == Type::number");
|
||||||
t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
|
t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
|
||||||
t.is (tokens[5].second, Lexer::typeNumber, "tokens[5] == typeNumber");
|
t.is ((int) tokens[5].second, (int) Lexer2::Type::number, "tokens[5] == Type::number");
|
||||||
t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
|
t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
|
||||||
t.is (tokens[6].second, Lexer::typeNumber, "tokens[6] == typeNumber");
|
t.is ((int) tokens[6].second, (int) Lexer2::Type::number, "tokens[6] == Type::number");
|
||||||
t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); // 100
|
t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); // 100
|
||||||
t.is (tokens[7].second, Lexer::typeNumber, "tokens[7] == typeNumber");
|
t.is ((int) tokens[7].second, (int) Lexer2::Type::number, "tokens[7] == Type::number");
|
||||||
t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'");
|
t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'");
|
||||||
t.is (tokens[8].second, Lexer::typeDate, "tokens[8] == typeDate");
|
t.is ((int) tokens[8].second, (int) Lexer2::Type::date, "tokens[8] == Type::date");
|
||||||
t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'");
|
t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'");
|
||||||
t.is (tokens[9].second, Lexer::typeDate, "tokens[9] == typeDate");
|
t.is ((int) tokens[9].second, (int) Lexer2::Type::date, "tokens[9] == Type::date");
|
||||||
|
|
||||||
// Test for durations
|
// Test for durations
|
||||||
Lexer l5 ("1second 1minute 2hour 3 days 4w 5mo 6 years");
|
Lexer2 l5 ("1second 1minute 2hour 3 days 4w 5mo 6 years");
|
||||||
tokens.clear ();
|
tokens.clear ();
|
||||||
while (l5.token (token, type))
|
while (l5.token (token, type))
|
||||||
{
|
{
|
||||||
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
|
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n";
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
t.is ((int)tokens.size (), 7, "7 tokens");
|
t.is ((int)tokens.size (), 7, "7 tokens");
|
||||||
t.is (tokens[0].first, "1second", "tokens[0] == '1second'");
|
t.is (tokens[0].first, "1second", "tokens[0] == '1second'");
|
||||||
t.is (tokens[0].second, Lexer::typeDuration, "tokens[0] == typeDuration");
|
t.is ((int) tokens[0].second, (int) Lexer2::Type::duration, "tokens[0] == Type::duration");
|
||||||
t.is (tokens[1].first, "1minute", "tokens[1] == '1minute'");
|
t.is (tokens[1].first, "1minute", "tokens[1] == '1minute'");
|
||||||
t.is (tokens[1].second, Lexer::typeDuration, "tokens[1] == typeDuration"); // 110
|
t.is ((int) tokens[1].second, (int) Lexer2::Type::duration, "tokens[1] == Type::duration"); // 110
|
||||||
t.is (tokens[2].first, "2hour", "tokens[2] == '2hour'");
|
t.is (tokens[2].first, "2hour", "tokens[2] == '2hour'");
|
||||||
t.is (tokens[2].second, Lexer::typeDuration, "tokens[2] == typeDuration");
|
t.is ((int) tokens[2].second, (int) Lexer2::Type::duration, "tokens[2] == Type::duration");
|
||||||
t.is (tokens[3].first, "3 days", "tokens[3] == '3 days'");
|
t.is (tokens[3].first, "3 days", "tokens[3] == '3 days'");
|
||||||
t.is (tokens[3].second, Lexer::typeDuration, "tokens[3] == typeDuration");
|
t.is ((int) tokens[3].second, (int) Lexer2::Type::duration, "tokens[3] == Type::duration");
|
||||||
t.is (tokens[4].first, "4w", "tokens[4] == '4w'");
|
t.is (tokens[4].first, "4w", "tokens[4] == '4w'");
|
||||||
t.is (tokens[4].second, Lexer::typeDuration, "tokens[4] == typeDuration");
|
t.is ((int) tokens[4].second, (int) Lexer2::Type::duration, "tokens[4] == Type::duration");
|
||||||
t.is (tokens[5].first, "5mo", "tokens[5] == '5mo'");
|
t.is (tokens[5].first, "5mo", "tokens[5] == '5mo'");
|
||||||
t.is (tokens[5].second, Lexer::typeDuration, "tokens[5] == typeDuration");
|
t.is ((int) tokens[5].second, (int) Lexer2::Type::duration, "tokens[5] == Type::duration");
|
||||||
t.is (tokens[6].first, "6 years", "tokens[6] == '6 years'");
|
t.is (tokens[6].first, "6 years", "tokens[6] == '6 years'");
|
||||||
t.is (tokens[6].second, Lexer::typeDuration, "tokens[6] == typeDuration"); // 120
|
t.is ((int) tokens[6].second, (int) Lexer2::Type::duration, "tokens[6] == Type::duration"); // 120
|
||||||
|
|
||||||
// All the Eval operators.
|
// Test for ISO periods.
|
||||||
Lexer l6 ("P1Y PT1H P1Y1M1DT1H1M1S 1s 1second");
|
Lexer2 l6 ("P1Y PT1H P1Y1M1DT1H1M1S 1s 1second");
|
||||||
tokens.clear ();
|
tokens.clear ();
|
||||||
while (l6.token (token, type))
|
while (l6.token (token, type))
|
||||||
{
|
{
|
||||||
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
|
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n";
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
t.is ((int)tokens.size (), 5, "5 ISO periods");
|
t.is ((int)tokens.size (), 5, "5 ISO periods");
|
||||||
t.is (tokens[0].first, "P1Y", "tokens[0] == 'P1Y'");
|
t.is (tokens[0].first, "P1Y", "tokens[0] == 'P1Y'");
|
||||||
t.is (tokens[0].second, Lexer::typeDuration, "tokens[0] == typeDuration");
|
t.is ((int) tokens[0].second, (int) Lexer2::Type::duration, "tokens[0] == Type::duration");
|
||||||
t.is (tokens[1].first, "PT1H", "tokens[1] == 'PT1H'");
|
t.is (tokens[1].first, "PT1H", "tokens[1] == 'PT1H'");
|
||||||
t.is (tokens[1].second, Lexer::typeDuration, "tokens[1] == typeDuration");
|
t.is ((int) tokens[1].second, (int) Lexer2::Type::duration, "tokens[1] == Type::duration");
|
||||||
t.is (tokens[2].first, "P1Y1M1DT1H1M1S", "tokens[2] == 'P1Y1M1DT1H1M1S'");
|
t.is (tokens[2].first, "P1Y1M1DT1H1M1S", "tokens[2] == 'P1Y1M1DT1H1M1S'");
|
||||||
t.is (tokens[2].second, Lexer::typeDuration, "tokens[2] == typeDuration");
|
t.is ((int) tokens[2].second, (int) Lexer2::Type::duration, "tokens[2] == Type::duration");
|
||||||
t.is (tokens[3].first, "1s", "tokens[3] == '1s'");
|
t.is (tokens[3].first, "1s", "tokens[3] == '1s'");
|
||||||
t.is (tokens[3].second, Lexer::typeDuration, "tokens[3] == typeDuration");
|
t.is ((int) tokens[3].second, (int) Lexer2::Type::duration, "tokens[3] == Type::duration");
|
||||||
t.is (tokens[4].first, "1second", "tokens[4] == '1second'");
|
t.is (tokens[4].first, "1second", "tokens[4] == '1second'");
|
||||||
t.is (tokens[4].second, Lexer::typeDuration, "tokens[4] == typeDuration");
|
t.is ((int) tokens[4].second, (int) Lexer2::Type::duration, "tokens[4] == Type::duration");
|
||||||
|
|
||||||
// All the Eval operators.
|
// All the Eval operators.
|
||||||
Lexer l7 ("and xor or <= >= !~ != == = ^ > ~ ! * / % + - < ( )");
|
Lexer2 l7 ("and xor or <= >= !~ != == = ^ > ~ ! * / % + - < ( )");
|
||||||
tokens.clear ();
|
tokens.clear ();
|
||||||
while (l7.token (token, type))
|
while (l7.token (token, type))
|
||||||
{
|
{
|
||||||
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
|
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n";
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
t.is ((int)tokens.size (), 21, "21 operators");
|
t.is ((int)tokens.size (), 21, "21 operators");
|
||||||
t.is (tokens[0].first, "and", "tokens[0] == 'and'");
|
t.is (tokens[0].first, "and", "tokens[0] == 'and'");
|
||||||
t.is (tokens[0].second, Lexer::typeOperator, "tokens[0] == typeOperator"); // 130
|
t.is ((int) tokens[0].second, (int) Lexer2::Type::op, "tokens[0] == Type::op"); // 130
|
||||||
t.is (tokens[1].first, "xor", "tokens[1] == 'xor'");
|
t.is (tokens[1].first, "xor", "tokens[1] == 'xor'");
|
||||||
t.is (tokens[1].second, Lexer::typeOperator, "tokens[1] == typeOperator");
|
t.is ((int) tokens[1].second, (int) Lexer2::Type::op, "tokens[1] == Type::op");
|
||||||
t.is (tokens[2].first, "or", "tokens[2] == 'or'");
|
t.is (tokens[2].first, "or", "tokens[2] == 'or'");
|
||||||
t.is (tokens[2].second, Lexer::typeOperator, "tokens[2] == typeOperator");
|
t.is ((int) tokens[2].second, (int) Lexer2::Type::op, "tokens[2] == Type::op");
|
||||||
t.is (tokens[3].first, "<=", "tokens[3] == '<='");
|
t.is (tokens[3].first, "<=", "tokens[3] == '<='");
|
||||||
t.is (tokens[3].second, Lexer::typeOperator, "tokens[3] == typeOperator");
|
t.is ((int) tokens[3].second, (int) Lexer2::Type::op, "tokens[3] == Type::op");
|
||||||
t.is (tokens[4].first, ">=", "tokens[4] == '>='");
|
t.is (tokens[4].first, ">=", "tokens[4] == '>='");
|
||||||
t.is (tokens[4].second, Lexer::typeOperator, "tokens[4] == typeOperator");
|
t.is ((int) tokens[4].second, (int) Lexer2::Type::op, "tokens[4] == Type::op");
|
||||||
t.is (tokens[5].first, "!~", "tokens[5] == '!~'");
|
t.is (tokens[5].first, "!~", "tokens[5] == '!~'");
|
||||||
t.is (tokens[5].second, Lexer::typeOperator, "tokens[5] == typeOperator"); // 140
|
t.is ((int) tokens[5].second, (int) Lexer2::Type::op, "tokens[5] == Type::op"); // 140
|
||||||
t.is (tokens[6].first, "!=", "tokens[6] == '!='");
|
t.is (tokens[6].first, "!=", "tokens[6] == '!='");
|
||||||
t.is (tokens[6].second, Lexer::typeOperator, "tokens[6] == typeOperator");
|
t.is ((int) tokens[6].second, (int) Lexer2::Type::op, "tokens[6] == Type::op");
|
||||||
t.is (tokens[7].first, "==", "tokens[7] == '=='");
|
t.is (tokens[7].first, "==", "tokens[7] == '=='");
|
||||||
t.is (tokens[7].second, Lexer::typeOperator, "tokens[7] == typeOperator");
|
t.is ((int) tokens[7].second, (int) Lexer2::Type::op, "tokens[7] == Type::op");
|
||||||
t.is (tokens[8].first, "=", "tokens[8] == '='");
|
t.is (tokens[8].first, "=", "tokens[8] == '='");
|
||||||
t.is (tokens[8].second, Lexer::typeOperator, "tokens[8] == typeOperator");
|
t.is ((int) tokens[8].second, (int) Lexer2::Type::op, "tokens[8] == Type::op");
|
||||||
t.is (tokens[9].first, "^", "tokens[9] == '^'");
|
t.is (tokens[9].first, "^", "tokens[9] == '^'");
|
||||||
t.is (tokens[9].second, Lexer::typeOperator, "tokens[9] == typeOperator");
|
t.is ((int) tokens[9].second, (int) Lexer2::Type::op, "tokens[9] == Type::op");
|
||||||
t.is (tokens[10].first, ">", "tokens[10] == '>'");
|
t.is (tokens[10].first, ">", "tokens[10] == '>'");
|
||||||
t.is (tokens[10].second, Lexer::typeOperator, "tokens[10] == typeOperator"); // 150
|
t.is ((int) tokens[10].second, (int) Lexer2::Type::op, "tokens[10] == Type::op"); // 150
|
||||||
t.is (tokens[11].first, "~", "tokens[11] == '~'");
|
t.is (tokens[11].first, "~", "tokens[11] == '~'");
|
||||||
t.is (tokens[11].second, Lexer::typeOperator, "tokens[11] == typeOperator");
|
t.is ((int) tokens[11].second, (int) Lexer2::Type::op, "tokens[11] == Type::op");
|
||||||
t.is (tokens[12].first, "!", "tokens[12] == '!'");
|
t.is (tokens[12].first, "!", "tokens[12] == '!'");
|
||||||
t.is (tokens[12].second, Lexer::typeOperator, "tokens[12] == typeOperator");
|
t.is ((int) tokens[12].second, (int) Lexer2::Type::op, "tokens[12] == Type::op");
|
||||||
t.is (tokens[13].first, "*", "tokens[13] == '*'");
|
t.is (tokens[13].first, "*", "tokens[13] == '*'");
|
||||||
t.is (tokens[13].second, Lexer::typeOperator, "tokens[13] == typeOperator");
|
t.is ((int) tokens[13].second, (int) Lexer2::Type::op, "tokens[13] == Type::op");
|
||||||
t.is (tokens[14].first, "/", "tokens[14] == '/'");
|
t.is (tokens[14].first, "/", "tokens[14] == '/'");
|
||||||
t.is (tokens[14].second, Lexer::typeOperator, "tokens[14] == typeOperator");
|
t.is ((int) tokens[14].second, (int) Lexer2::Type::op, "tokens[14] == Type::op");
|
||||||
t.is (tokens[15].first, "%", "tokens[15] == '%'");
|
t.is (tokens[15].first, "%", "tokens[15] == '%'");
|
||||||
t.is (tokens[15].second, Lexer::typeOperator, "tokens[15] == typeOperator"); // 160
|
t.is ((int) tokens[15].second, (int) Lexer2::Type::op, "tokens[15] == Type::op"); // 160
|
||||||
t.is (tokens[16].first, "+", "tokens[16] == '+'");
|
t.is (tokens[16].first, "+", "tokens[16] == '+'");
|
||||||
t.is (tokens[16].second, Lexer::typeOperator, "tokens[16] == typeOperator");
|
t.is ((int) tokens[16].second, (int) Lexer2::Type::op, "tokens[16] == Type::op");
|
||||||
t.is (tokens[17].first, "-", "tokens[17] == '-'");
|
t.is (tokens[17].first, "-", "tokens[17] == '-'");
|
||||||
t.is (tokens[17].second, Lexer::typeOperator, "tokens[17] == typeOperator");
|
t.is ((int) tokens[17].second, (int) Lexer2::Type::op, "tokens[17] == Type::op");
|
||||||
t.is (tokens[18].first, "<", "tokens[18] == '<'");
|
t.is (tokens[18].first, "<", "tokens[18] == '<'");
|
||||||
t.is (tokens[18].second, Lexer::typeOperator, "tokens[18] == typeOperator");
|
t.is ((int) tokens[18].second, (int) Lexer2::Type::op, "tokens[18] == Type::op");
|
||||||
t.is (tokens[19].first, "(", "tokens[19] == '('");
|
t.is (tokens[19].first, "(", "tokens[19] == '('");
|
||||||
t.is (tokens[19].second, Lexer::typeOperator, "tokens[19] == typeOperator");
|
t.is ((int) tokens[19].second, (int) Lexer2::Type::op, "tokens[19] == Type::op");
|
||||||
t.is (tokens[20].first, ")", "tokens[20] == ')'");
|
t.is (tokens[20].first, ")", "tokens[20] == ')'");
|
||||||
t.is (tokens[20].second, Lexer::typeOperator, "tokens[20] == typeOperator"); // 170
|
t.is ((int) tokens[20].second, (int)Lexer2::Type::op, "tokens[20] == Type::op"); // 170
|
||||||
|
|
||||||
// Test ordinal dates.
|
// Test ordinal dates.
|
||||||
Lexer l8 ("9th 10th");
|
Lexer2 l8 ("9th 10th");
|
||||||
l8.ambiguity (false);
|
l8.ambiguity (false);
|
||||||
tokens.clear ();
|
tokens.clear ();
|
||||||
while (l8.token (token, type))
|
while (l8.token (token, type))
|
||||||
{
|
{
|
||||||
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
|
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n";
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
t.is ((int)tokens.size (), 2, "2 tokens");
|
t.is ((int)tokens.size (), 2, "2 tokens");
|
||||||
t.is (tokens[0].first, "9th", "tokens[0] == '9th'");
|
t.is (tokens[0].first, "9th", "tokens[0] == '9th'");
|
||||||
t.is (tokens[0].second, Lexer::typeIdentifier, "tokens[0] == typeIdentifier");
|
t.is ((int) tokens[0].second, (int) Lexer2::Type::identifier, "tokens[0] == Type::identifier");
|
||||||
t.is (tokens[1].first, "10th", "tokens[1] == '10th'");
|
t.is (tokens[1].first, "10th", "tokens[1] == '10th'");
|
||||||
t.is (tokens[1].second, Lexer::typeIdentifier, "tokens[1] == typeIdentifier");
|
t.is ((int) tokens[1].second, (int) Lexer2::Type::identifier, "tokens[1] == Type::identifier");
|
||||||
|
|
||||||
// Test tag recognition.
|
// Test tag recognition.
|
||||||
Lexer l9 ("+with -WITHOUT + 2");
|
Lexer2 l9 ("+with -WITHOUT + 2");
|
||||||
l9.ambiguity (false);
|
l9.ambiguity (false);
|
||||||
tokens.clear ();
|
tokens.clear ();
|
||||||
while (l9.token (token, type))
|
while (l9.token (token, type))
|
||||||
{
|
{
|
||||||
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
|
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n";
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
t.is ((int)tokens.size (), 4, "4 tokens");
|
t.is ((int)tokens.size (), 4, "4 tokens");
|
||||||
t.is (tokens[0].first, "+with", "tokens[0] == '+with'");
|
t.is (tokens[0].first, "+with", "tokens[0] == '+with'");
|
||||||
t.is (tokens[0].second, Lexer::typeTag, "tokens[0] == typeTag");
|
t.is ((int) tokens[0].second, (int) Lexer2::Type::tag, "tokens[0] == Type::tag");
|
||||||
t.is (tokens[1].first, "-WITHOUT", "tokens[1] == '-WITHOUT'");
|
t.is (tokens[1].first, "-WITHOUT", "tokens[1] == '-WITHOUT'");
|
||||||
t.is (tokens[1].second, Lexer::typeTag, "tokens[1] == typeTag");
|
t.is ((int) tokens[1].second, (int) Lexer2::Type::tag, "tokens[1] == Type::tag");
|
||||||
t.is (tokens[2].first, "+", "tokens[2] == '+'");
|
t.is (tokens[2].first, "+", "tokens[2] == '+'");
|
||||||
t.is (tokens[2].second, Lexer::typeOperator, "tokens[2] == typeOperator");
|
t.is ((int) tokens[2].second, (int) Lexer2::Type::op, "tokens[2] == Type::op");
|
||||||
t.is (tokens[3].first, "2", "tokens[3] == '2'");
|
t.is (tokens[3].first, "2", "tokens[3] == '2'");
|
||||||
t.is (tokens[3].second, Lexer::typeNumber, "tokens[3] == typeNumber");
|
t.is ((int) tokens[3].second, (int) Lexer2::Type::number, "tokens[3] == Type::number");
|
||||||
|
|
||||||
// void word_split (std::vector<std::string>&, const std::string&);
|
// std::vector <std::string> split (const std::string&);
|
||||||
std::string unsplit = " ( A or B ) ";
|
std::string unsplit = " ( A or B ) ";
|
||||||
std::vector <std::string> items;
|
std::vector <std::string> items;
|
||||||
Lexer::word_split (items, unsplit);
|
items = Lexer2::split (unsplit);
|
||||||
t.is (items.size (), (size_t) 5, "word_split ' ( A or B ) '");
|
t.is (items.size (), (size_t) 5, "split ' ( A or B ) '");
|
||||||
t.is (items[0], "(", "word_split ' ( A or B ) ' -> [0] '('");
|
t.is (items[0], "(", "split ' ( A or B ) ' -> [0] '('");
|
||||||
t.is (items[1], "A", "word_split ' ( A or B ) ' -> [1] 'A'");
|
t.is (items[1], "A", "split ' ( A or B ) ' -> [1] 'A'");
|
||||||
t.is (items[2], "or", "word_split ' ( A or B ) ' -> [2] 'or'");
|
t.is (items[2], "or", "split ' ( A or B ) ' -> [2] 'or'");
|
||||||
t.is (items[3], "B", "word_split ' ( A or B ) ' -> [3] 'B'");
|
t.is (items[3], "B", "split ' ( A or B ) ' -> [3] 'B'");
|
||||||
t.is (items[4], ")", "word_split ' ( A or B ) ' -> [4] ')'");
|
t.is (items[4], ")", "split ' ( A or B ) ' -> [4] ')'");
|
||||||
|
|
||||||
// Test simple mode with contrived tokens that ordinarily split.
|
// Test simple mode with contrived tokens that ordinarily split.
|
||||||
unsplit = " +-* a+b 12.3e4 'c d'";
|
unsplit = " +-* a+b 12.3e4 'c d'";
|
||||||
Lexer::word_split (items, unsplit);
|
items = Lexer2::split (unsplit);
|
||||||
t.is (items.size (), (size_t) 4, "word_split ' +-* a+b 12.3e4 'c d''");
|
t.is (items.size (), (size_t) 8, "split ' +-* a+b 12.3e4 'c d''");
|
||||||
t.is (items[0], "+-*", "word_split ' +-* a+b 12.3e4 'c d'' -> [0] '+-*'");
|
t.is (items[0], "+", "split ' +-* a+b 12.3e4 'c d'' -> [0] '+'");
|
||||||
t.is (items[1], "a+b", "word_split ' +-* a+b 12.3e4 'c d'' -> [1] 'a+b'");
|
t.is (items[1], "-", "split ' +-* a+b 12.3e4 'c d'' -> [1] '-'");
|
||||||
t.is (items[2], "12.3e4", "word_split ' +-* a+b 12.3e4 'c d'' -> [2] '12.3e4'");
|
t.is (items[2], "*", "split ' +-* a+b 12.3e4 'c d'' -> [2] '*'");
|
||||||
t.is (items[3], "'c d'", "word_split ' +-* a+b 12.3e4 'c d'' -> [3] 'c d'");
|
t.is (items[3], "a", "split ' +-* a+b 12.3e4 'c d'' -> [3] 'a'");
|
||||||
|
t.is (items[4], "+", "split ' +-* a+b 12.3e4 'c d'' -> [4] '+'");
|
||||||
|
t.is (items[5], "b", "split ' +-* a+b 12.3e4 'c d'' -> [5] 'b'");
|
||||||
|
t.is (items[6], "12.3e4", "split ' +-* a+b 12.3e4 'c d'' -> [6] '12.3e4'");
|
||||||
|
t.is (items[7], "c d", "split ' +-* a+b 12.3e4 'c d'' -> [7] 'c d'");
|
||||||
|
|
||||||
// Test common expression element.
|
// Test common expression element.
|
||||||
unsplit = "name=value";
|
unsplit = "name=value";
|
||||||
Lexer::token_split (items, unsplit);
|
items = Lexer2::split (unsplit);
|
||||||
t.is (items.size (), (size_t) 3, "token_split 'name=value'");
|
t.is (items.size (), (size_t) 1, "split 'name=value'");
|
||||||
if (items.size () == 3)
|
|
||||||
{
|
|
||||||
t.is (items[0], "name", "token_split 'name=value' -> [0] 'name'");
|
|
||||||
t.is (items[1], "=", "token_split 'name=value' -> [1] '='");
|
|
||||||
t.is (items[2], "value", "token_split 'name=value' -> [2] 'value'");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
t.fail ("token_split 'name=value' -> [0] 'name'");
|
|
||||||
t.fail ("token_split 'name=value' -> [1] '='");
|
|
||||||
t.fail ("token_split 'name=value' -> [2] 'value'");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test unterminated tokens.
|
// Test unterminated tokens.
|
||||||
unsplit = " ordinary ";
|
unsplit = " ordinary ";
|
||||||
Lexer::token_split (items, unsplit);
|
items = Lexer2::split (unsplit);
|
||||||
t.is (items.size (), (size_t) 1, "token_split 'ordinary' --> 1 token");
|
t.is (items.size (), (size_t) 1, "split 'ordinary' --> 1 token");
|
||||||
t.is (items[0], "ordinary", "token_split 'ordinary' --> 'ordinary'");
|
t.is (items[0], "ordinary", "split 'ordinary' --> 'ordinary'");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user