Lexer: Migrated to unicodeLatinDigit
This commit is contained in:
@@ -143,21 +143,12 @@ const std::string Lexer::typeName (const Lexer::Type& type)
|
|||||||
return "unknown";
|
return "unknown";
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Digits 0-9.
|
|
||||||
//
|
|
||||||
// TODO This list should be derived from the Unicode database.
|
|
||||||
bool Lexer::isDigit (int c)
|
|
||||||
{
|
|
||||||
return c >= 0x30 && c <= 0x39;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
bool Lexer::isIdentifierStart (int c)
|
bool Lexer::isIdentifierStart (int c)
|
||||||
{
|
{
|
||||||
return c && // Include null character check.
|
return c && // Include null character check.
|
||||||
! unicodeWhitespace (c) &&
|
! unicodeWhitespace (c) &&
|
||||||
! isDigit (c) &&
|
! unicodeLatinDigit (c) &&
|
||||||
! isSingleCharOperator (c) &&
|
! isSingleCharOperator (c) &&
|
||||||
! isPunctuation (c);
|
! isPunctuation (c);
|
||||||
}
|
}
|
||||||
@@ -219,7 +210,7 @@ bool Lexer::isBoundary (int left, int right)
|
|||||||
|
|
||||||
// XOR
|
// XOR
|
||||||
if (unicodeLatinAlpha (left) != unicodeLatinAlpha (right)) return true;
|
if (unicodeLatinAlpha (left) != unicodeLatinAlpha (right)) return true;
|
||||||
if (isDigit (left) != isDigit (right)) return true;
|
if (unicodeLatinDigit (left) != unicodeLatinDigit (right)) return true;
|
||||||
if (unicodeWhitespace (left) != unicodeWhitespace (right)) return true;
|
if (unicodeWhitespace (left) != unicodeWhitespace (right)) return true;
|
||||||
|
|
||||||
// OR
|
// OR
|
||||||
@@ -248,13 +239,13 @@ bool Lexer::isHardBoundary (int left, int right)
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
bool Lexer::isPunctuation (int c)
|
bool Lexer::isPunctuation (int c)
|
||||||
{
|
{
|
||||||
return isprint (c) &&
|
return isprint (c) &&
|
||||||
c != ' ' &&
|
c != ' ' &&
|
||||||
c != '@' &&
|
c != '@' &&
|
||||||
c != '#' &&
|
c != '#' &&
|
||||||
c != '$' &&
|
c != '$' &&
|
||||||
c != '_' &&
|
c != '_' &&
|
||||||
! isDigit (c) &&
|
! unicodeLatinDigit (c) &&
|
||||||
! unicodeLatinAlpha (c);
|
! unicodeLatinAlpha (c);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -368,7 +359,7 @@ std::string Lexer::commify (const std::string& data)
|
|||||||
int i;
|
int i;
|
||||||
for (int i = 0; i < (int) data.length (); ++i)
|
for (int i = 0; i < (int) data.length (); ++i)
|
||||||
{
|
{
|
||||||
if (Lexer::isDigit (data[i]))
|
if (unicodeLatinDigit (data[i]))
|
||||||
end = i;
|
end = i;
|
||||||
|
|
||||||
if (data[i] == '.')
|
if (data[i] == '.')
|
||||||
@@ -386,11 +377,11 @@ std::string Lexer::commify (const std::string& data)
|
|||||||
int consecutiveDigits = 0;
|
int consecutiveDigits = 0;
|
||||||
for (; i >= 0; --i)
|
for (; i >= 0; --i)
|
||||||
{
|
{
|
||||||
if (Lexer::isDigit (data[i]))
|
if (unicodeLatinDigit (data[i]))
|
||||||
{
|
{
|
||||||
result += data[i];
|
result += data[i];
|
||||||
|
|
||||||
if (++consecutiveDigits == 3 && i && Lexer::isDigit (data[i - 1]))
|
if (++consecutiveDigits == 3 && i && unicodeLatinDigit (data[i - 1]))
|
||||||
{
|
{
|
||||||
result += ',';
|
result += ',';
|
||||||
consecutiveDigits = 0;
|
consecutiveDigits = 0;
|
||||||
@@ -410,11 +401,11 @@ std::string Lexer::commify (const std::string& data)
|
|||||||
int consecutiveDigits = 0;
|
int consecutiveDigits = 0;
|
||||||
for (; i >= 0; --i)
|
for (; i >= 0; --i)
|
||||||
{
|
{
|
||||||
if (Lexer::isDigit (data[i]))
|
if (unicodeLatinDigit (data[i]))
|
||||||
{
|
{
|
||||||
result += data[i];
|
result += data[i];
|
||||||
|
|
||||||
if (++consecutiveDigits == 3 && i && Lexer::isDigit (data[i - 1]))
|
if (++consecutiveDigits == 3 && i && unicodeLatinDigit (data[i - 1]))
|
||||||
{
|
{
|
||||||
result += ',';
|
result += ',';
|
||||||
consecutiveDigits = 0;
|
consecutiveDigits = 0;
|
||||||
@@ -625,19 +616,19 @@ bool Lexer::isNumber (std::string& token, Lexer::Type& type)
|
|||||||
{
|
{
|
||||||
std::size_t marker = _cursor;
|
std::size_t marker = _cursor;
|
||||||
|
|
||||||
if (isDigit (_text[marker]))
|
if (unicodeLatinDigit (_text[marker]))
|
||||||
{
|
{
|
||||||
++marker;
|
++marker;
|
||||||
while (isDigit (_text[marker]))
|
while (unicodeLatinDigit (_text[marker]))
|
||||||
utf8_next_char (_text, marker);
|
utf8_next_char (_text, marker);
|
||||||
|
|
||||||
if (_text[marker] == '.')
|
if (_text[marker] == '.')
|
||||||
{
|
{
|
||||||
++marker;
|
++marker;
|
||||||
if (isDigit (_text[marker]))
|
if (unicodeLatinDigit (_text[marker]))
|
||||||
{
|
{
|
||||||
++marker;
|
++marker;
|
||||||
while (isDigit (_text[marker]))
|
while (unicodeLatinDigit (_text[marker]))
|
||||||
utf8_next_char (_text, marker);
|
utf8_next_char (_text, marker);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -651,19 +642,19 @@ bool Lexer::isNumber (std::string& token, Lexer::Type& type)
|
|||||||
_text[marker] == '-')
|
_text[marker] == '-')
|
||||||
++marker;
|
++marker;
|
||||||
|
|
||||||
if (isDigit (_text[marker]))
|
if (unicodeLatinDigit (_text[marker]))
|
||||||
{
|
{
|
||||||
++marker;
|
++marker;
|
||||||
while (isDigit (_text[marker]))
|
while (unicodeLatinDigit (_text[marker]))
|
||||||
utf8_next_char (_text, marker);
|
utf8_next_char (_text, marker);
|
||||||
|
|
||||||
if (_text[marker] == '.')
|
if (_text[marker] == '.')
|
||||||
{
|
{
|
||||||
++marker;
|
++marker;
|
||||||
if (isDigit (_text[marker]))
|
if (unicodeLatinDigit (_text[marker]))
|
||||||
{
|
{
|
||||||
++marker;
|
++marker;
|
||||||
while (isDigit (_text[marker]))
|
while (unicodeLatinDigit (_text[marker]))
|
||||||
utf8_next_char (_text, marker);
|
utf8_next_char (_text, marker);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -693,10 +684,10 @@ bool Lexer::isInteger (std::string& token, Lexer::Type& type)
|
|||||||
{
|
{
|
||||||
std::size_t marker = _cursor;
|
std::size_t marker = _cursor;
|
||||||
|
|
||||||
if (isDigit (_text[marker]))
|
if (unicodeLatinDigit (_text[marker]))
|
||||||
{
|
{
|
||||||
++marker;
|
++marker;
|
||||||
while (isDigit (_text[marker]))
|
while (unicodeLatinDigit (_text[marker]))
|
||||||
utf8_next_char (_text, marker);
|
utf8_next_char (_text, marker);
|
||||||
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
token = _text.substr (_cursor, marker - _cursor);
|
||||||
|
|||||||
@@ -61,7 +61,6 @@ public:
|
|||||||
|
|
||||||
// Static helpers.
|
// Static helpers.
|
||||||
static const std::string typeName (const Lexer::Type&);
|
static const std::string typeName (const Lexer::Type&);
|
||||||
static bool isDigit (int);
|
|
||||||
static bool isIdentifierStart (int);
|
static bool isIdentifierStart (int);
|
||||||
static bool isIdentifierNext (int);
|
static bool isIdentifierNext (int);
|
||||||
static bool isSingleCharOperator (int);
|
static bool isSingleCharOperator (int);
|
||||||
|
|||||||
@@ -42,6 +42,7 @@
|
|||||||
#include <Datetime.h>
|
#include <Datetime.h>
|
||||||
#include <Duration.h>
|
#include <Duration.h>
|
||||||
#include <format.h>
|
#include <format.h>
|
||||||
|
#include <unicode.h>
|
||||||
#include <util.h>
|
#include <util.h>
|
||||||
#include <main.h>
|
#include <main.h>
|
||||||
|
|
||||||
@@ -238,7 +239,7 @@ Datetime getNextRecurrence (Datetime& current, std::string& period)
|
|||||||
return current + (days * 86400);
|
return current + (days * 86400);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (Lexer::isDigit (period[0]) &&
|
else if (unicodeLatinDigit (period[0]) &&
|
||||||
period[period.length () - 1] == 'm')
|
period[period.length () - 1] == 'm')
|
||||||
{
|
{
|
||||||
int increment = strtol (period.substr (0, period.length () - 1).c_str (), NULL, 10);
|
int increment = strtol (period.substr (0, period.length () - 1).c_str (), NULL, 10);
|
||||||
@@ -291,7 +292,7 @@ Datetime getNextRecurrence (Datetime& current, std::string& period)
|
|||||||
return Datetime (y, m, d, ho, mi, se);
|
return Datetime (y, m, d, ho, mi, se);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (Lexer::isDigit (period[0]) && period[period.length () - 1] == 'q')
|
else if (unicodeLatinDigit (period[0]) && period[period.length () - 1] == 'q')
|
||||||
{
|
{
|
||||||
int increment = strtol (period.substr (0, period.length () - 1).c_str (), NULL, 10);
|
int increment = strtol (period.substr (0, period.length () - 1).c_str (), NULL, 10);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user