From 2b747e2df2b2c028dae7ca080813cce02ac19b62 Mon Sep 17 00:00:00 2001
From: Paul Beckingham
Date: Thu, 2 Jan 2014 01:12:38 -0500
Subject: [PATCH] Unit Tests

- Merged libexpr Lexer tests.
- Updated .gitignore.
---
 test/.gitignore     |   4 +
 test/CMakeLists.txt |   2 +-
 test/lexer.t.cpp    | 305 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 310 insertions(+), 1 deletion(-)
 create mode 100644 test/lexer.t.cpp

diff --git a/test/.gitignore b/test/.gitignore
index 07ec0f769..7fb32d3f1 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -11,10 +11,14 @@ dom.t
 duration.t
 file.t
 i18n.t
+iso8601d.t
+iso8601p.t
 json.t
+lexer.t
 list.t
 msg.t
 nibbler.t
+old_duration.t
 path.t
 rx.t
 t.t
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4572f3b6c..4ffe966b3 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -9,7 +9,7 @@ include_directories (${CMAKE_SOURCE_DIR}
 set (test_SRCS autocomplete.t color.t config.t date.t directory.t dom.t
                old_duration.t file.t i18n.t json.t list.t msg.t nibbler.t path.t
                rx.t t.t t2.t taskmod.t tdb2.t text.t tree.t uri.t utf8.t util.t
-               view.t width.t json_test iso8601d.t iso8601p.t duration.t)
+               view.t width.t json_test iso8601d.t iso8601p.t duration.t lexer.t)
 
 message ("-- Configuring run_all")
 if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp
new file mode 100644
index 000000000..f850650fd
--- /dev/null
+++ b/test/lexer.t.cpp
@@ -0,0 +1,305 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// Copyright 2013 - 2014, Göteborg Bit Factory.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// http://www.opensource.org/licenses/mit-license.php
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <test.h>
+#include <Lexer.h>
+#include <Context.h>
+
+Context context;
+
+////////////////////////////////////////////////////////////////////////////////
+int main (int argc, char** argv)
+{
+  UnitTest t (170);
+
+  std::vector <std::pair <std::string, Lexer::Type> > tokens;
+  std::string token;
+  Lexer::Type type;
+
+  // White space detection: one negative case, then every code point expected to be ws.
+  t.notok (Lexer::is_ws (0x0041), "U+0041 (A) is not ws");
+  t.ok (Lexer::is_ws (0x0020), "U+0020 is_ws");
+  t.ok (Lexer::is_ws (0x0009), "U+0009 is_ws");
+  t.ok (Lexer::is_ws (0x000A), "U+000A is_ws");
+  t.ok (Lexer::is_ws (0x000B), "U+000B is_ws");
+  t.ok (Lexer::is_ws (0x000C), "U+000C is_ws");
+  t.ok (Lexer::is_ws (0x000D), "U+000D is_ws");
+  t.ok (Lexer::is_ws (0x0085), "U+0085 is_ws");
+  t.ok (Lexer::is_ws (0x00A0), "U+00A0 is_ws");
+  t.ok (Lexer::is_ws (0x1680), "U+1680 is_ws"); // 10
+  t.ok (Lexer::is_ws (0x180E), "U+180E is_ws");
+  t.ok (Lexer::is_ws (0x2000), "U+2000 is_ws");
+  t.ok (Lexer::is_ws (0x2001), "U+2001 is_ws");
+  t.ok (Lexer::is_ws (0x2002), "U+2002 is_ws");
+  t.ok (Lexer::is_ws (0x2003), "U+2003 is_ws");
+  t.ok (Lexer::is_ws (0x2004), "U+2004 is_ws");
+  t.ok (Lexer::is_ws (0x2005), "U+2005 is_ws");
+  t.ok (Lexer::is_ws (0x2006), "U+2006 is_ws");
+  t.ok (Lexer::is_ws (0x2007), "U+2007 is_ws");
+  t.ok (Lexer::is_ws (0x2008), "U+2008 is_ws"); // 20
+  t.ok (Lexer::is_ws (0x2009), "U+2009 is_ws");
+  t.ok (Lexer::is_ws (0x200A), "U+200A is_ws");
+  t.ok (Lexer::is_ws (0x2028), "U+2028 is_ws");
+  t.ok (Lexer::is_ws (0x2029), "U+2029 is_ws");
+  t.ok (Lexer::is_ws (0x202F), "U+202F is_ws");
+  t.ok (Lexer::is_ws (0x205F), "U+205F is_ws");
+  t.ok (Lexer::is_ws (0x3000), "U+3000 is_ws");
+
+  // Empty input should result in no tokens.
+  Lexer l0 ("");
+  t.notok (l0.token (token, type), "'' --> no tokens");
+
+  // Whitespace-only input should result in no tokens.
+  Lexer l1 (" \t ");
+  t.notok (l1.token (token, type), "' \\t ' --> no tokens");
+
+  // Mixed expression with quoted strings, operators, numbers and Unicode escapes (\u20ac = Euro symbol).
+  Lexer l2 (" one 'two \\'three\\''+456-(1.3*2 - 0x12) \\u0041 1.2e-3.4 foo.bar and '\\u20ac'");
+
+  tokens.clear ();
+  while (l2.token (token, type))
+  {
+    std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
+    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+  }
+
+  t.is (tokens[0].first, "one", "tokens[0] = 'one'"); // 30
+  t.is (Lexer::type_name (tokens[0].second), "Identifier", "tokens[0] = Identifier");
+
+  t.is (tokens[1].first, "two 'three'", "tokens[1] = 'two \\'three\\''");
+  t.is (Lexer::type_name (tokens[1].second), "String", "tokens[1] = String");
+
+  t.is (tokens[2].first, "+", "tokens[2] = '+'");
+  t.is (Lexer::type_name (tokens[2].second), "Operator", "tokens[2] = Operator");
+
+  t.is (tokens[3].first, "456", "tokens[3] = '456'");
+  t.is (Lexer::type_name (tokens[3].second), "Number", "tokens[3] = Number");
+
+  t.is (tokens[4].first, "-", "tokens[4] = '-'");
+  t.is (Lexer::type_name (tokens[4].second), "Operator", "tokens[4] = Operator");
+
+  t.is (tokens[5].first, "(", "tokens[5] = '('"); // 40
+  t.is (Lexer::type_name (tokens[5].second), "Operator", "tokens[5] = Operator");
+
+  t.is (tokens[6].first, "1.3", "tokens[6] = '1.3'");
+  t.is (Lexer::type_name (tokens[6].second), "Decimal", "tokens[6] = Decimal");
+
+  t.is (tokens[7].first, "*", "tokens[7] = '*'");
+  t.is (Lexer::type_name (tokens[7].second), "Operator", "tokens[7] = Operator");
+
+  t.is (tokens[8].first, "2", "tokens[8] = '2'");
+  t.is (Lexer::type_name (tokens[8].second), "Number", "tokens[8] = Number");
+
+  t.is (tokens[9].first, "-", "tokens[9] = '-'");
+  t.is (Lexer::type_name (tokens[9].second), "Operator", "tokens[9] = Operator");
+
+  t.is (tokens[10].first, "0x12", "tokens[10] = '0x12'"); // 50
+  t.is (Lexer::type_name (tokens[10].second), "Hex", "tokens[10] = Hex");
+
+  t.is (tokens[11].first, ")", "tokens[11] = ')'");
+  t.is (Lexer::type_name (tokens[11].second), "Operator", "tokens[11] = Operator");
+
+  t.is (tokens[12].first, "A", "tokens[12] = \\u0041 --> 'A'");
+  t.is (Lexer::type_name (tokens[12].second), "Identifier", "tokens[12] = Identifier");
+
+  t.is (tokens[13].first, "1.2e-3.4", "tokens[13] = '1.2e-3.4'");
+  t.is (Lexer::type_name (tokens[13].second), "Decimal", "tokens[13] = Decimal");
+
+  t.is (tokens[14].first, "foo.bar", "tokens[14] = 'foo.bar'");
+  t.is (Lexer::type_name (tokens[14].second), "Identifier", "tokens[14] = Identifier");
+
+  t.is (tokens[15].first, "and", "tokens[15] = 'and'"); // 60
+  t.is (Lexer::type_name (tokens[15].second), "Operator", "tokens[15] = Operator");
+
+  t.is (tokens[16].first, "€", "tokens[16] = \\u20ac --> '€'");
+  t.is (Lexer::type_name (tokens[16].second), "String", "tokens[16] = String");
+
+  // Test for ISO-8601 dates (favoring dates in ambiguous cases).
+  Lexer l3 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z");
+  l3.ambiguity (true);
+  tokens.clear ();
+  while (l3.token (token, type))
+  {
+    std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
+    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+  }
+
+  t.is ((int)tokens.size (), 10, "10 tokens");
+  t.is (tokens[0].first, "1", "tokens[0] == '1'");
+  t.is (tokens[0].second, Lexer::typeNumber, "tokens[0] == typeNumber");
+  t.is (tokens[1].first, "12", "tokens[1] == '12'");
+  t.is (tokens[1].second, Lexer::typeDate, "tokens[1] == typeDate");
+  t.is (tokens[2].first, "123", "tokens[2] == '123'");
+  t.is (tokens[2].second, Lexer::typeNumber, "tokens[2] == typeNumber"); // 70
+  t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
+  t.is (tokens[3].second, Lexer::typeDate, "tokens[3] == typeDate");
+  t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
+  t.is (tokens[4].second, Lexer::typeNumber, "tokens[4] == typeNumber");
+  t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
+  t.is (tokens[5].second, Lexer::typeDate, "tokens[5] == typeDate");
+  t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
+  t.is (tokens[6].second, Lexer::typeNumber, "tokens[6] == typeNumber");
+  t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'");
+  t.is (tokens[7].second, Lexer::typeNumber, "tokens[7] == typeNumber"); // 80
+  t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'");
+  t.is (tokens[8].second, Lexer::typeDate, "tokens[8] == typeDate");
+  t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'");
+  t.is (tokens[9].second, Lexer::typeDate, "tokens[9] == typeDate");
+
+  // Test for ISO-8601 dates (favoring numbers in ambiguous cases).
+  Lexer l4 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z");
+  l4.ambiguity (false);
+  tokens.clear ();
+  while (l4.token (token, type))
+  {
+    std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
+    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+  }
+
+  t.is ((int)tokens.size (), 10, "10 tokens");
+  t.is (tokens[0].first, "1", "tokens[0] == '1'");
+  t.is (tokens[0].second, Lexer::typeNumber, "tokens[0] == typeNumber");
+  t.is (tokens[1].first, "12", "tokens[1] == '12'");
+  t.is (tokens[1].second, Lexer::typeNumber, "tokens[1] == typeNumber");
+  t.is (tokens[2].first, "123", "tokens[2] == '123'"); // 90
+  t.is (tokens[2].second, Lexer::typeNumber, "tokens[2] == typeNumber");
+  t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
+  t.is (tokens[3].second, Lexer::typeNumber, "tokens[3] == typeNumber");
+  t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
+  t.is (tokens[4].second, Lexer::typeNumber, "tokens[4] == typeNumber");
+  t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
+  t.is (tokens[5].second, Lexer::typeNumber, "tokens[5] == typeNumber");
+  t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
+  t.is (tokens[6].second, Lexer::typeNumber, "tokens[6] == typeNumber");
+  t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); // 100
+  t.is (tokens[7].second, Lexer::typeNumber, "tokens[7] == typeNumber");
+  t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'");
+  t.is (tokens[8].second, Lexer::typeDate, "tokens[8] == typeDate");
+  t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'");
+  t.is (tokens[9].second, Lexer::typeDate, "tokens[9] == typeDate");
+
+  // Test for durations
+  Lexer l5 ("second 1minute 2hour 3 days 4w 5mos 6 years");
+  tokens.clear ();
+  while (l5.token (token, type))
+  {
+    std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
+    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+  }
+
+  t.is ((int)tokens.size (), 7, "7 tokens");
+  t.is (tokens[0].first, "second", "tokens[0] == 'second'");
+  t.is (tokens[0].second, Lexer::typeDuration, "tokens[0] == typeDuration");
+  t.is (tokens[1].first, "1minute", "tokens[1] == '1minute'");
+  t.is (tokens[1].second, Lexer::typeDuration, "tokens[1] == typeDuration"); // 110
+  t.is (tokens[2].first, "2hour", "tokens[2] == '2hour'");
+  t.is (tokens[2].second, Lexer::typeDuration, "tokens[2] == typeDuration");
+  t.is (tokens[3].first, "3 days", "tokens[3] == '3 days'");
+  t.is (tokens[3].second, Lexer::typeDuration, "tokens[3] == typeDuration");
+  t.is (tokens[4].first, "4w", "tokens[4] == '4w'");
+  t.is (tokens[4].second, Lexer::typeDuration, "tokens[4] == typeDuration");
+  t.is (tokens[5].first, "5mos", "tokens[5] == '5mos'");
+  t.is (tokens[5].second, Lexer::typeDuration, "tokens[5] == typeDuration");
+  t.is (tokens[6].first, "6 years", "tokens[6] == '6 years'");
+  t.is (tokens[6].second, Lexer::typeDuration, "tokens[6] == typeDuration"); // 120
+
+  // Test for ISO-8601 periods.
+  Lexer l6 ("P1Y PT1H P1Y1M1DT1H1M1S");
+  tokens.clear ();
+  while (l6.token (token, type))
+  {
+    std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
+    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+  }
+
+  t.is ((int)tokens.size (), 3, "3 ISO periods");
+  t.is (tokens[0].first, "P1Y", "tokens[0] == 'P1Y'");
+  t.is (tokens[0].second, Lexer::typeDuration, "tokens[0] == typeDuration");
+  t.is (tokens[1].first, "PT1H", "tokens[1] == 'PT1H'");
+  t.is (tokens[1].second, Lexer::typeDuration, "tokens[1] == typeDuration");
+  t.is (tokens[2].first, "P1Y1M1DT1H1M1S", "tokens[2] == 'P1Y1M1DT1H1M1S'");
+  t.is (tokens[2].second, Lexer::typeDuration, "tokens[2] == typeDuration");
+
+  // All the Eval operators.
+  Lexer l7 ("and xor or <= >= !~ != == = ^ > ~ ! * / % + - < ( )");
+  tokens.clear ();
+  while (l7.token (token, type))
+  {
+    std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
+    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+  }
+
+  t.is ((int)tokens.size (), 21, "21 operators");
+  t.is (tokens[0].first, "and", "tokens[0] == 'and'");
+  t.is (tokens[0].second, Lexer::typeOperator, "tokens[0] == typeOperator"); // 130
+  t.is (tokens[1].first, "xor", "tokens[1] == 'xor'");
+  t.is (tokens[1].second, Lexer::typeOperator, "tokens[1] == typeOperator");
+  t.is (tokens[2].first, "or", "tokens[2] == 'or'");
+  t.is (tokens[2].second, Lexer::typeOperator, "tokens[2] == typeOperator");
+  t.is (tokens[3].first, "<=", "tokens[3] == '<='");
+  t.is (tokens[3].second, Lexer::typeOperator, "tokens[3] == typeOperator");
+  t.is (tokens[4].first, ">=", "tokens[4] == '>='");
+  t.is (tokens[4].second, Lexer::typeOperator, "tokens[4] == typeOperator");
+  t.is (tokens[5].first, "!~", "tokens[5] == '!~'");
+  t.is (tokens[5].second, Lexer::typeOperator, "tokens[5] == typeOperator"); // 140
+  t.is (tokens[6].first, "!=", "tokens[6] == '!='");
+  t.is (tokens[6].second, Lexer::typeOperator, "tokens[6] == typeOperator");
+  t.is (tokens[7].first, "==", "tokens[7] == '=='");
+  t.is (tokens[7].second, Lexer::typeOperator, "tokens[7] == typeOperator");
+  t.is (tokens[8].first, "=", "tokens[8] == '='");
+  t.is (tokens[8].second, Lexer::typeOperator, "tokens[8] == typeOperator");
+  t.is (tokens[9].first, "^", "tokens[9] == '^'");
+  t.is (tokens[9].second, Lexer::typeOperator, "tokens[9] == typeOperator");
+  t.is (tokens[10].first, ">", "tokens[10] == '>'");
+  t.is (tokens[10].second, Lexer::typeOperator, "tokens[10] == typeOperator"); // 150
+  t.is (tokens[11].first, "~", "tokens[11] == '~'");
+  t.is (tokens[11].second, Lexer::typeOperator, "tokens[11] == typeOperator");
+  t.is (tokens[12].first, "!", "tokens[12] == '!'");
+  t.is (tokens[12].second, Lexer::typeOperator, "tokens[12] == typeOperator");
+  t.is (tokens[13].first, "*", "tokens[13] == '*'");
+  t.is (tokens[13].second, Lexer::typeOperator, "tokens[13] == typeOperator");
+  t.is (tokens[14].first, "/", "tokens[14] == '/'");
+  t.is (tokens[14].second, Lexer::typeOperator, "tokens[14] == typeOperator");
+  t.is (tokens[15].first, "%", "tokens[15] == '%'");
+  t.is (tokens[15].second, Lexer::typeOperator, "tokens[15] == typeOperator"); // 160
+  t.is (tokens[16].first, "+", "tokens[16] == '+'");
+  t.is (tokens[16].second, Lexer::typeOperator, "tokens[16] == typeOperator");
+  t.is (tokens[17].first, "-", "tokens[17] == '-'");
+  t.is (tokens[17].second, Lexer::typeOperator, "tokens[17] == typeOperator");
+  t.is (tokens[18].first, "<", "tokens[18] == '<'");
+  t.is (tokens[18].second, Lexer::typeOperator, "tokens[18] == typeOperator");
+  t.is (tokens[19].first, "(", "tokens[19] == '('");
+  t.is (tokens[19].second, Lexer::typeOperator, "tokens[19] == typeOperator");
+  t.is (tokens[20].first, ")", "tokens[20] == ')'");
+  t.is (tokens[20].second, Lexer::typeOperator, "tokens[20] == typeOperator"); // 170
+
+  return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////