Expressions

- Implemented sequence --> infix converter. - Added new Lexer code. - Added Lexer unit tests.
2011-06-06 01:46:11 -04:00
parent 86dcec8aea
commit ed8454c202
10 changed files with 1247 additions and 70 deletions
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -7,9 +7,9 @@ include_directories (${CMAKE_SOURCE_DIR}
                     ${TASK_INCLUDE_DIRS})

 set (test_SRCS arguments.t att.t autocomplete.t color.t config.t date.t
-               directory.t dom.t duration.t file.t filt.t i18n.t json.t list.t
-               nibbler.t path.t record.t rx.t seq.t subst.t t.benchmark.t t.t
-               taskmod.t tdb.t tdb2.t text.t uri.t util.t variant.t view.t
+               directory.t dom.t duration.t file.t filt.t i18n.t json.t lexer.t
+               list.t nibbler.t path.t record.t rx.t seq.t subst.t t.benchmark.t
+               t.t taskmod.t tdb.t tdb2.t text.t uri.t util.t variant.t view.t
               json_test)

 add_custom_target (test ./run_all DEPENDS ${test_SRCS}
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@@ -0,0 +1,331 @@
+////////////////////////////////////////////////////////////////////////////////
+// taskwarrior - a command line task list manager.
+//
+// Copyright 2006 - 2011, Paul Beckingham.
+// All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation; either version 2 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the
+//
+//     Free Software Foundation, Inc.,
+//     51 Franklin Street, Fifth Floor,
+//     Boston, MA
+//     02110-1301
+//     USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <Lexer.h>
+#include <Context.h>
+#include <Test.h>
+
+Context context;
+
+////////////////////////////////////////////////////////////////////////////////
+int main (int argc, char** argv)
+{
+  UnitTest t (80);  // 80 == number of t.is/t.skip results emitted below, on every path.
+  // With no options set: letter runs coalesce; each space and punctuation mark is its own token.
+  std::string input = "This is a test.";
+  std::vector <std::string> tokens;  // Reused by every case; assumes tokenize () replaces its contents - TODO confirm.
+  {
+    Lexer l (input);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 8, "'This is a test.' -> 'This| |is| |a| |test|.'");
+  if (tokens.size () == 8)
+  {
+    t.is (tokens[0], "This", "'This is a test.' [0] -> 'This'");
+    t.is (tokens[1], " ",    "'This is a test.' [1] -> ' '");
+    t.is (tokens[2], "is",   "'This is a test.' [2] -> 'is'");
+    t.is (tokens[3], " ",    "'This is a test.' [3] -> ' '");
+    t.is (tokens[4], "a",    "'This is a test.' [4] -> 'a'");
+    t.is (tokens[5], " ",    "'This is a test.' [5] -> ' '");
+    t.is (tokens[6], "test", "'This is a test.' [6] -> 'test'");
+    t.is (tokens[7], ".",    "'This is a test.' [7] -> '.'");
+  }
+  else
+  {
+    t.skip ("'This is a test.' [0] -> 'This'");
+    t.skip ("'This is a test.' [1] -> ' '");
+    t.skip ("'This is a test.' [2] -> 'is'");
+    t.skip ("'This is a test.' [3] -> ' '");
+    t.skip ("'This is a test.' [4] -> 'a'");
+    t.skip ("'This is a test.' [5] -> ' '");
+    t.skip ("'This is a test.' [6] -> 'test'");
+    t.skip ("'This is a test.' [7] -> '.'");
+  }
+
+  input = "a12bcd345efgh6789";  // Expect a split at every letter/digit boundary, digit runs coalesced.
+  {
+    Lexer l (input);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 6, "'a12bcd345efgh6789' -> 'a|12|bcd|345|efgh|6789'");
+  if (tokens.size () == 6)
+  {
+    t.is (tokens[0], "a",    "'a12bcd345efgh6789' [0] -> 'a'");
+    t.is (tokens[1], "12",   "'a12bcd345efgh6789' [1] -> '12'");
+    t.is (tokens[2], "bcd",  "'a12bcd345efgh6789' [2] -> 'bcd'");
+    t.is (tokens[3], "345",  "'a12bcd345efgh6789' [3] -> '345'");
+    t.is (tokens[4], "efgh", "'a12bcd345efgh6789' [4] -> 'efgh'");
+    t.is (tokens[5], "6789", "'a12bcd345efgh6789' [5] -> '6789'");
+  }
+  else
+  {
+    t.skip ("'a12bcd345efgh6789' [0] -> 'a'");
+    t.skip ("'a12bcd345efgh6789' [1] -> '12'");
+    t.skip ("'a12bcd345efgh6789' [2] -> 'bcd'");
+    t.skip ("'a12bcd345efgh6789' [3] -> '345'");
+    t.skip ("'a12bcd345efgh6789' [4] -> 'efgh'");
+    t.skip ("'a12bcd345efgh6789' [5] -> '6789'");
+  }
+
+  // Perl-like input: '_' is categorized as a letter and the quoted string must come back as one token.
+  input = "my $variable_name = 'single string';";
+  {
+    Lexer l (input);
+    l.categorizeAsAlpha ('_');
+    l.coalesceQuoted (true);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 9, "'my $variable_name = 'single string';' -> 'my| |$|variable_name| |=| |'single string'|;'");
+  if (tokens.size () == 9)
+  {
+    t.is (tokens[0], "my",              "'my $variable_name = 'single string';' [0] -> 'my'");
+    t.is (tokens[1], " ",               "'my $variable_name = 'single string';' [1] -> ' '");
+    t.is (tokens[2], "$",               "'my $variable_name = 'single string';' [2] -> '$'");
+    t.is (tokens[3], "variable_name",   "'my $variable_name = 'single string';' [3] -> 'variable_name'");
+    t.is (tokens[4], " ",               "'my $variable_name = 'single string';' [4] -> ' '");
+    t.is (tokens[5], "=",               "'my $variable_name = 'single string';' [5] -> '='");
+    t.is (tokens[6], " ",               "'my $variable_name = 'single string';' [6] -> ' '");
+    t.is (tokens[7], "'single string'", "'my $variable_name = 'single string';' [7] -> ''single string''");
+    t.is (tokens[8], ";",               "'my $variable_name = 'single string';' [8] -> ';'");
+  }
+  else
+  {
+    t.skip ("'my $variable_name = 'single string';' [0] -> 'my'");
+    t.skip ("'my $variable_name = 'single string';' [1] -> ' '");
+    t.skip ("'my $variable_name = 'single string';' [2] -> '$'");
+    t.skip ("'my $variable_name = 'single string';' [3] -> 'variable_name'");
+    t.skip ("'my $variable_name = 'single string';' [4] -> ' '");
+    t.skip ("'my $variable_name = 'single string';' [5] -> '='");
+    t.skip ("'my $variable_name = 'single string';' [6] -> ' '");
+    t.skip ("'my $variable_name = 'single string';' [7] -> ''single string''");
+    t.skip ("'my $variable_name = 'single string';' [8] -> ';'");
+  }
+
+  // Exercise each configurable option in turn on the same input, one fresh Lexer per option.
+  input = "ab  12 'a'";
+  {
+    Lexer l (input);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 8, "'ab  12 'a'' -> 'ab| | |12| |'|a|''");
+  if (tokens.size () == 8)
+  {
+    t.is (tokens[0], "ab", "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], " ",  "'ab  12 'a'' [1] -> ' '");
+    t.is (tokens[2], " ",  "'ab  12 'a'' [2] -> ' '");
+    t.is (tokens[3], "12", "'ab  12 'a'' [3] -> '12'");
+    t.is (tokens[4], " ",  "'ab  12 'a'' [4] -> ' '");
+    t.is (tokens[5], "'",  "'ab  12 'a'' [5] -> '''");
+    t.is (tokens[6], "a",  "'ab  12 'a'' [6] -> 'a'");
+    t.is (tokens[7], "'",  "'ab  12 'a'' [7] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> ' '");
+    t.skip ("'ab  12 'a'' [2] -> ' '");
+    t.skip ("'ab  12 'a'' [3] -> '12'");
+    t.skip ("'ab  12 'a'' [4] -> ' '");
+    t.skip ("'ab  12 'a'' [5] -> '''");
+    t.skip ("'ab  12 'a'' [6] -> 'a'");
+    t.skip ("'ab  12 'a'' [7] -> '''");
+  }
+
+  {
+    Lexer l (input);
+    l.coalesceAlpha (false);  // Expect "ab" to come back as "a", "b".
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 9, "'ab  12 'a'' -> 'a|b| | |12| |'|a|''");
+  if (tokens.size () == 9)
+  {
+    t.is (tokens[0], "a",  "'ab  12 'a'' [0] -> 'a'");
+    t.is (tokens[1], "b",  "'ab  12 'a'' [1] -> 'b'");
+    t.is (tokens[2], " ",  "'ab  12 'a'' [2] -> ' '");
+    t.is (tokens[3], " ",  "'ab  12 'a'' [3] -> ' '");
+    t.is (tokens[4], "12", "'ab  12 'a'' [4] -> '12'");
+    t.is (tokens[5], " ",  "'ab  12 'a'' [5] -> ' '");
+    t.is (tokens[6], "'",  "'ab  12 'a'' [6] -> '''");
+    t.is (tokens[7], "a",  "'ab  12 'a'' [7] -> 'a'");
+    t.is (tokens[8], "'",  "'ab  12 'a'' [8] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'a'");
+    t.skip ("'ab  12 'a'' [1] -> 'b'");
+    t.skip ("'ab  12 'a'' [2] -> ' '");
+    t.skip ("'ab  12 'a'' [3] -> ' '");
+    t.skip ("'ab  12 'a'' [4] -> '12'");
+    t.skip ("'ab  12 'a'' [5] -> ' '");
+    t.skip ("'ab  12 'a'' [6] -> '''");
+    t.skip ("'ab  12 'a'' [7] -> 'a'");
+    t.skip ("'ab  12 'a'' [8] -> '''");
+  }
+
+  {
+    Lexer l (input);
+    l.coalesceDigits (false);  // Expect "12" to come back as "1", "2".
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 9, "'ab  12 'a'' -> 'ab| | |1|2| |'|a|''");
+  if (tokens.size () == 9)
+  {
+    t.is (tokens[0], "ab", "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], " ",  "'ab  12 'a'' [1] -> ' '");
+    t.is (tokens[2], " ",  "'ab  12 'a'' [2] -> ' '");
+    t.is (tokens[3], "1",  "'ab  12 'a'' [3] -> '1'");
+    t.is (tokens[4], "2",  "'ab  12 'a'' [4] -> '2'");
+    t.is (tokens[5], " ",  "'ab  12 'a'' [5] -> ' '");
+    t.is (tokens[6], "'",  "'ab  12 'a'' [6] -> '''");
+    t.is (tokens[7], "a",  "'ab  12 'a'' [7] -> 'a'");
+    t.is (tokens[8], "'",  "'ab  12 'a'' [8] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> ' '");
+    t.skip ("'ab  12 'a'' [2] -> ' '");
+    t.skip ("'ab  12 'a'' [3] -> '1'");
+    t.skip ("'ab  12 'a'' [4] -> '2'");
+    t.skip ("'ab  12 'a'' [5] -> ' '");
+    t.skip ("'ab  12 'a'' [6] -> '''");
+    t.skip ("'ab  12 'a'' [7] -> 'a'");
+    t.skip ("'ab  12 'a'' [8] -> '''");
+  }
+
+  {
+    Lexer l (input);
+    l.coalesceQuoted (true);  // Expect the quotes and their content as the single token "'a'".
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 6, "'ab  12 'a'' -> 'ab| | |12| |'a''");
+  if (tokens.size () == 6)
+  {
+    t.is (tokens[0], "ab",  "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], " ",   "'ab  12 'a'' [1] -> ' '");
+    t.is (tokens[2], " ",   "'ab  12 'a'' [2] -> ' '");
+    t.is (tokens[3], "12",  "'ab  12 'a'' [3] -> '12'");
+    t.is (tokens[4], " ",   "'ab  12 'a'' [4] -> ' '");
+    t.is (tokens[5], "'a'", "'ab  12 'a'' [5] -> ''a''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> ' '");
+    t.skip ("'ab  12 'a'' [2] -> ' '");
+    t.skip ("'ab  12 'a'' [3] -> '12'");
+    t.skip ("'ab  12 'a'' [4] -> ' '");
+    t.skip ("'ab  12 'a'' [5] -> ''a''");
+  }
+
+  {
+    Lexer l (input);
+    l.coalesceWhite (true);  // Expect the two adjacent spaces as the single token "  ".
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 7, "'ab  12 'a'' -> 'ab|  |12| |'|a|''");
+  if (tokens.size () == 7)
+  {
+    t.is (tokens[0], "ab", "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], "  ", "'ab  12 'a'' [1] -> '  '");
+    t.is (tokens[2], "12", "'ab  12 'a'' [2] -> '12'");
+    t.is (tokens[3], " ",  "'ab  12 'a'' [3] -> ' '");
+    t.is (tokens[4], "'",  "'ab  12 'a'' [4] -> '''");
+    t.is (tokens[5], "a",  "'ab  12 'a'' [5] -> 'a'");
+    t.is (tokens[6], "'",  "'ab  12 'a'' [6] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> '  '");
+    t.skip ("'ab  12 'a'' [2] -> '12'");
+    t.skip ("'ab  12 'a'' [3] -> ' '");
+    t.skip ("'ab  12 'a'' [4] -> '''");
+    t.skip ("'ab  12 'a'' [5] -> 'a'");
+    t.skip ("'ab  12 'a'' [6] -> '''");
+  }
+
+  {
+    Lexer l (input);
+    l.skipWhitespace (true);  // Expect no whitespace tokens in the result.
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 5, "'ab  12 'a'' -> 'ab|12|'|a|''");
+  if (tokens.size () == 5)
+  {
+    t.is (tokens[0], "ab", "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], "12", "'ab  12 'a'' [1] -> '12'");
+    t.is (tokens[2], "'",  "'ab  12 'a'' [2] -> '''");
+    t.is (tokens[3], "a",  "'ab  12 'a'' [3] -> 'a'");
+    t.is (tokens[4], "'",  "'ab  12 'a'' [4] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> '12'");
+    t.skip ("'ab  12 'a'' [2] -> '''");
+    t.skip ("'ab  12 'a'' [3] -> 'a'");
+    t.skip ("'ab  12 'a'' [4] -> '''");
+  }
+
+  // Special tokens: ":=" is registered and must come back as one token, not as ':' and '='.
+  input = "a := 1";
+  {
+    Lexer l (input);
+    l.skipWhitespace (true);
+    l.specialToken (":=");
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 3, "'a := 1' -> 'a|:=|1'");
+  if (tokens.size () == 3)
+  {
+    t.is (tokens[0], "a",  "'a := 1' [0] -> 'a'");
+    t.is (tokens[1], ":=", "'a := 1' [1] -> ':='");
+    t.is (tokens[2], "1",  "'a := 1' [2] -> '1'");
+  }
+  else
+  {
+    t.skip ("'a := 1' [0] -> 'a'");
+    t.skip ("'a := 1' [1] -> ':='");
+    t.skip ("'a := 1' [2] -> '1'");
+  }
+
+  return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+