diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp
index bbc30bd40..063c23de5 100644
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@@ -36,7 +36,7 @@ Context context;
 ////////////////////////////////////////////////////////////////////////////////
 int main (int argc, char** argv)
 {
-  UnitTest t (806);
+  UnitTest t (912);
 
   std::vector > tokens;
   std::string token;
@@ -390,6 +390,151 @@ int main (int argc, char** argv)
     }
   }
 
+  // Test individual token classification: each Lexer::token call must report the expected Lexer::Type.
+  Lexer l4;
+
+  // Pattern: /.../, including an escaped slash and a lone quote between the slashes.
+  t.ok (l4.token ("/foo/") == Lexer::Type::pattern, "token Lexer::Type:pattern 1");
+  t.ok (l4.token ("/a\\/b/") == Lexer::Type::pattern, "token Lexer::Type:pattern 2");
+  t.ok (l4.token ("/'/") == Lexer::Type::pattern, "token Lexer::Type:pattern 3");
+
+  // Substitution: /from/to/ with and without a trailing 'g'.
+  t.ok (l4.token ("/from/to/g") == Lexer::Type::substitution, "token Lexer::Type:substitution 1");
+  t.ok (l4.token ("/from/to/") == Lexer::Type::substitution, "token Lexer::Type:substitution 2");
+
+  // Tag: '+' or '-' prefix; a leading '@' in the tag name is accepted.
+  t.ok (l4.token ("+tag") == Lexer::Type::tag, "token Lexer::Type:tag 1");
+  t.ok (l4.token ("-tag") == Lexer::Type::tag, "token Lexer::Type:tag 2");
+  t.ok (l4.token ("+@tag") == Lexer::Type::tag, "token Lexer::Type:tag 3");
+
+  // Path: several '/'-separated components are a path, not a pattern or substitution.
+  t.ok (l4.token ("/long/path/to/file.txt") == Lexer::Type::path, "token Lexer::Type:path 1");
+
+  // Word: digits followed by an ordinal suffix are a word, not a number.
+  t.ok (l4.token ("9th") == Lexer::Type::word, "token Lexer::Type:word 1");
+  t.ok (l4.token ("10th") == Lexer::Type::word, "token Lexer::Type:word 2");
+
+  // DOM: bare names (non-ASCII included) and dotted references with a name, number or UUID prefix.
+  t.ok (l4.token ("foo") == Lexer::Type::dom, "token Lexer::Type:dom 1");
+  t.ok (l4.token ("Çirçös") == Lexer::Type::dom, "token Lexer::Type:dom 2");
+  t.ok (l4.token ("☺") == Lexer::Type::dom, "token Lexer::Type:dom 3");
+  t.ok (l4.token ("name") == Lexer::Type::dom, "token Lexer::Type:dom 4");
+  t.ok (l4.token ("f1") == Lexer::Type::dom, "token Lexer::Type:dom 5");
+  t.ok (l4.token ("foo.bar") == Lexer::Type::dom, "token Lexer::Type:dom 6");
+  t.ok (l4.token ("1.foo.bar") == Lexer::Type::dom, "token Lexer::Type:dom 7");
+  t.ok (l4.token ("a360fc44-315c-4366-b70c-ea7e7520b749.foo.bar") == Lexer::Type::dom, "token Lexer::Type:dom 8");
+  t.ok (l4.token ("today") == Lexer::Type::dom, "token Lexer::Type:dom 9");
+
+  // URL: http and https schemes.
+  t.ok (l4.token ("http://tasktools.org") == Lexer::Type::url, "token Lexer::Type:url 1");
+  t.ok (l4.token ("https://bug.tasktools.org") == Lexer::Type::url, "token Lexer::Type:url 2");
+
+  // String: single- or double-quoted, with escaped quotes, tabs and Unicode code point notations inside.
+  t.ok (l4.token ("'one two'") == Lexer::Type::string, "token Lexer::Type:string 1");
+  t.ok (l4.token ("\"three\"") == Lexer::Type::string, "token Lexer::Type:string 2");
+  t.ok (l4.token ("'\\''") == Lexer::Type::string, "token Lexer::Type:string 3");
+  t.ok (l4.token ("\"\\\"\"") == Lexer::Type::string, "token Lexer::Type:string 4");
+  t.ok (l4.token ("\"\tfoo\t\"") == Lexer::Type::string, "token Lexer::Type:string 5");
+  t.ok (l4.token ("\"\\u20A43\"") == Lexer::Type::string, "token Lexer::Type:string 6");
+  t.ok (l4.token ("\"U+20AC4\"") == Lexer::Type::string, "token Lexer::Type:string 7");
+
+  // Number: integer, decimal and exponent forms (a fractional exponent is expected to classify as a number too).
+  t.ok (l4.token ("1") == Lexer::Type::number, "token Lexer::Type:number 1");
+  t.ok (l4.token ("3.14") == Lexer::Type::number, "token Lexer::Type:number 2");
+  t.ok (l4.token ("6.02217e23") == Lexer::Type::number, "token Lexer::Type:number 3");
+  t.ok (l4.token ("1.2e-3.4") == Lexer::Type::number, "token Lexer::Type:number 4");
+
+  // Hex: 0x-prefixed literal.
+  t.ok (l4.token ("0x2f") == Lexer::Type::hex, "token Lexer::Type:hex 1");
+
+  // Set: comma-separated numbers and ranges, e.g. 1,2,4-7,9.
+  t.ok (l4.token ("1,2") == Lexer::Type::set, "token Lexer::Type:set 1");
+  t.ok (l4.token ("1-2") == Lexer::Type::set, "token Lexer::Type:set 2");
+  t.ok (l4.token ("1-2,4") == Lexer::Type::set, "token Lexer::Type:set 3");
+  t.ok (l4.token ("1-2,4,6-8") == Lexer::Type::set, "token Lexer::Type:set 4");
+  t.ok (l4.token ("1-2,4,6-8,10-12") == Lexer::Type::set, "token Lexer::Type:set 5");
+
+  // Pair: name (optionally dotted) joined by ':', '=' or ':=' to a value that may be empty or quoted.
+  t.ok (l4.token ("name:value") == Lexer::Type::pair, "token Lexer::Type:pair 1");
+  t.ok (l4.token ("name=value") == Lexer::Type::pair, "token Lexer::Type:pair 2");
+  t.ok (l4.token ("name:=value") == Lexer::Type::pair, "token Lexer::Type:pair 3");
+  t.ok (l4.token ("name.mod:value") == Lexer::Type::pair, "token Lexer::Type:pair 4");
+  t.ok (l4.token ("name.mod=value") == Lexer::Type::pair, "token Lexer::Type:pair 5");
+  t.ok (l4.token ("name:") == Lexer::Type::pair, "token Lexer::Type:pair 6");
+  t.ok (l4.token ("name=") == Lexer::Type::pair, "token Lexer::Type:pair 7");
+  t.ok (l4.token ("name.mod:") == Lexer::Type::pair, "token Lexer::Type:pair 8");
+  t.ok (l4.token ("name.mod=") == Lexer::Type::pair, "token Lexer::Type:pair 9");
+  t.ok (l4.token ("pro:'P 1'") == Lexer::Type::pair, "token Lexer::Type:pair 10");
+  t.ok (l4.token ("rc:x") == Lexer::Type::pair, "token Lexer::Type:pair 11");
+  t.ok (l4.token ("rc.name:value") == Lexer::Type::pair, "token Lexer::Type:pair 12");
+  t.ok (l4.token ("rc.name=value") == Lexer::Type::pair, "token Lexer::Type:pair 13");
+  t.ok (l4.token ("rc.name:=value") == Lexer::Type::pair, "token Lexer::Type:pair 14");
+  t.ok (l4.token ("due:='eow - 2d'") == Lexer::Type::pair, "token Lexer::Type:pair 15");
+
+  // Operator - complete set
+  t.ok (l4.token ("^") == Lexer::Type::op, "token Lexer::Type:op 1");
+  t.ok (l4.token ("!") == Lexer::Type::op, "token Lexer::Type:op 2");
+  t.ok (l4.token ("_neg_") == Lexer::Type::op, "token Lexer::Type:op 3");
+  t.ok (l4.token ("_pos_") == Lexer::Type::op, "token Lexer::Type:op 4");
+  t.ok (l4.token ("_hastag_") == Lexer::Type::op, "token Lexer::Type:op 5");
+  t.ok (l4.token ("_notag_") == Lexer::Type::op, "token Lexer::Type:op 6");
+  t.ok (l4.token ("*") == Lexer::Type::op, "token Lexer::Type:op 7");
+  t.ok (l4.token ("/") == Lexer::Type::op, "token Lexer::Type:op 8");
+  t.ok (l4.token ("%") == Lexer::Type::op, "token Lexer::Type:op 9");
+  t.ok (l4.token ("+") == Lexer::Type::op, "token Lexer::Type:op 10");
+  t.ok (l4.token ("-") == Lexer::Type::op, "token Lexer::Type:op 11");
+  t.ok (l4.token ("<=") == Lexer::Type::op, "token Lexer::Type:op 12");
+  t.ok (l4.token (">=") == Lexer::Type::op, "token Lexer::Type:op 13");
+  t.ok (l4.token (">") == Lexer::Type::op, "token Lexer::Type:op 14");
+  t.ok (l4.token ("<") == Lexer::Type::op, "token Lexer::Type:op 15");
+  t.ok (l4.token ("=") == Lexer::Type::op, "token Lexer::Type:op 16");
+  t.ok (l4.token ("==") == Lexer::Type::op, "token Lexer::Type:op 17");
+  t.ok (l4.token ("!=") == Lexer::Type::op, "token Lexer::Type:op 18");
+  t.ok (l4.token ("!==") == Lexer::Type::op, "token Lexer::Type:op 19");
+  t.ok (l4.token ("~") == Lexer::Type::op, "token Lexer::Type:op 20");
+  t.ok (l4.token ("!~") == Lexer::Type::op, "token Lexer::Type:op 21");
+  t.ok (l4.token ("and") == Lexer::Type::op, "token Lexer::Type:op 22");
+  t.ok (l4.token ("or") == Lexer::Type::op, "token Lexer::Type:op 23");
+  t.ok (l4.token ("xor") == Lexer::Type::op, "token Lexer::Type:op 24");
+  t.ok (l4.token ("(") == Lexer::Type::op, "token Lexer::Type:op 25");
+  t.ok (l4.token (")") == Lexer::Type::op, "token Lexer::Type:op 26");
+
+  // A word that merely starts with 'or' (an operator) must not be classified as an operator; label continues the DOM numbering.
+  t.ok (l4.token ("ordinary") == Lexer::Type::dom, "token Lexer::Type:dom 10");
+
+  // UUID: the full form and progressively truncated forms.
+  t.ok (l4.token ("a360fc44-315c-4366-b70c-ea7e7520b749") == Lexer::Type::uuid, "token Lexer::Type:uuid 1");
+  t.ok (l4.token ("a360fc44-315c-4366-b70c-ea7e752") == Lexer::Type::uuid, "token Lexer::Type:uuid 2");
+  t.ok (l4.token ("a360fc44-315c-4366-b70c") == Lexer::Type::uuid, "token Lexer::Type:uuid 3");
+  t.ok (l4.token ("a360fc44-315c-4366") == Lexer::Type::uuid, "token Lexer::Type:uuid 4");
+  t.ok (l4.token ("a360fc44-315c") == Lexer::Type::uuid, "token Lexer::Type:uuid 5");
+  t.ok (l4.token ("a360fc44") == Lexer::Type::uuid, "token Lexer::Type:uuid 6");
+
+  // Date: week, calendar date, and date-time in both dashed/colon and compact spellings.
+  t.ok (l4.token ("2015-W01") == Lexer::Type::date, "token Lexer::Type:date 1");
+  t.ok (l4.token ("2015-02-17") == Lexer::Type::date, "token Lexer::Type:date 2");
+  t.ok (l4.token ("2013-11-29T22:58:00Z") == Lexer::Type::date, "token Lexer::Type:date 3");
+  t.ok (l4.token ("20131129T225800Z") == Lexer::Type::date, "token Lexer::Type:date 4");
+
+  // Duration: bare unit names, number+unit (with or without a space), and 'P...' designator forms.
+  t.ok (l4.token ("year") == Lexer::Type::duration, "token Lexer::Type:duration 1");
+  t.ok (l4.token ("4weeks") == Lexer::Type::duration, "token Lexer::Type:duration 2");
+  t.ok (l4.token ("PT23H") == Lexer::Type::duration, "token Lexer::Type:duration 3");
+  t.ok (l4.token ("1second") == Lexer::Type::duration, "token Lexer::Type:duration 4");
+  t.ok (l4.token ("1s") == Lexer::Type::duration, "token Lexer::Type:duration 5");
+  t.ok (l4.token ("1minute") == Lexer::Type::duration, "token Lexer::Type:duration 6");
+  t.ok (l4.token ("2hour") == Lexer::Type::duration, "token Lexer::Type:duration 7");
+  t.ok (l4.token ("3 days") == Lexer::Type::duration, "token Lexer::Type:duration 8");
+  t.ok (l4.token ("4w") == Lexer::Type::duration, "token Lexer::Type:duration 9");
+  t.ok (l4.token ("5mo") == Lexer::Type::duration, "token Lexer::Type:duration 10");
+  t.ok (l4.token ("6 years") == Lexer::Type::duration, "token Lexer::Type:duration 11");
+  t.ok (l4.token ("P1Y") == Lexer::Type::duration, "token Lexer::Type:duration 12");
+  t.ok (l4.token ("PT1H") == Lexer::Type::duration, "token Lexer::Type:duration 13");
+  t.ok (l4.token ("P1Y1M1DT1H1M1S") == Lexer::Type::duration, "token Lexer::Type:duration 14");
+
+  // Misc: the '--' separator.
+  t.ok (l4.token ("--") == Lexer::Type::separator, "token Lexer::Type:separator 1");
+
   return 0;
 }
 