CLI2: Integrated Lexer::decomposePair

2015-07-08 09:48:15 -04:00
parent e6c4f48a48
commit 5a21945a0d
1 changed files with 126 additions and 142 deletions
--- a/src/CLI2.cpp
+++ b/src/CLI2.cpp
@@ -1031,163 +1031,147 @@ void CLI2::desugarFilterAttributes ()
    if (a._lextype == Lexer::Type::pair &&
        a.hasTag ("FILTER"))
    {
-      changes = true;
+      std::string raw = a.attribute ("raw");
-
+      std::string name;
-      auto raw   = a.attribute ("raw");
+      std::string mod;
-      auto dot   = raw.find ('.');
+      std::string sep;
-      auto colon = raw.find (':');
+      std::string value;
-      if (colon == std::string::npos)
+      if (Lexer::decomposePair (raw, name, mod, sep, value))
        colon = raw.find ('=');
      std::string name  = "";
      std::string mod   = "";
      std::string value = "";
      // If the dot appears after the colon, then it is part of the value, and
      // should be ignored.
      if (dot != std::string::npos &&
          dot < colon)
      {
-        name = raw.substr (0, dot);
+        if (value == "")
-        mod  = raw.substr (dot + 1, colon - dot - 1);
+          value = "''";
      }
      else
      {
        name = raw.substr (0, colon);
      }
-      value = raw.substr (colon + 1);
+        bool found = false;
-      if (value == "")
+        std::string canonical;
-        value = "''";
+        if (canonicalize (canonical, "pseudo", name))
        {
          A2 lhs (raw, Lexer::Type::identifier);
          lhs.attribute ("canonical", canonical);
          lhs.attribute ("value", value);
          lhs.tag ("PSEUDO");
          reconstructed.push_back (lhs);
          found = true;
        }
        else if (canonicalize (canonical, "attribute", name) ||
                 canonicalize (canonical, "uda",       name))
        {
          // TODO The "!" modifier is being dropped.
-      bool found = false;
+          A2 lhs (name, Lexer::Type::dom);
-      std::string canonical;
+          lhs.tag ("FILTER");
-      if (canonicalize (canonical, "pseudo", name))
+          lhs.attribute ("canonical", canonical);
-      {
+          lhs.attribute ("modifier", mod);
-        A2 lhs (raw, Lexer::Type::identifier);
+
-        lhs.attribute ("canonical", canonical);
+          A2 op ("", Lexer::Type::op);
-        lhs.attribute ("value", value);
+          op.tag ("FILTER");
-        lhs.tag ("PSEUDO");
+
-        reconstructed.push_back (lhs);
+          A2 rhs ("", Lexer::Type::string);
-        found = true;
+          rhs.tag ("FILTER");
-      }
+
-      else
+          // Special case for '<name>:<value>'.
-      {
+          if (mod == "")
          {
            op.attribute ("raw", "=");
            rhs.attribute ("raw", value);
          }
          else if (mod == "before" || mod == "under" || mod == "below")
          {
            op.attribute ("raw", "<");
            rhs.attribute ("raw", value);
          }
          else if (mod == "after" || mod == "over" || mod == "above")
          {
            op.attribute ("raw", ">");
            rhs.attribute ("raw", value);
          }
          else if (mod == "none")
          {
            op.attribute ("raw", "==");
            rhs.attribute ("raw", "''");
          }
          else if (mod == "any")
          {
            op.attribute ("raw", "!==");
            rhs.attribute ("raw", "''");
          }
          else if (mod == "is" || mod == "equals")
          {
            op.attribute ("raw", "==");
            rhs.attribute ("raw", value);
          }
          else if (mod == "isnt" || mod == "not")
          {
            op.attribute ("raw", "!==");
            rhs.attribute ("raw", value);
          }
          else if (mod == "has" || mod == "contains")
          {
            op.attribute ("raw", "~");
            rhs.attribute ("raw", value);
          }
          else if (mod == "hasnt")
          {
            op.attribute ("raw", "!~");
            rhs.attribute ("raw", value);
          }
          else if (mod == "startswith" || mod == "left")
          {
            op.attribute ("raw", "~");
            rhs.attribute ("raw", "^" + value);
          }
          else if (mod == "endswith" || mod == "right")
          {
            op.attribute ("raw", "~");
            rhs.attribute ("raw", value + "$");
          }
          else if (mod == "word")
          {
            op.attribute ("raw", "~");
 #if defined (DARWIN)
            rhs.attribute ("raw", value);
 #elif defined (SOLARIS)
            rhs.attribute ("raw", "\\<" + value + "\\>");
 #else
            rhs.attribute ("raw", "\\b" + value + "\\b");
 #endif
          }
          else if (mod == "noword")
          {
            op.attribute ("raw", "!~");
 #if defined (DARWIN)
            rhs.attribute ("raw", value);
 #elif defined (SOLARIS)
            rhs.attribute ("raw", "\\<" + value + "\\>");
 #else
            rhs.attribute ("raw", "\\b" + value + "\\b");
 #endif
          }
          else
            throw format (STRING_PARSER_UNKNOWN_ATTMOD, mod);
          reconstructed.push_back (lhs);
          reconstructed.push_back (op);
          reconstructed.push_back (rhs);
          found = true;
        }
        // If the name does not canonicalize to either an attribute or a UDA
        // then it is not a recognized Lexer::Type::pair, so downgrade it to
        // Lexer::Type::word.
-        if (! canonicalize (canonical, "attribute", name) &&
+        else
            ! canonicalize (canonical, "uda", name))
        {
          a._lextype = Lexer::Type::word;
          continue;
        }
-        // TODO The "!" modifier is being dropped.
+        if (found)
-
+          changes = true;
        A2 lhs (name, Lexer::Type::dom);
        lhs.tag ("FILTER");
        lhs.attribute ("canonical", canonical);
        lhs.attribute ("modifier", mod);
        A2 op ("", Lexer::Type::op);
        op.tag ("FILTER");
        A2 rhs ("", Lexer::Type::string);
        rhs.tag ("FILTER");
        // Special case for '<name>:<value>'.
        if (mod == "")
        {
          op.attribute ("raw", "=");
          rhs.attribute ("raw", value);
        }
        else if (mod == "before" || mod == "under" || mod == "below")
        {
          op.attribute ("raw", "<");
          rhs.attribute ("raw", value);
        }
        else if (mod == "after" || mod == "over" || mod == "above")
        {
          op.attribute ("raw", ">");
          rhs.attribute ("raw", value);
        }
        else if (mod == "none")
        {
          op.attribute ("raw", "==");
          rhs.attribute ("raw", "''");
        }
        else if (mod == "any")
        {
          op.attribute ("raw", "!==");
          rhs.attribute ("raw", "''");
        }
        else if (mod == "is" || mod == "equals")
        {
          op.attribute ("raw", "==");
          rhs.attribute ("raw", value);
        }
        else if (mod == "isnt" || mod == "not")
        {
          op.attribute ("raw", "!==");
          rhs.attribute ("raw", value);
        }
        else if (mod == "has" || mod == "contains")
        {
          op.attribute ("raw", "~");
          rhs.attribute ("raw", value);
        }
        else if (mod == "hasnt")
        {
          op.attribute ("raw", "!~");
          rhs.attribute ("raw", value);
        }
        else if (mod == "startswith" || mod == "left")
        {
          op.attribute ("raw", "~");
          rhs.attribute ("raw", "^" + value);
        }
        else if (mod == "endswith" || mod == "right")
        {
          op.attribute ("raw", "~");
          rhs.attribute ("raw", value + "$");
        }
        else if (mod == "word")
        {
          op.attribute ("raw", "~");
 #if defined (DARWIN)
          rhs.attribute ("raw", value);
 #elif defined (SOLARIS)
          rhs.attribute ("raw", "\\<" + value + "\\>");
 #else
          rhs.attribute ("raw", "\\b" + value + "\\b");
 #endif
        }
        else if (mod == "noword")
        {
          op.attribute ("raw", "!~");
 #if defined (DARWIN)
          rhs.attribute ("raw", value);
 #elif defined (SOLARIS)
          rhs.attribute ("raw", "\\<" + value + "\\>");
 #else
          rhs.attribute ("raw", "\\b" + value + "\\b");
 #endif
        }
        else
-          throw format (STRING_PARSER_UNKNOWN_ATTMOD, mod);
+          reconstructed.push_back (a);
        reconstructed.push_back (lhs);
        reconstructed.push_back (op);
        reconstructed.push_back (rhs);
        found = true;
      }
-
+      // Failed to decompose.
      if (found)
        changes = true;
      else
        reconstructed.push_back (a);
    }
    // Not a FILTER pair.
    else
      reconstructed.push_back (a);
  }