Feature #158

- Added feature #158, regular expression support for filters and substitutions. - Added unit tests to att.t and filter.t. - Updated docs.
2010-11-26 17:26:03 -05:00
parent 09ce815fc5
commit fe65d28f99
10 changed files with 392 additions and 72 deletions
--- a/src/Att.cpp
+++ b/src/Att.cpp
@@ -29,13 +29,14 @@
 #include <sstream>
 #include <stdlib.h>
 #include <string.h>
-#include "text.h"
-#include "Color.h"
-#include "util.h"
-#include "Date.h"
-#include "Duration.h"
-#include "Context.h"
-#include "Att.h"
+#include <text.h>
+#include <rx.h>
+#include <Color.h>
+#include <util.h>
+#include <Date.h>
+#include <Duration.h>
+#include <Context.h>
+#include <Att.h>

 extern Context context;

@@ -533,11 +534,22 @@ void Att::parse (Nibbler& n)
 ////////////////////////////////////////////////////////////////////////////////
 // "this" is the attribute that has modifiers.  "other" is the attribute from a
 // Record that does not have modifiers, but may have a value.
+//
+// In other words, the filter:
+//   task list description.contains:foo
+//
+// Is represented with:
+//   this  = filter (description.contains:foo)
+//   other = actual task data to be matched
+//
 bool Att::match (const Att& other) const
 {
  // All matches are assumed to pass, any short-circuit on non-match.
  bool case_sensitive = context.config.getBoolean ("search.case.sensitive");

+  // Are regular expressions being used in place of string comparison?
+  bool regex = context.config.getBoolean ("regex");
+
  // If there are no mods, just perform a straight compare on value.
  if (mMod == "")
  {
@@ -557,7 +569,13 @@ bool Att::match (const Att& other) const
    }
    else
    {
-      if (!compare (mValue, other.mValue, (bool) case_sensitive))
+      if (regex)
+      {
+        std::string pattern = "^" + mValue + "$";
+        if (!regexMatch (other.mValue, pattern, case_sensitive))
+          return false;
+      }
+      else if (!compare (mValue, other.mValue, (bool) case_sensitive))
        return false;
    }
  }
@@ -565,21 +583,38 @@ bool Att::match (const Att& other) const
  // has = contains as a substring.
  else if (mMod == "has" || mMod == "contains") // TODO i18n
  {
-    if (find (other.mValue, mValue, (bool) case_sensitive) == std::string::npos)
+    if (regex)
+    {
+      if (!regexMatch (other.mValue, mValue, case_sensitive))
+        return false;
+    }
+    else if (find (other.mValue, mValue, (bool) case_sensitive) == std::string::npos)
      return false;
  }

  // is = equal.  Nop.
  else if (mMod == "is" || mMod == "equals") // TODO i18n
  {
-    if (!compare (mValue, other.mValue, (bool) case_sensitive))
+    if (regex)
+    {
+      std::string pattern = "^" + mValue + "$";
+      if (!regexMatch (other.mValue, pattern, case_sensitive))
+        return false;
+    }
+    else if (!compare (mValue, other.mValue, (bool) case_sensitive))
      return false;
  }

  // isnt = not equal.
  else if (mMod == "isnt" || mMod == "not") // TODO i18n
  {
-    if (compare (mValue, other.mValue, (bool) case_sensitive))
+    if (regex)
+    {
+      std::string pattern = "^" + mValue + "$"; // Anchored: the whole value must match.
+      if (regexMatch (other.mValue, pattern, case_sensitive))
+        return false; // Values are equal, so "isnt" fails.
+    }
+    else if (compare (mValue, other.mValue, (bool) case_sensitive))
      return false;
  }

@@ -600,29 +635,52 @@ bool Att::match (const Att& other) const
  // startswith = first characters must match.
  else if (mMod == "startswith" || mMod == "left") // TODO i18n
  {
-    if (other.mValue.length () < mValue.length ())
-      return false;
+    if (regex)
+    {
+      std::string pattern = "^" + mValue;
+      if (!regexMatch (other.mValue, pattern, case_sensitive))
+        return false;
+    }
+    else
+    {
+      if (other.mValue.length () < mValue.length ())
+        return false;

      if (!compare (mValue, other.mValue.substr (0, mValue.length ()), (bool) case_sensitive))
-      return false;
+        return false;
+    }
  }

  // endswith = last characters must match.
  else if (mMod == "endswith" || mMod == "right") // TODO i18n
  {
-    if (other.mValue.length () < mValue.length ())
-      return false;
+    if (regex)
+    {
+      std::string pattern = mValue + "$";
+      if (!regexMatch (other.mValue, pattern, case_sensitive))
+        return false;
+    }
+    else
+    {
+      if (other.mValue.length () < mValue.length ())
+        return false;

-    if (!compare (mValue, other.mValue.substr (
-                    other.mValue.length () - mValue.length (),
-                    std::string::npos), (bool) case_sensitive))
-      return false;
+      if (!compare (mValue, other.mValue.substr (
+                      other.mValue.length () - mValue.length (),
+                      std::string::npos), (bool) case_sensitive))
+        return false;
+    }
  }

  // hasnt = does not contain as a substring.
  else if (mMod == "hasnt") // TODO i18n
  {
-    if (find (other.mValue, mValue, (bool) case_sensitive) != std::string::npos)
+    if (regex)
+    {
+      if (regexMatch (other.mValue, mValue, case_sensitive))
+        return false;
+    }
+    else if (find (other.mValue, mValue, (bool) case_sensitive) != std::string::npos)
      return false;
  }

@@ -705,29 +763,57 @@ bool Att::match (const Att& other) const
  // word = contains as a substring, with word boundaries.
  else if (mMod == "word") // TODO i18n
  {
-    // Fail if the substring is not found.
-    std::string::size_type sub = find (other.mValue, mValue, (bool) case_sensitive);
-    if (sub == std::string::npos)
-      return false;
+    if (regex)
+    {
+      std::vector <int> start;
+      std::vector <int> end;
+      if (!regexMatch (start, end, other.mValue, mValue, case_sensitive))
+        return false;

-    // Also fail if there is no word boundary at beginning and end.
-    if (!isWordStart (other.mValue, sub))
-      return false;
+      if (!isWordStart (other.mValue, start[0]))
+        return false;

-    if (!isWordEnd (other.mValue, sub + mValue.length () - 1))
-      return false;
+      if (!isWordEnd (other.mValue, end[0]))
+        return false;
+    }
+    else
+    {
+      // Fail if the substring is not found.
+      std::string::size_type sub = find (other.mValue, mValue, (bool) case_sensitive);
+      if (sub == std::string::npos)
+        return false;
+
+      // Also fail if there is no word boundary at beginning and end.
+      if (!isWordStart (other.mValue, sub))
+        return false;
+
+      if (!isWordEnd (other.mValue, sub + mValue.length () - 1))
+        return false;
+    }
  }

  // noword = does not contain as a substring, with word boundaries.
  else if (mMod == "noword") // TODO i18n
  {
-    // Fail if the substring is not found.
-    std::string::size_type sub = find (other.mValue, mValue);
-    if (sub != std::string::npos &&
-        isWordStart (other.mValue, sub) &&
-        isWordEnd (other.mValue, sub + mValue.length () - 1))
+    if (regex)
    {
-      return false;
+      std::vector <int> start;
+      std::vector <int> end;
+      if (regexMatch (start, end, other.mValue, mValue, case_sensitive) &&
+          isWordStart (other.mValue, start[0])                     &&
+          isWordEnd (other.mValue, end[0]))
+        return false;
+    }
+    else
+    {
+      // Fail if the substring is found with a word boundary at both ends.
+      std::string::size_type sub = find (other.mValue, mValue, (bool) case_sensitive);
+      if (sub != std::string::npos &&
+          isWordStart (other.mValue, sub) &&
+          isWordEnd (other.mValue, sub + mValue.length () - 1))
+      {
+        return false;
+      }
    }
  }

--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -81,6 +81,7 @@ std::string Config::defaults =
  "recurrence.limit=1                             # Number of future recurring pending tasks\n"
  "undo.style=side                                # Undo style - can be 'side', or 'diff'\n"
  "burndown.bias=0.666                            # Weighted mean bias toward recent data\n"
+  "regex=no                                       # Assume all search/filter strings are regexes\n"
  "\n"
  "# Dates\n"
  "dateformat=m/d/Y                               # Preferred input and display date format\n"
--- a/src/Subst.cpp
+++ b/src/Subst.cpp
@@ -30,6 +30,7 @@
 #include <Directory.h>
 #include <Context.h>
 #include <text.h>
+#include <rx.h>
 #include <i18n.h>

 extern Context context;
@@ -131,58 +132,112 @@ void Subst::apply (

  if (mFrom != "")
  {
-    if (mGlobal)
+    if (context.config.getBoolean ("regex"))
    {
+      // Insert capturing parentheses, if necessary.
+      std::string pattern;
+      if (mFrom.find ('(') != std::string::npos)
+        pattern = mFrom;
+      else
+        pattern = "(" + mFrom + ")";
+
+      std::vector <int> start;
+      std::vector <int> end;
+
      // Perform all subs on description.
      int counter = 0;
-      pattern = 0;
-
-      while ((pattern = find (description, mFrom, pattern, sensitive)) != std::string::npos)
+      if (regexMatch (start, end, description, pattern, sensitive))
      {
-        description.replace (pattern, mFrom.length (), mTo);
-        pattern += mTo.length ();
-
-        if (++counter > 1000)
-          throw ("Terminated substitution because more than a thousand changes were made - infinite loop protection.");
-      }
-
-      // Perform all subs on annotations.
-      counter = 0;
-      pattern = 0;
-      std::vector <Att>::iterator i;
-      for (i = annotations.begin (); i != annotations.end (); ++i)
-      {
-        std::string annotation = i->value ();
-        while ((pattern = find (annotation, mFrom, pattern, sensitive)) != std::string::npos)
+        for (unsigned int i = 0; i < start.size (); ++i)
        {
-          annotation.replace (pattern, mFrom.length (), mTo);
-          pattern += mTo.length ();
-
-          i->value (annotation);
+          description.replace (start[i], end[i] - start[i], mTo);
+          if (!mGlobal)
+            break;

          if (++counter > 1000)
            throw ("Terminated substitution because more than a thousand changes were made - infinite loop protection.");
        }
      }
+
+      // Perform all subs on annotations.
+      counter = 0;
+      std::vector <Att>::iterator i;
+      for (i = annotations.begin (); i != annotations.end (); ++i)
+      {
+        std::string annotation = i->value ();
+        start.clear ();
+        end.clear ();
+
+        if (regexMatch (start, end, annotation, pattern, sensitive))
+        {
+          for (unsigned int match = 0; match < start.size (); ++match)
+          {
+            annotation.replace (start[match], end[match] - start[match], mTo);
+            i->value (annotation);
+            if (!mGlobal)
+              break;
+
+            if (++counter > 1000)
+              throw ("Terminated substitution because more than a thousand changes were made - infinite loop protection.");
+          }
+        }
+      }
    }
    else
    {
-      // Perform first description substitution.
-      if ((pattern = find (description, mFrom, sensitive)) != std::string::npos)
-        description.replace (pattern, mFrom.length (), mTo);
-
-      // Failing that, perform the first annotation substitution.
-      else
+      if (mGlobal)
      {
+        // Perform all subs on description.
+        int counter = 0;
+        pattern = 0;
+
+        while ((pattern = find (description, mFrom, pattern, sensitive)) != std::string::npos)
+        {
+          description.replace (pattern, mFrom.length (), mTo);
+          pattern += mTo.length ();
+
+          if (++counter > 1000)
+            throw ("Terminated substitution because more than a thousand changes were made - infinite loop protection.");
+        }
+
+        // Perform all subs on annotations.
+        counter = 0;
+        pattern = 0;
        std::vector <Att>::iterator i;
        for (i = annotations.begin (); i != annotations.end (); ++i)
        {
          std::string annotation = i->value ();
-          if ((pattern = find (annotation, mFrom, sensitive)) != std::string::npos)
+          while ((pattern = find (annotation, mFrom, pattern, sensitive)) != std::string::npos)
          {
            annotation.replace (pattern, mFrom.length (), mTo);
+            pattern += mTo.length ();
+
            i->value (annotation);
-            break;
+
+            if (++counter > 1000)
+              throw ("Terminated substitution because more than a thousand changes were made - infinite loop protection.");
+          }
+        }
+      }
+      else
+      {
+        // Perform first description substitution.
+        if ((pattern = find (description, mFrom, sensitive)) != std::string::npos)
+          description.replace (pattern, mFrom.length (), mTo);
+
+        // Failing that, perform the first annotation substitution.
+        else
+        {
+          std::vector <Att>::iterator i;
+          for (i = annotations.begin (); i != annotations.end (); ++i)
+          {
+            std::string annotation = i->value ();
+            if ((pattern = find (annotation, mFrom, sensitive)) != std::string::npos)
+            {
+              annotation.replace (pattern, mFrom.length (), mTo);
+              i->value (annotation);
+              break;
+            }
          }
        }
      }
--- a/src/command.cpp
+++ b/src/command.cpp
@@ -897,7 +897,7 @@ int handleShow (std::string& outs)
      "color.burndown.pending color.burndown.started color.overdue color.pri.H "
      "color.pri.L color.pri.M color.pri.none color.recurring color.tagged "
      "color.footnote color.header color.debug color.alternate color.calendar.today "
-      "color.calendar.due color.calendar.due.today color.calendar.overdue "
+      "color.calendar.due color.calendar.due.today color.calendar.overdue regex "
      "color.calendar.weekend color.calendar.holiday color.calendar.weeknumber "
      "color.summary.background color.summary.bar color.history.add "
      "color.history.done color.history.delete color.undo.before "
--- a/src/tests/att.t.cpp
+++ b/src/tests/att.t.cpp
@@ -34,7 +34,7 @@ Context context;
 ////////////////////////////////////////////////////////////////////////////////
 int main (int argc, char** argv)
 {
-  UnitTest t (99);
+  UnitTest t (117);

  Att a;
  t.notok (a.valid ("name"),            "Att::valid name         -> fail");
@@ -86,7 +86,7 @@ int main (int argc, char** argv)
  t.is (a6.value_int (), 7, "Att::value_int set/get");
  t.is (a6.value (), "7", "Att::value 7");

-  // Att::mod
+  // Att::mod - straight comparisons.
  bool good = true;
  try {a6.mod ("is");} catch (...) {good = false;}
  t.ok (good, "Att::mod (is)");
@@ -159,6 +159,81 @@ int main (int argc, char** argv)
  try {a6.mod ("unrecognized");} catch (...) {good = false;}
  t.notok (good, "Att::mod (unrecognized)");

+  // Att::mod - regex comparisons.
+  context.config.set ("regex", "on");
+
+  good = true;
+  try {a6.mod ("is");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (is)");
+
+  good = true;
+  try {a6.mod ("before");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (before)");
+
+  good = true;
+  try {a6.mod ("after");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (after)");
+
+  good = true;
+  try {a6.mod ("none");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (none)");
+
+  good = true;
+  try {a6.mod ("any");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (any)");
+
+  good = true;
+  try {a6.mod ("over");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (over)");
+
+  good = true;
+  try {a6.mod ("under");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (under)");
+
+  good = true;
+  try {a6.mod ("above");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (above)");
+
+  good = true;
+  try {a6.mod ("below");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (below)");
+
+  good = true;
+  try {a6.mod ("isnt");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (isnt)");
+
+  good = true;
+  try {a6.mod ("has");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (has)");
+
+  good = true;
+  try {a6.mod ("contains");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (contains)");
+
+  good = true;
+  try {a6.mod ("hasnt");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (hasnt)");
+
+  good = true;
+  try {a6.mod ("startswith");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (startswith)");
+
+  good = true;
+  try {a6.mod ("endswith");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (endswith)");
+
+  good = true;
+  try {a6.mod ("word");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (word)");
+
+  good = true;
+  try {a6.mod ("noword");} catch (...) {good = false;}
+  t.ok (good, "Att::mod (noword)");
+
+  good = true;
+  try {a6.mod ("unrecognized");} catch (...) {good = false;}
+  t.notok (good, "Att::mod (unrecognized)");
+
  // Att::parse
  Nibbler n ("");
  Att a7;
--- a/src/tests/filter.t
+++ b/src/tests/filter.t
@@ -28,7 +28,7 @@

 use strict;
 use warnings;
-use Test::More tests => 131;
+use Test::More tests => 180;

 # Create the rc file.
 if (open my $fh, '>', 'filter.rc')
@@ -209,6 +209,70 @@ unlike ($output, qr/five/,  'r5');
 unlike ($output, qr/six/,   'r6');
 unlike ($output, qr/seven/, 'r7');

+# Regex filters.
+$output = qx{../task rc:filter.rc list rc.regex:on project:[A-Z]};
+like   ($output, qr/one/,   's1');
+like   ($output, qr/two/,   's2');
+like   ($output, qr/three/, 's3');
+unlike ($output, qr/four/,  's4');
+unlike ($output, qr/five/,  's5');
+unlike ($output, qr/six/,   's6');
+unlike ($output, qr/seven/, 's7');
+
+$output = qx{../task rc:filter.rc list rc.regex:on project:.};
+like   ($output, qr/one/,   't1');
+like   ($output, qr/two/,   't2');
+like   ($output, qr/three/, 't3');
+unlike ($output, qr/four/,  't4');
+unlike ($output, qr/five/,  't5');
+unlike ($output, qr/six/,   't6');
+unlike ($output, qr/seven/, 't7');
+
+$output = qx{../task rc:filter.rc rc.regex:on list fo\{2\}};
+like   ($output, qr/one/,   'u1');
+unlike ($output, qr/two/,   'u2');
+unlike ($output, qr/three/, 'u3');
+unlike ($output, qr/four/,  'u4');
+unlike ($output, qr/five/,  'u5');
+like   ($output, qr/six/,   'u6');
+like   ($output, qr/seven/, 'u7');
+
+$output = qx{../task rc:filter.rc rc.regex:on list f.. b..};
+unlike ($output, qr/one/,   'v1');
+unlike ($output, qr/two/,   'v2');
+unlike ($output, qr/three/, 'v3');
+unlike ($output, qr/four/,  'v4');
+unlike ($output, qr/five/,  'v5');
+unlike ($output, qr/six/,   'v6');
+like   ($output, qr/seven/, 'v7');
+
+$output = qx{../task rc:filter.rc rc.regex:on list ^s};
+unlike ($output, qr/one/,   'w1');
+unlike ($output, qr/two/,   'w2');
+unlike ($output, qr/three/, 'w3');
+unlike ($output, qr/four/,  'w4');
+unlike ($output, qr/five/,  'w5');
+like   ($output, qr/six/,   'w6');
+like   ($output, qr/seven/, 'w7');
+
+$output = qx{../task rc:filter.rc rc.regex:on list ^.i};
+unlike ($output, qr/one/,   'x1');
+unlike ($output, qr/two/,   'x2');
+unlike ($output, qr/three/, 'x3');
+unlike ($output, qr/four/,  'x4');
+like   ($output, qr/five/,  'x5');
+like   ($output, qr/six/,   'x6');
+unlike ($output, qr/seven/, 'x7');
+
+$output = qx{../task rc:filter.rc rc.regex:on list "two|five"};
+unlike ($output, qr/one/,   'y1');
+like   ($output, qr/two/,   'y2');
+unlike ($output, qr/three/, 'y3');
+unlike ($output, qr/four/,  'y4');
+like   ($output, qr/five/,  'y5');
+unlike ($output, qr/six/,   'y6');
+unlike ($output, qr/seven/, 'y7');
+
 # Cleanup.
 unlink 'pending.data';
 ok (!-r 'pending.data', 'Removed pending.data');