RX: return every match of the pattern, not the first match's capture groups.

- RX::compile: drop the commented-out REG_NOSUB flag.
- RX::match (matches, in) and RX::match (start, end, in): walk the input and
  record each successive whole-pattern match; RX_MAX_MATCHES is removed.
  The search loop tests its bound before calling regexec, passes REG_NOTBOL
  on resumed searches so '^' cannot match in the middle of a line, and
  returns whether this call matched rather than the output vector's size.
- test/rx.t.cpp: expectations updated for multiple matches (16 -> 21 tests).

Note: the 'index' lines are carried over from the original patch.  Some
context lines (the two '#include' lines, the 'std::vector' declarations)
appear to have lost their angle-bracketed text in this copy and are kept
as found.

diff --git a/src/RX.cpp b/src/RX.cpp
index 4d8f2fc2b..c3d93ee52 100644
--- a/src/RX.cpp
+++ b/src/RX.cpp
@@ -96,7 +96,7 @@ void RX::compile ()
 
     int result;
     if ((result = regcomp (&_regex, _pattern.c_str (),
-                           REG_EXTENDED | /*REG_NOSUB |*/ REG_NEWLINE |
+                           REG_EXTENDED | REG_NEWLINE |
                            (_case_sensitive ? 0 : REG_ICASE))) != 0)
     {
       char message[256];
@@ -125,16 +125,34 @@ bool RX::match (
   if (!_compiled)
     compile ();
 
-  regmatch_t rm[RX_MAX_MATCHES];
-  if (regexec (&_regex, in.c_str (), RX_MAX_MATCHES, rm, 0) == 0)
+  // Collect every non-overlapping match of the whole pattern (rm[0]),
+  // resuming each search where the previous match ended.  The bound is tested
+  // before regexec is called: after a zero-width match at the end of the input
+  // offset is length + 1, and in.c_str () + offset would be past the NUL.
+  regmatch_t rm[2];
+  bool found = false;
+  int offset = 0;
+  int length = in.length ();
+  while (offset < length)
   {
-    for (unsigned int i = 1; i < 1 + _regex.re_nsub; ++i)
-      matches.push_back (in.substr (rm[i].rm_so, rm[i].rm_eo - rm[i].rm_so));
+    // Past the first search the buffer no longer starts at the real beginning
+    // of the input, so suppress '^' there unless the previous character is a
+    // newline (the pattern is compiled with REG_NEWLINE).
+    int eflags = (offset == 0 || in[offset - 1] == '\n') ? 0 : REG_NOTBOL;
+    if (regexec (&_regex, in.c_str () + offset, 2, &rm[0], eflags) != 0)
+      break;
+
+    found = true;
+    matches.push_back (in.substr (rm[0].rm_so + offset, rm[0].rm_eo - rm[0].rm_so));
+    offset += rm[0].rm_eo;
 
-    return true;
+    // Protection against zero-width patterns causing infinite loops.
+    if (rm[0].rm_so == rm[0].rm_eo)
+      ++offset;
   }
 
-  return false;
+  // Report whether this call matched; 'matches' may have been non-empty on entry.
+  return found;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -146,19 +164,35 @@ bool RX::match (
   if (!_compiled)
     compile ();
 
-  regmatch_t rm[RX_MAX_MATCHES];
-  if (regexec (&_regex, in.c_str (), RX_MAX_MATCHES, rm, 0) == 0)
+  // Record the [start, end) offsets, relative to 'in', of every
+  // non-overlapping match of the whole pattern (rm[0]).  The bound is tested
+  // before regexec is called: after a zero-width match at the end of the input
+  // offset is length + 1, and in.c_str () + offset would be past the NUL.
+  regmatch_t rm[2];
+  bool found = false;
+  int offset = 0;
+  int length = in.length ();
+  while (offset < length)
   {
-    for (unsigned int i = 1; i < 1 + _regex.re_nsub; ++i)
-    {
-      start.push_back (rm[i].rm_so);
-      end.push_back (rm[i].rm_eo);
-    }
+    // Past the first search the buffer no longer starts at the real beginning
+    // of the input, so suppress '^' there unless the previous character is a
+    // newline (the pattern is compiled with REG_NEWLINE).
+    int eflags = (offset == 0 || in[offset - 1] == '\n') ? 0 : REG_NOTBOL;
+    if (regexec (&_regex, in.c_str () + offset, 2, &rm[0], eflags) != 0)
+      break;
+
+    found = true;
+    start.push_back (rm[0].rm_so + offset);
+    end.push_back (rm[0].rm_eo + offset);
+    offset += rm[0].rm_eo;
 
-    return true;
+    // Protection against zero-width patterns causing infinite loops.
+    if (rm[0].rm_so == rm[0].rm_eo)
+      ++offset;
   }
 
-  return false;
+  // Report whether this call matched; 'start' may have been non-empty on entry.
+  return found;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/RX.h b/src/RX.h
index f505b1e3b..651029823 100644
--- a/src/RX.h
+++ b/src/RX.h
@@ -33,8 +33,6 @@
 #include
 #include
 
-#define RX_MAX_MATCHES 64
-
 class RX
 {
 public:
diff --git a/test/rx.t.cpp b/test/rx.t.cpp
index 0ed5969e5..43454b2c8 100644
--- a/test/rx.t.cpp
+++ b/test/rx.t.cpp
@@ -32,7 +32,7 @@ Context context;
 
 int main (int argc, char** argv)
 {
-  UnitTest ut (16);
+  UnitTest ut (21);
 
   std::string text = "This is a test.";
 
@@ -40,10 +40,10 @@ int main (int argc, char** argv)
   ut.ok (r1.match (text), text + " =~ /i. /");
 
   std::vector matches;
-  RX r2 ("(i.) ", false);
-  ut.ok (r2.match (matches, text), text + " =~ /(i.) /");
-  ut.ok (matches.size () == 1, "1 match");
-  ut.is (matches[0], "is", "$1 == is");
+  ut.ok (r1.match (matches, text), text + " =~ /i. /");
+  ut.ok (matches.size () == 2, "2 match");
+  ut.is (matches[0], "is ", "$1 == is\\s");
+  ut.is (matches[1], "is ", "$1 == is\\s");
 
   text = "abcdefghijklmnopqrstuvwxyz";
 
@@ -68,17 +68,27 @@ int main (int argc, char** argv)
   std::vector results;
   std::vector start;
   std::vector end;
-  RX r8 ("(e..)", true);
-  ut.ok (r8.match (results, text), "(e..) there are matches");
-  ut.ok (r8.match (start, end, text), "(e..) there are matches");
-  ut.is (results.size (), (size_t) 1, "(e..) == 1 match");
-  ut.is (results[0], "est", "(e..)[0] == 'est'");
-  ut.is (start[0], 11, "(e..)[0] == 11->");
-  ut.is (end[0], 14, "(e..)[0] == ->14");
+  RX r8 ("e..", true);
+  ut.ok (r8.match (results, text), "e.. there are matches");
+  ut.ok (r8.match (start, end, text), "e.. there are matches");
+  ut.is (results.size (), (size_t) 4, "e.. == 4 matches");
+  ut.is (results[0], "est", "e..[0] == 'est'");
+  ut.is (start[0], 11, "e..[0] == 11->");
+  ut.is (end[0], 14, "e..[0] == ->14");
 
-  RX r9 ("\\bthe\\b");
+  results.clear ();
+  RX r9 ("e", true);
+  ut.ok (r9.match (results, text), "e there are matches");
+  ut.is (results.size (), (size_t) 6, "e == 6 matches");
+
+  start.clear ();
+  end.clear ();
+  ut.ok (r9.match (start, end, text), "e there are matches");
+  ut.is (start.size (), (size_t) 6, "e == 6 matches");
+
+  RX r10 ("\\bthe\\b");
   text = "this is the end.";
-  ut.ok (r9.match (text), text + " =~ /\\bthe\\b/");
+  ut.ok (r10.match (text), text + " =~ /\\bthe\\b/");
 
   return 0;
 }