TW-1709: Parsing bug when doing "task undo"

- Thanks to Scott Kostyshak.
2016-02-23 21:05:53 -05:00
parent 31f0804207
commit 029f3af578
2 changed files with 58 additions and 80 deletions
--- a/2
+++ b/2
@@ -35,6 +35,8 @@
 - TW-1704 Use Task::identifier to reference the Task in the output
 - TW-1705 Directories in .task/hooks should not be reported as invalid hooks
          (thanks to Tomas Babej).
 - TW-1709 Parsing bug when doing "task undo"
          (thanks to Scott Kostyshak).
 - TW-1710 Setting wait date on status:completed / status:deleted
          (thanks to Daniel Shahaf).
 - TW-1714 Starting recurring task starts all recurrences
--- a/src/text.cpp
+++ b/src/text.cpp
@@ -225,22 +225,7 @@ int longestLine (const std::string& input)
 }
 ////////////////////////////////////////////////////////////////////////////////
-// Walk the input text looking for a break point.  A break point is one of:
+// Break UTF8 text into chunks of no more than 'width' characters.
 //   - EOS
 //   - \n
 //   - last space before 'length' characters
 //   - last punctuation (, ; . :) before 'length' characters, even if not
 //     followed by a space
 //   - first 'length' characters
 //
 // text       "one two three\n  four"
 // bytes       0123456789012 3456789
 // characters  1234567890a23 4567890
 //
 // leading_ws
 // ws             ^   ^       ^^
 // punct
 // break                     ^
 bool extractLine (
  std::string& line,
  const std::string& text,
@@ -249,91 +234,82 @@ bool extractLine (
  unsigned int& offset)
 {
  // Terminate processing.
  // Note: this compares a byte offset against a byte length, not characters.
  if (offset >= text.length ())
    return false;
-  std::string::size_type last_last_bytes = offset;
+  int line_length                     {0};
-  std::string::size_type last_bytes = offset;
+  int character                       {0};
-  std::string::size_type bytes = offset;
+  std::string::size_type lastWordEnd  {std::string::npos};
-  unsigned int last_ws = 0;
+  bool something                      {false};
-  int character;
+  std::string::size_type cursor       {offset};
-  int char_width = 0;
+  std::string::size_type prior_cursor {offset};
-  int line_width = 0;
+  while ((character = utf8_next_char (text, cursor)))
  while (1)
  {
-    last_last_bytes = last_bytes;
+    // Premature EOL.
-    last_bytes = bytes;
+    if (character == '\n')
    character = utf8_next_char (text, bytes);
    if (character == 0 ||
        character == '\n')
    {
-      line = text.substr (offset, last_bytes - offset);
+      line = text.substr (offset, line_length);
-      offset = bytes;
+      offset = cursor;
-      break;
+      return true;
    }
    else if (character == ' ')
      last_ws = last_bytes;
-    char_width = mk_wcwidth (character);
+    if (! Lexer::isWhitespace (character))
    if (line_width + char_width > width)
    {
-      int last_last_character = text[last_last_bytes];
+      something = true;
-      int last_character = text[last_bytes];
+      if (! text[cursor] || Lexer::isWhitespace (text[cursor]))
        lastWordEnd = prior_cursor;
    }
-      // [case 1] one| two --> last_last != 32, last == 32, ws == 0
+    line_length += mk_wcwidth (character);
-      if (last_last_character != ' ' &&
+
-          last_character      == ' ')
+    if (line_length >= width)
    {
      // Backtrack to previous word end.
      if (lastWordEnd != std::string::npos)
      {
-        line = text.substr (offset, last_bytes - offset);
+        // Eat one WS after lastWordEnd.
-        offset = last_bytes + 1;
+        std::string::size_type lastBreak = lastWordEnd;
-        break;
+        utf8_next_char (text, lastBreak);
        // Position offset at following char.
        std::string::size_type nextStart = lastBreak;
        utf8_next_char (text, nextStart);
        line = text.substr (offset, lastBreak - offset);
        offset = nextStart;
        return true;
      }
-      // [case 2] one |two --> last_last == 32, last != 32, ws != 0
+      // No backtrack, possible hyphenation.
-      else if (last_last_character == ' ' &&
+      else if (hyphenate)
               last_character      != ' ' &&
               last_ws             != 0)
      {
-        line = text.substr (offset, last_bytes - offset - 1);
+        line = text.substr (offset, prior_cursor - offset) + "-";
-        offset = last_bytes;
+        offset = prior_cursor;
-        break;
+        return true;
      }
-      else if (last_last_character != ' ' &&
+      // No hyphenation, just truncation.
-               last_character      != ' ')
+      else
      {
-        // [case 3] one t|wo --> last_last != 32, last != 32, ws != 0
+        line = text.substr (offset, prior_cursor - offset);
-        if (last_ws != 0)
+        offset = cursor;
-        {
+        return true;
          line = text.substr (offset, last_ws - offset);
          offset = last_ws + 1;
          break;
        }
        // [case 4] on|e two --> last_last != 32, last != 32, ws == 0
        else
        {
          if (hyphenate)
          {
            line = text.substr (offset, last_bytes - offset - 1) + "-";
            offset = last_last_bytes;
          }
          else
          {
            line = text.substr (offset, last_bytes - offset);
            offset = last_bytes;
          }
        }
        break;
      }
    }
-    line_width += char_width;
+    // Hindsight.
    prior_cursor = cursor;
  }
-  return true;
+  // Residual text.
  if (something)
  {
    line = text.substr (offset, cursor - offset);
     offset = cursor;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////