From de5e4e134c65999f2ddb417d289804bb87be6019 Mon Sep 17 00:00:00 2001 From: "Dustin J. Mitchell" Date: Fri, 9 Nov 2018 20:48:29 -0500 Subject: [PATCH] parse into tasks --- src/main.rs | 107 ++---------------- src/task.rs | 17 +++ src/tdb2/ff4.rs | 223 ++++++++++++++++++++++++++++++++++++++ src/tdb2/mod.rs | 14 +++ src/{ => tdb2}/nibbler.rs | 3 + 5 files changed, 265 insertions(+), 99 deletions(-) create mode 100644 src/task.rs create mode 100644 src/tdb2/ff4.rs create mode 100644 src/tdb2/mod.rs rename src/{ => tdb2}/nibbler.rs (98%) diff --git a/src/main.rs b/src/main.rs index 169c99a71..af111278e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,103 +1,12 @@ -mod nibbler; +mod tdb2; +mod task; -use std::str; - -use std::io::{stdin, BufRead, Result, Error, ErrorKind}; -use nibbler::Nibbler; - -/// Decode the given byte slice into a string using Taskwarrior JSON's escaping The slice is -/// assumed to be ASCII; unicode escapes within it will be expanded. -// TODO: return Cow -fn json_decode(value: &[u8]) -> String { - let length = value.len(); - let mut rv = String::with_capacity(length); - - let mut pos = 0; - while pos < length { - let v = value[pos]; - if v == b'\\' { - pos += 1; - if pos == length { - rv.push(v as char); - break; - } - let v = value[pos]; - match v { - b'"' | b'\\' | b'/' => rv.push(v as char), - b'b' => rv.push(8 as char), - b'f' => rv.push(12 as char), - b'n' => rv.push('\n' as char), - b'r' => rv.push('\r' as char), - b't' => rv.push('\t' as char), - b'u' => panic!("omg please no"), - _ => { - rv.push(b'\\' as char); - rv.push(v as char); - } - } - } else { - rv.push(v as char) - } - pos += 1; - } - - rv -} - -fn decode(value: String) -> String { - if let Some(_) = value.find('&') { - return value.replace("&open;", "[").replace("&close;", "]"); - } - value -} - -fn parse_ff4(line: &str) -> Result<()> { - let mut nib = Nibbler::new(line.as_bytes()); - println!("{}", line); - - if !nib.skip(b'[') { - return Err(Error::new(ErrorKind::Other, "bad line")); - } - if let Some(line) = nib.get_until(b']') { - let mut nib = Nibbler::new(line); - while !nib.depleted() { - if let Some(name) = nib.get_until(b':') { - if !nib.skip(b':') { - return Err(Error::new(ErrorKind::Other, "bad line")); - } - if let Some(value) = nib.get_quoted(b'"') { - let value = json_decode(value); - let value = decode(value); - println!("{}={}", str::from_utf8(name).unwrap(), value); - } else { - return Err(Error::new(ErrorKind::Other, "bad line")); - } - nib.skip(b' '); - } else { - return Err(Error::new(ErrorKind::Other, "bad line")); - } - } - } else { - return Err(Error::new(ErrorKind::Other, "bad line")); - } - if !nib.skip(b']') { - return Err(Error::new(ErrorKind::Other, "bad line")); - } - if !nib.depleted() { - return Err(Error::new(ErrorKind::Other, "bad line")); - } - Ok(()) -} - -fn parse_tdb2() -> Result<()> { - let input = stdin(); - for line in input.lock().lines() { - parse_ff4(&line?)?; - } - Ok(()) -} +use tdb2::parse; +use std::io::stdin; fn main() { - parse_tdb2().unwrap(); - println!("Done"); + let input = stdin(); + parse(input.lock()).unwrap().iter().for_each(|t| { + println!("{:?}", t); + }); } diff --git a/src/task.rs b/src/task.rs new file mode 100644 index 000000000..4e1085588 --- /dev/null +++ b/src/task.rs @@ -0,0 +1,17 @@ +use std::collections::HashMap; + +#[derive(Debug)] +pub struct Task { + data: HashMap, +} + +impl Task { + /// Construct a Task from a hashmap containing named properties + pub fn from_data(data: HashMap) -> Self { + Self { data } + } + + pub fn description(&self) -> &str { + self.data.get("description").unwrap() + } +} diff --git a/src/tdb2/ff4.rs b/src/tdb2/ff4.rs new file mode 100644 index 000000000..69c9a81cd --- /dev/null +++ b/src/tdb2/ff4.rs @@ -0,0 +1,223 @@ +use std::str; +use std::io::{Result, Error, ErrorKind}; +use std::collections::HashMap; + +use super::nibbler::Nibbler; +use super::super::task::Task; + +/// Rust implementation of part of utf8_codepoint from Taskwarrior's src/utf8.cpp +/// +/// Note that the original function will return garbage for invalid hex sequences; +/// this panics instead. +fn hex_to_unicode(value: &[u8]) -> String { + if value.len() < 4 { + panic!(format!("unicode escape too short -- {:?}", value)); + } + + fn nyb(c: u8) -> u16 { + match c { + b'0'...b'9' => (c - b'0') as u16, + b'a'...b'f' => (c - b'a' + 10) as u16, + b'A'...b'F' => (c - b'A' + 10) as u16, + _ => panic!(format!("invalid hex character {:?}", c)), + } + }; + + let words = [ + nyb(value[0]) << 12 | nyb(value[1]) << 8 | nyb(value[2]) << 4 | nyb(value[3]), + ]; + return String::from_utf16(&words[..]).unwrap(); +} + +/// Rust implementation of JSON::decode in Taskwarrior's src/JSON.cpp +/// +/// Decode the given byte slice into a string using Taskwarrior JSON's escaping The slice is +/// assumed to be ASCII; unicode escapes within it will be expanded. +fn json_decode(value: &[u8]) -> String { + let length = value.len(); + let mut rv = String::with_capacity(length); + + let mut pos = 0; + while pos < length { + let v = value[pos]; + if v == b'\\' { + pos += 1; + if pos == length { + rv.push(v as char); + break; + } + let v = value[pos]; + match v { + b'"' | b'\\' | b'/' => rv.push(v as char), + b'b' => rv.push(8 as char), + b'f' => rv.push(12 as char), + b'n' => rv.push('\n' as char), + b'r' => rv.push('\r' as char), + b't' => rv.push('\t' as char), + b'u' => { + rv.push_str(&hex_to_unicode(&value[pos + 1..])); + pos += 4; + } + _ => { + rv.push(b'\\' as char); + rv.push(v as char); + } + } + } else { + rv.push(v as char) + } + pos += 1; + } + + rv +} + +/// Rust implementation of Task::decode in Taskwarrior's src/Task.cpp +/// +/// Note that the docstring for the C++ function does not match the +/// implementation! +fn decode(value: String) -> String { + if let Some(_) = value.find('&') { + return value.replace("&open;", "[").replace("&close;", "]"); + } + value +} + +/// Parse an "FF4" formatted task line. From Task::parse in Taskwarrior's src/Task.cpp. +/// +/// While Taskwarrior supports additional formats, this is the only format supported by rask. +pub(super) fn parse_ff4(line: &str) -> Result { + let mut nib = Nibbler::new(line.as_bytes()); + let mut data = HashMap::new(); + + if !nib.skip(b'[') { + return Err(Error::new(ErrorKind::Other, "bad line")); + } + if let Some(line) = nib.get_until(b']') { + let mut nib = Nibbler::new(line); + while !nib.depleted() { + if let Some(name) = nib.get_until(b':') { + if !nib.skip(b':') { + return Err(Error::new(ErrorKind::Other, "bad line")); + } + if let Some(value) = nib.get_quoted(b'"') { + let value = json_decode(value); + let value = decode(value); + data.insert(String::from_utf8(name.to_vec()).unwrap(), value); + } else { + return Err(Error::new(ErrorKind::Other, "bad line")); + } + nib.skip(b' '); + } else { + return Err(Error::new(ErrorKind::Other, "bad line")); + } + } + } else { + return Err(Error::new(ErrorKind::Other, "bad line")); + } + if !nib.skip(b']') { + return Err(Error::new(ErrorKind::Other, "bad line")); + } + if !nib.depleted() { + return Err(Error::new(ErrorKind::Other, "bad line")); + } + Ok(Task::from_data(data)) +} + +#[cfg(test)] +mod test { + use super::{decode, json_decode, hex_to_unicode, parse_ff4}; + + #[test] + fn test_hex_to_unicode_digits() { + assert_eq!(hex_to_unicode(b"1234"), "\u{1234}"); + } + + #[test] + fn test_hex_to_unicode_lower() { + assert_eq!(hex_to_unicode(b"abcd"), "\u{abcd}"); + } + + #[test] + fn test_hex_to_unicode_upper() { + assert_eq!(hex_to_unicode(b"ABCD"), "\u{abcd}"); + } + + #[test] + fn test_json_decode_no_change() { + assert_eq!(json_decode(b"abcd"), "abcd"); + } + + #[test] + fn test_json_decode_escape_quote() { + assert_eq!(json_decode(b"ab\\\"cd"), "ab\"cd"); + } + + #[test] + fn test_json_decode_escape_backslash() { + assert_eq!(json_decode(b"ab\\\\cd"), "ab\\cd"); + } + + #[test] + fn test_json_decode_escape_frontslash() { + assert_eq!(json_decode(b"ab\\/cd"), "ab/cd"); + } + + #[test] + fn test_json_decode_escape_b() { + assert_eq!(json_decode(b"ab\\bcd"), "ab\x08cd"); + } + + #[test] + fn test_json_decode_escape_f() { + assert_eq!(json_decode(b"ab\\fcd"), "ab\x0ccd"); + } + + #[test] + fn test_json_decode_escape_n() { + assert_eq!(json_decode(b"ab\\ncd"), "ab\ncd"); + } + + #[test] + fn test_json_decode_escape_r() { + assert_eq!(json_decode(b"ab\\rcd"), "ab\rcd"); + } + + #[test] + fn test_json_decode_escape_t() { + assert_eq!(json_decode(b"ab\\tcd"), "ab\tcd"); + } + + #[test] + fn test_json_decode_escape_other() { + assert_eq!(json_decode(b"ab\\xcd"), "ab\\xcd"); + } + + #[test] + fn test_json_decode_escape_eos() { + assert_eq!(json_decode(b"ab\\"), "ab\\"); + } + + #[test] + fn test_json_decode_escape_unicode() { + assert_eq!(json_decode(b"ab\\u1234"), "ab\u{1234}"); + } + + #[test] + fn test_decode_no_change() { + let s = "abcd " efgh &".to_string(); + assert_eq!(decode(s.clone()), s); + } + + #[test] + fn test_decode_multi() { + let s = "abcd &open; efgh &close; &open".to_string(); + assert_eq!(decode(s), "abcd [ efgh ] &open".to_string()); + } + + #[test] + fn test_parse_ff4() { + let task = parse_ff4("[description:\"desc\"]").unwrap(); + assert_eq!(task.description(), "desc"); + } +} diff --git a/src/tdb2/mod.rs b/src/tdb2/mod.rs new file mode 100644 index 000000000..ac15032bb --- /dev/null +++ b/src/tdb2/mod.rs @@ -0,0 +1,14 @@ +mod nibbler; +mod ff4; + +use std::io::{BufRead, Result}; +use super::task::Task; +use self::ff4::parse_ff4; + +pub(super) fn parse(reader: impl BufRead) -> Result> { + let mut tasks = vec![]; + for line in reader.lines() { + tasks.push(parse_ff4(&line?)?); + } + Ok(tasks) +} diff --git a/src/nibbler.rs b/src/tdb2/nibbler.rs similarity index 98% rename from src/nibbler.rs rename to src/tdb2/nibbler.rs index dbc3df920..57e4fe136 100644 --- a/src/nibbler.rs +++ b/src/tdb2/nibbler.rs @@ -1,3 +1,6 @@ +//! A minimal implementation of the "Nibbler" parsing utility from the Taskwarrior +//! source. + pub struct Nibbler<'a> { input: &'a [u8], cursor: usize,