From ae93d91b6ed80ae4ba2c0daf05b6ba9fac50a217 Mon Sep 17 00:00:00 2001 From: "Dustin J. Mitchell" Date: Sun, 2 Feb 2020 18:46:27 -0500 Subject: [PATCH] more CLI compatibility WIP --- src/bin/task.rs | 6 +- src/cli/a2.rs | 273 +++++++++++++++++++++++++++++ src/cli/cli2.rs | 345 +++++++++++++++++++++++++++++++++++++ src/cli/mod.rs | 3 +- src/{cli => util}/lexer.rs | 252 ++++++++++++++++++--------- src/util/mod.rs | 1 + 6 files changed, 798 insertions(+), 82 deletions(-) create mode 100644 src/cli/a2.rs create mode 100644 src/cli/cli2.rs rename src/{cli => util}/lexer.rs (94%) diff --git a/src/bin/task.rs b/src/bin/task.rs index 8149374d5..c2affafda 100644 --- a/src/bin/task.rs +++ b/src/bin/task.rs @@ -11,8 +11,8 @@ fn main() { .about("Replacement for TaskWarrior") .subcommand( SubCommand::with_name("add").about("adds a task").arg( - Arg::with_name("descrpition") - .help("task descrpition") + Arg::with_name("description") + .help("task description") .required(true), ), ) @@ -35,7 +35,7 @@ fn main() { .new_task( uuid, Status::Pending, - matches.value_of("descrpition").unwrap().into(), + matches.value_of("description").unwrap().into(), ) .unwrap(); } diff --git a/src/cli/a2.rs b/src/cli/a2.rs new file mode 100644 index 000000000..0d39cd1a9 --- /dev/null +++ b/src/cli/a2.rs @@ -0,0 +1,273 @@ +//! Re-implementation of TaskWarrior's A2 module. + +use crate::util::lexer::*; +use std::collections::{HashMap, HashSet}; +use std::fmt; + +/// A2 represents a single argument. +#[derive(Clone)] +pub(crate) struct A2 { + pub(crate) lextype: Type, + tags: HashSet, + attributes: HashMap, +} + +impl A2 { + pub(crate) fn new>(raw: S, lextype: Type) -> A2 { + let mut attributes = HashMap::new(); + attributes.insert("raw".into(), raw.into()); + let mut rv = A2 { + lextype, + tags: HashSet::new(), + attributes, + }; + rv.decompose(); + rv + } + + /// Return true if the given tag exists in this argument. 
+ pub(crate) fn has_tag>(&self, tag: S) -> bool { + self.tags.contains(tag.as_ref()) + } + + /// Add the given tag to this argument. + pub(crate) fn tag>(&mut self, tag: S) { + self.tags.insert(tag.into()); + } + + /// Remove the given tag from this argument. + pub(crate) fn untag>(&mut self, tag: S) { + self.tags.remove(tag.as_ref()); + } + + /// Set the given attribute + pub(crate) fn set_attribute, S2: Into>( + &mut self, + name: S1, + value: S2, + ) { + self.attributes.insert(name.into(), value.into()); + } + + /// Get the given attribute + pub(crate) fn get_attribute>(&self, name: S) -> Option<&str> { + self.attributes.get(name.as_ref()).map(|s| s.as_ref()) + } + + /// Get either the canonical or raw form (attribute) + pub(crate) fn get_token(&self) -> &str { + self.attributes + .get("canonical") + .or_else(|| self.attributes.get("raw")) + .unwrap() + .as_ref() + } + + /// Decompose the raw form into tags and attributes based on the lextype: + /// + /// * Tag - + /// - "name" is the tag name + /// - "sign" is the sign (`+` or `-`) + /// * Substitution + /// - "from" is the first part + /// - "to" is the second part + /// - "flags" is the substitution flag, or empty string + /// * Pair + /// - "name" + /// - "modifier" + /// - "separator" + /// - "value" are the parts of the pair (a pair has four parts..?) 
+ /// - tag "RC" is set if the name is "rc" with no modifier + /// - tag "CONFIG" is set if the name is "rc" with a modifier + /// * Pattern + /// - "pattern" is the pattern value + /// - "flags" is the pattern flag, or empty string + /// + /// all other types are left unchanged + pub(crate) fn decompose(&mut self) { + let raw = self.get_attribute("raw").unwrap(); + match self.lextype { + Type::Tag => { + let (sign, name) = (raw[..1].to_string(), raw[1..].to_string()); + self.set_attribute("sign", sign); + self.set_attribute("name", name); + } + Type::Substitution => { + let DecomposedSubstitution { from, to, flags } = + decompose_substitution(raw).unwrap(); + self.set_attribute("from", from); + self.set_attribute("to", to); + self.set_attribute("flags", flags); + } + Type::Pair => { + let DecomposedPair { + name, + modifier, + separator, + value, + } = decompose_pair(raw).unwrap(); + + if &name == "rc" { + if &modifier != "" { + self.tag("CONFIG"); + } else { + self.tag("RC"); + } + } + + self.set_attribute("name", name); + self.set_attribute("modifier", modifier); + self.set_attribute("separator", separator); + self.set_attribute("value", value); + } + Type::Pattern => { + let DecomposedPattern { pattern, flags } = decompose_pattern(raw).unwrap(); + + self.set_attribute("pattern", pattern); + self.set_attribute("flags", flags); + } + _ => (), + } + } +} + +impl fmt::Debug for A2 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "A2{}{:?}", "{", self.lextype)?; + let mut tags = self.tags.iter().collect::>(); + tags.sort(); + for tag in tags { + write!(f, ", {}", tag)?; + } + let mut attributes = self.attributes.iter().collect::>(); + attributes.sort(); + for (name, value) in attributes { + write!(f, ", {}={:?}", name, value)?; + } + write!(f, "{}", "}")?; + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn tags() { + let mut a2 = A2::new("ident", Type::Identifier); + assert!(!a2.has_tag("foo")); + a2.tag("foo"); + 
assert!(a2.has_tag("foo")); + a2.untag("foo"); + assert!(!a2.has_tag("foo")); + } + + #[test] + fn raw_attribute() { + let a2 = A2::new("ident", Type::Identifier); + assert_eq!(a2.get_attribute("raw"), Some("ident")); + } + + #[test] + fn set_get_attribute() { + let mut a2 = A2::new("ident", Type::Identifier); + assert_eq!(a2.get_attribute("foo"), None); + a2.set_attribute("foo", "bar"); + assert_eq!(a2.get_attribute("foo"), Some("bar")); + a2.set_attribute("foo", "bing"); + assert_eq!(a2.get_attribute("foo"), Some("bing")); + } + + #[test] + fn get_token_raw() { + let a2 = A2::new("ident", Type::Identifier); + assert_eq!(a2.get_token(), "ident"); + } + + #[test] + fn get_token_canonical() { + let mut a2 = A2::new("ident", Type::Identifier); + a2.set_attribute("canonical", "identifier"); + assert_eq!(a2.get_token(), "identifier"); + } + + #[test] + fn decompose_tag() { + let mut a2 = A2::new("+foo", Type::Tag); + a2.decompose(); + assert_eq!(a2.get_attribute("sign"), Some("+")); + assert_eq!(a2.get_attribute("name"), Some("foo")); + } + + #[test] + fn decompose_substitution() { + let mut a2 = A2::new("/foo/bar/g", Type::Substitution); + a2.decompose(); + assert_eq!(a2.get_attribute("from"), Some("foo")); + assert_eq!(a2.get_attribute("to"), Some("bar")); + assert_eq!(a2.get_attribute("flags"), Some("g")); + } + + #[test] + fn decompose_pair() { + let mut a2 = A2::new("thing.foo:bar", Type::Pair); + a2.decompose(); + assert_eq!(a2.get_attribute("name"), Some("thing")); + assert_eq!(a2.get_attribute("modifier"), Some("foo")); + assert_eq!(a2.get_attribute("separator"), Some(":")); + assert_eq!(a2.get_attribute("value"), Some("bar")); + assert!(!a2.has_tag("RC")); + assert!(!a2.has_tag("CONFIG")); + } + + #[test] + fn decompose_pair_rc() { + let mut a2 = A2::new("rc:bar", Type::Pair); + a2.decompose(); + assert_eq!(a2.get_attribute("name"), Some("rc")); + assert_eq!(a2.get_attribute("modifier"), Some("")); + assert_eq!(a2.get_attribute("separator"), Some(":")); + 
assert_eq!(a2.get_attribute("value"), Some("bar")); + assert!(a2.has_tag("RC")); + assert!(!a2.has_tag("CONFIG")); + } + + #[test] + fn decompose_pair_config() { + let mut a2 = A2::new("rc.foo:bar", Type::Pair); + a2.decompose(); + assert_eq!(a2.get_attribute("name"), Some("rc")); + assert_eq!(a2.get_attribute("modifier"), Some("foo")); + assert_eq!(a2.get_attribute("separator"), Some(":")); + assert_eq!(a2.get_attribute("value"), Some("bar")); + assert!(!a2.has_tag("RC")); + assert!(a2.has_tag("CONFIG")); + } + + #[test] + fn decompose_pattern() { + let mut a2 = A2::new("/foobar/g", Type::Pattern); + a2.decompose(); + assert_eq!(a2.get_attribute("pattern"), Some("foobar")); + assert_eq!(a2.get_attribute("flags"), Some("g")); + } + + #[test] + fn decompose_other() { + let mut a2 = A2::new("123", Type::Number); + a2.decompose(); + assert_eq!(a2.get_attribute("raw"), Some("123")); + } + + #[test] + fn debug() { + let mut a2 = A2::new("/ab/g", Type::Pattern); + a2.decompose(); + a2.tag("FOO"); + assert_eq!( + format!("{:?}", a2), + "A2{Pattern, FOO, flags=\"g\", pattern=\"ab\", raw=\"/ab/g\"}" + ); + } +} diff --git a/src/cli/cli2.rs b/src/cli/cli2.rs new file mode 100644 index 000000000..2415abace --- /dev/null +++ b/src/cli/cli2.rs @@ -0,0 +1,345 @@ +//! Reimplementation of the CLI2 class in TaskWarrior. +//! +//! This class is sparsely tested in TaskWarrior, but the intent is to replicate its functionality +//! reliably enough that any command-line accepted by TaskWarrior will also be accepted by this +//! implementation. 
+ +use super::a2::A2; +use crate::util::lexer::{dequote, read_word_quoted, was_quoted, Lexer, Type}; +use std::collections::{HashMap, HashSet}; + +#[derive(Default)] +pub(crate) struct CLI2 { + entities: HashMap>, + aliases: HashMap, + original_args: Vec, + args: Vec, + id_ranges: Vec<(String, String)>, + uuid_list: Vec, + context_filter_added: bool, +} + +impl CLI2 { + pub(crate) fn new() -> CLI2 { + CLI2 { + ..Default::default() + } + } + + /// Add an alias + pub(crate) fn alias, S2: Into>(&mut self, name: S1, value: S2) { + self.aliases.insert(name.into(), value.into()); + } + + /// Register `name` as a member of the entity category `category` (e.g. "cmd" or "attribute") + pub(crate) fn entity, S2: Into>(&mut self, category: S1, name: S2) { + self.entities + .entry(category.into()) + .or_insert_with(|| HashSet::new()) + .insert(name.into()); + } + + /// Capture a single argument, tagged as ORIGINAL + pub(crate) fn add>(&mut self, argument: S) { + let mut arg = A2::new(argument, Type::Word); + arg.tag("ORIGINAL"); + self.original_args.push(arg); + self.args.clear(); + } + + /// Capture a set of arguments, inserted immediately after the binary. + /// There must be at least one argument set already. The new args are not + /// tagged as ORIGINAL. + /// + /// Note that this is in no way equivalent to calling `add` in a loop! + pub(crate) fn add_args>(&mut self, arguments: Vec) { + let mut replacement = vec![self.original_args[0].clone()]; + for arg in arguments { + replacement.push(A2::new(arg, Type::Word)); + } + for arg in self.original_args.drain(1..) { + replacement.push(arg); + } + self.original_args = replacement; + self.args.clear(); + } + + /// Perform the command-line analysis after arguments are added with `add` and `add_args`. 
+ pub(crate) fn analyze(&mut self) { + self.args.clear(); + self.handle_arg0(); + self.lex_arguments(); + // self.alias_expansion(); - TODO + if !self.find_command() { + self.default_command(); + assert!(self.find_command()); // default_command guarantees this + } + // self.demotion(); - TODO + // self.canonicalizeNames(); - TODO + // self.categorizeArgs(); - TODO + // self.parenthesizeOriginalFilter(); - TODO + } + + /// Handle the first argument, indicating the invoked binary. + fn handle_arg0(&mut self) { + // NOTE: this omits the special handling for "cal" and "calendar" + self.original_args[0].tag("BINARY"); + } + + /// Use the lexer to process all arguments (except the first, handled by handle_arg0). + /// + /// All arguments must be individually and wholly recognized by the Lexer. Any argument not + /// recognized is considered a lexer::Type::Word. + /// + /// As a side effect, tags all arguments after a terminator ('--') with TERMINATED. + fn lex_arguments(&mut self) { + let mut terminated = false; + + // Note: Starts iterating at index 1, because ::handleArg0 has already + // processed it. + for arg in &self.original_args[1..] { + let raw = arg.get_attribute("raw").unwrap(); + let quoted = was_quoted(raw); + + // Process single-token arguments. 
+ let mut lex = Lexer::new(raw); + match lex.token() { + // if we got a token and it goes to EOS (quoted pairs automatically go to EOS) + Some((lexeme, mut lextype)) + if lex.is_eos() || (quoted && lextype == Type::Pair) => + { + if !terminated && lextype == Type::Separator { + terminated = true; + } else if terminated { + lextype = Type::Word; + } + + let mut lexed_arg = A2::new(raw, lextype); + if terminated { + lexed_arg.tag("TERMINATED"); + } + if quoted { + lexed_arg.tag("QUOTED"); + } + if arg.has_tag("ORIGINAL") { + lexed_arg.tag("ORIGINAL"); + } + self.args.push(lexed_arg) + } + // ..otherwise, process "multiple-token" arguments + _ => { + // TODO: this is kind of insane and almost certainly wrong, but + // implements what the C++ code does.. + let quote = "'"; + let escaped = format!("'{}'", raw.replace(quote, "\\'")); + let mut lexed_arg; + if let Some((word, _)) = read_word_quoted(&escaped, quote, 0) { + let word = dequote(&word, "'\""); + lexed_arg = A2::new(word, Type::Word); + } else { + // "This branch may have no use-case"! + lexed_arg = A2::new(raw, Type::Word); + lexed_arg.tag("UNKNOWN"); + } + if quoted { + lexed_arg.tag("QUOTED"); + } + if arg.has_tag("ORIGINAL") { + lexed_arg.tag("ORIGINAL"); + } + self.args.push(lexed_arg) + } + } + } + /* + println!("lexed args:"); + for arg in &self.args { + println!("{:?}", arg); + } + */ + } + + /// Scan all arguments and if any are an exact match for a command name, then tag as CMD. If an + /// argument is an exact match for an attribute, despite being an inexact match for a command, + /// then it is not a command. 
+ fn find_command(&mut self) -> bool { + for (i, arg) in self.args.iter().enumerate() { + let raw = arg.get_attribute("raw").unwrap(); + let canonical; + + if self.exact_match_entity("cmd", raw) { + canonical = raw.into(); + } else if self.exact_match_entity("attribute", raw) { + continue; + } else if let Some(cannon) = self.canonicalize_entity("cmd", raw) { + canonical = cannon; + } else { + continue; + } + + let mut arg = arg.clone(); + arg.set_attribute("canonical", canonical); + arg.tag("CMD"); + + // TODO: apply "command DNA" + + self.args[i] = arg; + + return true; + } + + false + } + + /// Set a default command argument. Look for situations that require defaults: + /// + /// 1. If no command was found, and no ID/UUID, and if rc.default.command is + /// configured, inject the lexed tokens from rc.default.command. + /// + /// 2. If no command was found, but an ID/UUID was found, then assume a command + /// of 'information'. + fn default_command(&mut self) { + let mut found_command = false; + let mut found_sequence = false; + + for arg in &self.args { + if arg.has_tag("CMD") { + found_command = true; + } + if arg.lextype == Type::Uuid || arg.lextype == Type::Number { + found_sequence = true; + } + } + + if !found_command { + if !found_sequence { + unreachable!(); // TODO (requires default.command, context, etc.) + } else { + let mut info = A2::new("information", Type::Word); + info.tag("ASSUMED"); + info.tag("CMD"); + self.args.insert(0, info); + } + } + } + + /// Search for 'value' in _entities category, return canonicalized value. + fn canonicalize_entity(&self, category: &str, value: &str) -> Option { + // TODO: for the moment this only accepts exact matches + if let Some(names) = self.entities.get(category) { + if names.contains(value) { + Some(value.into()) + } else { + None + } + } else { + None + } + } + + /// Search for exact 'value' in _entities category. 
+ fn exact_match_entity(&self, category: &str, value: &str) -> bool { + if let Some(names) = self.entities.get(category) { + names.contains(value) + } else { + false + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + fn assert_args(args: &Vec, exp: Vec<&str>) { + assert_eq!( + args.iter().map(|a| format!("{:?}", a)).collect::>(), + exp.iter().map(|s| s.to_string()).collect::>(), + ); + } + + #[test] + fn alias() { + let mut c = CLI2::new(); + c.alias("foo", "bar"); + assert_eq!(c.aliases.get("foo"), Some(&"bar".to_string())); + } + + #[test] + fn entities() { + let mut c = CLI2::new(); + c.entity("cat", "foo"); + c.entity("cat", "bar"); + let mut exp = HashSet::new(); + exp.insert("foo".to_string()); + exp.insert("bar".to_string()); + assert_eq!(c.entities.get("cat"), Some(&exp)); + } + + #[test] + fn add() { + let mut c = CLI2::new(); + c.add("foo"); + c.add("bar"); + assert_eq!( + c.original_args + .iter() + .map(|a| format!("{:?}", a)) + .collect::>(), + vec![ + "A2{Word, ORIGINAL, raw=\"foo\"}", + "A2{Word, ORIGINAL, raw=\"bar\"}" + ] + ); + } + + #[test] + fn add_args() { + let mut c = CLI2::new(); + c.add("0"); + c.add("1"); + c.add("2"); + c.add_args(vec!["foo", "bar"]); + assert_args( + &c.original_args, + vec![ + "A2{Word, ORIGINAL, raw=\"0\"}", + "A2{Word, raw=\"foo\"}", + "A2{Word, raw=\"bar\"}", + "A2{Word, ORIGINAL, raw=\"1\"}", + "A2{Word, ORIGINAL, raw=\"2\"}", + ], + ); + } + + #[test] + fn analyze_example_cmdline() { + let mut c = CLI2::new(); + c.entity("cmd", "next"); + c.add("arg0"); + c.add("rc.gc=0"); + c.add("next"); + c.add("+PENDING"); + c.add("due:tomorrow"); + c.analyze(); + assert_args( + &c.args, + vec![ + "A2{Pair, CONFIG, ORIGINAL, modifier=\"gc\", name=\"rc\", raw=\"rc.gc=0\", separator=\"=\", value=\"0\"}", + "A2{Identifier, CMD, ORIGINAL, canonical=\"next\", raw=\"next\"}", + "A2{Tag, ORIGINAL, name=\"PENDING\", raw=\"+PENDING\", sign=\"+\"}", + "A2{Pair, ORIGINAL, modifier=\"\", name=\"due\", raw=\"due:tomorrow\", 
separator=\":\", value=\"tomorrow\"}", + ], + ); + } + + #[test] + fn exact_match_entity() { + let mut c = CLI2::new(); + c.entity("cmd", "next"); + c.entity("cmd", "list"); + assert!(c.exact_match_entity("cmd", "next")); + assert!(!c.exact_match_entity("cmd", "bar")); + assert!(!c.exact_match_entity("foo", "bar")); + } +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs index a5464ca53..f7b7486d5 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1 +1,2 @@ -mod lexer; +mod a2; +mod cli2; diff --git a/src/cli/lexer.rs b/src/util/lexer.rs similarity index 94% rename from src/cli/lexer.rs rename to src/util/lexer.rs index 9cc9bb237..1e04218dd 100644 --- a/src/cli/lexer.rs +++ b/src/util/lexer.rs @@ -1,9 +1,12 @@ +//! A re-implementation of TaskWarrior's Lexer. +//! +//! This is tested to pass that module's tests, and includes some additional tests that were also +//! verified against that module. + use crate::util::datetime::DateTime; use crate::util::duration::Duration; use std::convert::TryFrom; -// based on src/Lexer.{h,cpp} in the Taskwarrior code - const UUID_PATTERN: &[u8] = b"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; const UUID_MIN_LENGTH: usize = 8; const MINIMUM_MATCH_LEN: usize = 3; @@ -12,7 +15,7 @@ const DATE_SUBELEMENTS: &[&str] = &[ ]; #[derive(PartialEq, Debug, Clone, Copy)] -enum Type { +pub(crate) enum Type { Uuid, Number, Hex, @@ -33,7 +36,7 @@ enum Type { Duration, } -struct Lexer { +pub(crate) struct Lexer { text: String, cursor: usize, eos: usize, @@ -151,11 +154,6 @@ fn is_hard_boundary(left: char, right: char) -> bool { right == '\0' || left == '(' || left == ')' || right == '(' || right == ')' } -/// Returns true if the given string must have been shell-quoted -fn was_quoted(s: &str) -> bool { - s.contains(&[' ', '\t', '(', ')', '<', '>', '&', '~'][..]) -} - fn is_unicode_hex_digit(c: char) -> bool { match c { '0'..='9' | 'a'..='f' | 'A'..='F' => true, @@ -184,7 +182,7 @@ fn hex_to_char(hex: &str) -> Option { /// Strips matching quote symbols from 
the beginning and end of the given string /// (removing all quotes if given a single quote `'`) -fn dequote<'a, 'b>(s: &'a str, quotes: &'b str) -> &'a str { +pub(crate) fn dequote<'a, 'b>(s: &'a str, quotes: &'b str) -> &'a str { // note that this returns a new ref to the same string, rather // than modifying its argument as the C++ version does. if let Some(first_char) = s.chars().next() { @@ -202,7 +200,7 @@ fn dequote<'a, 'b>(s: &'a str, quotes: &'b str) -> &'a str { s } -fn read_word_quoted(text: &str, quotes: &str, cursor: usize) -> Option<(String, usize)> { +pub(crate) fn read_word_quoted(text: &str, quotes: &str, cursor: usize) -> Option<(String, usize)> { let mut pos = cursor; let mut res = String::new(); let mut skipchars = 0; @@ -281,7 +279,7 @@ fn read_word_quoted(text: &str, quotes: &str, cursor: usize) -> Option<(String, None } -fn read_word_unquoted(text: &str, cursor: usize) -> Option<(String, usize)> { +pub(crate) fn read_word_unquoted(text: &str, cursor: usize) -> Option<(String, usize)> { let mut pos = cursor; let mut res = String::new(); let mut prev = None; @@ -365,12 +363,118 @@ fn common_length(s1: &str, s2: &str) -> usize { .len() } +/// Returns true if the given string must have been shell-quoted +pub(crate) fn was_quoted(s: &str) -> bool { + s.contains(&[' ', '\t', '(', ')', '<', '>', '&', '~'][..]) +} + #[derive(Debug, PartialEq)] -pub struct DecomposedPair { - name: String, - modifier: String, - separator: String, - value: String, +pub(crate) struct DecomposedPair { + pub(crate) name: String, + pub(crate) modifier: String, + pub(crate) separator: String, + pub(crate) value: String, +} + +/// Parse ("decompose") a pair into its constituent parts. This assumes the text is a valid pair +/// string. 
+pub(crate) fn decompose_pair(text: &str) -> Option { + let npos = usize::max_value(); + let dot = text.find(".").unwrap_or(npos); + let sep_defer = text.find("::").unwrap_or(npos); + let sep_eval = text.find(":=").unwrap_or(npos); + let sep_colon = text.find(":").unwrap_or(npos); + let sep_equal = text.find("=").unwrap_or(npos); + + let (sep, sep_end) = if sep_defer != npos + && sep_defer <= sep_eval + && sep_defer <= sep_colon + && sep_defer <= sep_equal + { + (sep_defer, sep_defer + 2) + } else if sep_eval != npos + && sep_eval <= sep_defer + && sep_eval <= sep_colon + && sep_eval <= sep_equal + { + (sep_eval, sep_eval + 2) + } else if sep_colon != npos + && sep_colon <= sep_defer + && sep_colon <= sep_eval + && sep_colon <= sep_equal + { + (sep_colon, sep_colon + 1) + } else if sep_equal != npos + && sep_equal <= sep_defer + && sep_equal <= sep_eval + && sep_equal <= sep_colon + { + (sep_equal, sep_equal + 1) + } else { + return None; + }; + + let (name, modifier) = if dot != npos && dot < sep { + ( + text.get(0..dot).unwrap().into(), + text.get(dot + 1..sep).unwrap().into(), + ) + } else { + (text.get(0..sep).unwrap().into(), "".into()) + }; + + let separator = text.get(sep..sep_end).unwrap().into(); + let value = text.get(sep_end..).unwrap().into(); + + Some(DecomposedPair { + name, + modifier, + separator, + value, + }) +} + +#[derive(Debug, PartialEq)] +pub(crate) struct DecomposedSubstitution { + pub(crate) from: String, + pub(crate) to: String, + pub(crate) flags: String, +} + +/// Parse ("decompose") a substitution into its constituent parts. This assumes +/// the text is a valid substitution string. 
+pub(crate) fn decompose_substitution(text: &str) -> Option { + let mut cursor = 0; + if let Some((from, from_curs)) = read_word_quoted(text, "/", cursor) { + cursor = from_curs - 1; + if let Some((to, to_curs)) = read_word_quoted(text, "/", cursor) { + cursor = to_curs; + let from = dequote(&from, "/").into(); + let to = dequote(&to, "/").into(); + let flags = text[cursor..].into(); + return Some(DecomposedSubstitution { from, to, flags }); + } + } + None +} + +#[derive(Debug, PartialEq)] +pub(crate) struct DecomposedPattern { + pub(crate) pattern: String, + pub(crate) flags: String, +} + +/// Parse ("decompose") a pattern into its constituent parts. This assumes the text is a valid +/// pattern string. +pub(crate) fn decompose_pattern(text: &str) -> Option { + let mut cursor = 0; + if let Some((pattern, pattern_curs)) = read_word_quoted(text, "/", cursor) { + cursor = pattern_curs; + let pattern = dequote(&pattern, "/").into(); + let flags = text[cursor..].into(); + return Some(DecomposedPattern { pattern, flags }); + } + None } impl Lexer { @@ -475,65 +579,8 @@ impl Lexer { None } - pub fn decompose_pair(text: &str) -> Option { - let npos = usize::max_value(); - // npos - let dot = text.find(".").unwrap_or(npos); - // npos - let sep_defer = text.find("::").unwrap_or(npos); - // npos - let sep_eval = text.find(":=").unwrap_or(npos); - // 4 - let sep_colon = text.find(":").unwrap_or(npos); - // npos - let sep_equal = text.find("=").unwrap_or(npos); - - let (sep, sep_end) = if sep_defer != npos - && sep_defer <= sep_eval - && sep_defer <= sep_colon - && sep_defer <= sep_equal - { - (sep_defer, sep_defer + 2) - } else if sep_eval != npos - && sep_eval <= sep_defer - && sep_eval <= sep_colon - && sep_eval <= sep_equal - { - (sep_eval, sep_eval + 2) - } else if sep_colon != npos - && sep_colon <= sep_defer - && sep_colon <= sep_eval - && sep_colon <= sep_equal - { - (sep_colon, sep_colon + 1) - } else if sep_equal != npos - && sep_equal <= sep_defer - && sep_equal <= 
sep_eval - && sep_equal <= sep_colon - { - (sep_equal, sep_equal + 1) - } else { - return None; - }; - - let (name, modifier) = if dot != npos && dot < sep { - ( - text.get(0..dot).unwrap().into(), - text.get(dot + 1..sep).unwrap().into(), - ) - } else { - (text.get(0..sep).unwrap().into(), "".into()) - }; - - let separator = text.get(sep..sep_end).unwrap().into(); - let value = text.get(sep_end..).unwrap().into(); - - Some(DecomposedPair { - name, - modifier, - separator, - value, - }) + pub fn is_eos(&self) -> bool { + self.cursor == self.eos } // recognizers for the `token` method @@ -1195,7 +1242,7 @@ impl Lexer { } } -struct LexerIterator(Lexer); +pub(crate) struct LexerIterator(Lexer); impl Iterator for LexerIterator { type Item = (String, Type); @@ -1319,6 +1366,7 @@ mod test { fn test_token_empty() { let mut l = Lexer::new(""); assert_eq!(l.token(), NONE); + assert!(l.is_eos()); } #[test] @@ -1326,6 +1374,7 @@ mod test { let mut l = Lexer::new( " one 'two \\'three\\''+456-(1.3*2 - 0x12) 1.2e-3.4 foo.bar and '\\u20ac'", ); + assert!(!l.is_eos()); assert_eq!(l.token(), Some((String::from("one"), Type::Identifier))); assert_eq!( l.token(), @@ -1346,6 +1395,7 @@ mod test { assert_eq!(l.token(), Some((String::from("and"), Type::Op))); assert_eq!(l.token(), Some((String::from("'€'"), Type::String))); assert_eq!(l.token(), None); + assert!(l.is_eos()); } #[test] @@ -2193,7 +2243,7 @@ mod test { value ); assert_eq!( - Lexer::decompose_pair(&input), + decompose_pair(&input), Some(DecomposedPair { name: name.into(), modifier: String::from(*modifier), @@ -2206,6 +2256,52 @@ mod test { } } + #[test] + fn decompose_substitution_no_flags() { + assert_eq!( + decompose_substitution("/a/b/"), + Some(DecomposedSubstitution { + from: "a".into(), + to: "b".into(), + flags: "".into(), + }) + ); + } + + #[test] + fn decompose_substitution_flags() { + assert_eq!( + decompose_substitution("/a/b/g"), + Some(DecomposedSubstitution { + from: "a".into(), + to: "b".into(), + flags: 
"g".into(), + }) + ); + } + + #[test] + fn decompose_pattern_no_flags() { + assert_eq!( + decompose_pattern("/foober/"), + Some(DecomposedPattern { + pattern: "foober".into(), + flags: "".into(), + }) + ); + } + + #[test] + fn decompose_pattern_flags() { + assert_eq!( + decompose_pattern("/foober/g"), + Some(DecomposedPattern { + pattern: "foober".into(), + flags: "g".into(), + }) + ); + } + #[test] fn test_is_one_of() { let mut l = Lexer::new("Grumpy."); diff --git a/src/util/mod.rs b/src/util/mod.rs index efbbc7fb5..a6465d5d7 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,3 +1,4 @@ pub(crate) mod datetime; pub(crate) mod duration; +pub(crate) mod lexer; pub(crate) mod pig;