use crate::util::datetime::DateTime; use crate::util::duration::Duration; use std::convert::TryFrom; // based on src/Lexer.{h,cpp} in the Taskwarrior code const UUID_PATTERN: &[u8] = b"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; const UUID_MIN_LENGTH: usize = 8; const MINIMUM_MATCH_LEN: usize = 3; const DATE_SUBELEMENTS: &[&str] = &[ "year", "month", "day", "week", "weekday", "julian", "hour", "minute", "second", ]; #[derive(PartialEq, Debug, Clone, Copy)] enum Type { Uuid, Number, Hex, String, URL, Pair, Set, Separator, Tag, Path, Substitution, Pattern, Op, DOM, Identifier, Word, Date, Duration, } struct Lexer { text: String, cursor: usize, eos: usize, attributes: Vec, } // TaskWarrior uses some non-standard character definitions, so they are repeated verbatim here, // rather than defaulting to the unicode functions available on the char type. /// Returns true if this character is whitespace, as defined in TaskWarrior's libshared. fn unicode_whitespace(c: char) -> bool { unicode_horizontal_whitespace(c) || unicode_vertical_whitespace(c) } /// Returns true if this character is horizontal whitespace, as defined in TaskWarrior's libshared. 
fn unicode_horizontal_whitespace(c: char) -> bool { let c: u32 = c.into(); return c == 0x0020 || // space Common Separator, space c == 0x0009 || // Common Other, control HT, Horizontal Tab c == 0x00A0 || // no-break space Common Separator, space c == 0x1680 || // ogham space mark Ogham Separator, space c == 0x180E || // mongolian vowel separator Mongolian Separator, space c == 0x2000 || // en quad Common Separator, space c == 0x2001 || // em quad Common Separator, space c == 0x2002 || // en space Common Separator, space c == 0x2003 || // em space Common Separator, space c == 0x2004 || // three-per-em space Common Separator, space c == 0x2005 || // four-per-em space Common Separator, space c == 0x2006 || // six-per-em space Common Separator, space c == 0x2007 || // figure space Common Separator, space c == 0x2008 || // punctuation space Common Separator, space c == 0x2009 || // thin space Common Separator, space c == 0x200A || // hair space Common Separator, space c == 0x200B || // zero width space c == 0x200C || // zero width non-joiner c == 0x200D || // zero width joiner c == 0x202F || // narrow no-break space Common Separator, space c == 0x205F || // medium mathematical space Common Separator, space c == 0x2060 || // word joiner c == 0x3000; // ideographic space Common Separator, space } /// Returns true if this character is vertical whitespace, as defined in TaskWarrior's libshared. 
fn unicode_vertical_whitespace(c: char) -> bool { let c: u32 = c.into(); return c == 0x000A || // Common Other, control LF, Line feed c == 0x000B || // Common Other, control VT, Vertical Tab c == 0x000C || // Common Other, control FF, Form feed c == 0x000D || // Common Other, control CR, Carriage return c == 0x0085 || // Common Other, control NEL, Next line c == 0x2028 || // line separator Common Separator, line c == 0x2029; // paragraph separator Common Separator, paragraph } /// Returns true if the given character is an ascii digit fn unicode_latin_digit(c: char) -> bool { c.is_ascii_digit() } /// Returns true if the given character is an ascii letter fn unicode_latin_alpha(c: char) -> bool { c.is_ascii_alphabetic() } /// Replicates the C function of the same name, which only recognizes ASCII printable fn isprint(c: char) -> bool { c.is_ascii_graphic() } /// Returns true if the given character is punctuation. fn is_punctuation(c: char) -> bool { isprint(c) && c != ' ' && c != '@' && c != '#' && c != '$' && c != '_' && !unicode_latin_digit(c) && !unicode_latin_alpha(c) } /// Returns true if this character is an operator fn is_single_char_operator(c: char) -> bool { match c { '+' | '-' | '*' | '/' | '(' | ')' | '<' | '>' | '^' | '!' | '%' | '=' | '~' => true, _ => false, } } /// Returns true if this character can start an identifier fn is_identifier_start(c: char) -> bool { !unicode_whitespace(c) && !unicode_latin_digit(c) && !is_single_char_operator(c) && !is_punctuation(c) } /// Returns true if this character can be in the middle of an identifier fn is_identifier_next(c: char) -> bool { c != ':' && c != '=' && !unicode_whitespace(c) && !is_single_char_operator(c) } /// Returns true if the sequence `` represents a token boundary. 
fn is_boundary(left: char, right: char) -> bool { right == '\0' || (unicode_latin_alpha(left) != unicode_latin_alpha(right)) || (unicode_latin_digit(left) != unicode_latin_digit(right)) || (unicode_whitespace(left) != unicode_whitespace(right)) || is_punctuation(left) || is_punctuation(right) } /// Returns true if the sequence `` represents a hard token boundary. fn is_hard_boundary(left: char, right: char) -> bool { right == '\0' || left == '(' || left == ')' || right == '(' || right == ')' } /// Returns true if the given string must have been shell-quoted fn was_quoted(s: &str) -> bool { s.contains(&[' ', '\t', '(', ')', '<', '>', '&', '~'][..]) } fn is_unicode_hex_digit(c: char) -> bool { match c { '0'..='9' | 'a'..='f' | 'A'..='F' => true, _ => false, } } fn hex_to_char(hex: &str) -> Option { let mut num = 0u32; for c in hex.chars() { num <<= 4; num += match c { '0'..='9' => c as u32 - '0' as u32, 'a'..='f' => 10 + (c as u32 - 'a' as u32), 'A'..='F' => 10 + (c as u32 - 'A' as u32), _ => return None, } } if let Ok(c) = char::try_from(num) { Some(c) } else { None } } /// Strips matching quote symbols from the beginning and end of the given string /// (removing all quotes if given a single quote `'`) fn dequote<'a, 'b>(s: &'a str, quotes: &'b str) -> &'a str { // note that this returns a new ref to the same string, rather // than modifying its argument as the C++ version does. 
if let Some(first_char) = s.chars().next() { if let Some(last_char) = s.chars().rev().next() { if first_char == last_char && quotes.contains(first_char) { let quote_len = first_char.len_utf8(); if s.len() > 2 * quote_len { return &s[quote_len..s.len() - quote_len]; } else { return ""; } } } } s } fn read_word_quoted(text: &str, quotes: &str, cursor: usize) -> Option<(String, usize)> { let mut pos = cursor; let mut res = String::new(); let mut skipchars = 0; let mut chars = text.get(cursor..)?.chars(); let quote = chars.next(); if quote.is_none() { return None; } let quote = quote.unwrap(); if !quotes.contains(quote) { return None; } res.push(quote); pos += quote.len_utf8(); for c in chars { if skipchars > 0 { skipchars -= 1; pos += c.len_utf8(); continue; } if c == quote { res.push(c); pos += quote.len_utf8(); return Some((res, pos)); } if c == 'U' { if let Some('+') = text.get(pos + 1..).unwrap().chars().next() { if let Some(hex) = text.get(pos + 2..pos + 6) { if let Some(c) = hex_to_char(hex) { res.push(c); skipchars += 5; } else { res.push('U'); } } else { res.push('U'); } } else { res.push('U'); } } else if c == '\\' { match text.get(pos + 1..).unwrap().chars().next() { None => res.push(c), Some('b') => res.push('\x08'), Some('f') => res.push('\x0c'), Some('n') => res.push('\x0a'), Some('r') => res.push('\x0d'), Some('t') => res.push('\x09'), Some('v') => res.push('\x0b'), Some('u') => { if let Some(hex) = text.get(pos + 2..pos + 6) { if let Some(c) = hex_to_char(hex) { res.push(c); skipchars += 4; } else { res.push('u') } } else { res.push('u') } } Some(c @ _) => res.push(c), } skipchars += 1; } else { res.push(c); } pos += c.len_utf8(); } None } fn read_word_unquoted(text: &str, cursor: usize) -> Option<(String, usize)> { let mut pos = cursor; let mut res = String::new(); let mut prev = None; let mut skipchars = 0; for c in text.get(cursor..)?.chars() { if skipchars > 0 { skipchars -= 1; pos += c.len_utf8(); prev = Some(c); continue; } if 
unicode_whitespace(c) { break; } if let Some(p) = prev { if is_hard_boundary(p, c) { break; } } if c == 'U' { if let Some('+') = text.get(pos + 1..).unwrap().chars().next() { if let Some(hex) = text.get(pos + 2..pos + 6) { if let Some(c) = hex_to_char(hex) { res.push(c); skipchars += 5; } else { res.push('U'); } } else { res.push('U'); } } else { res.push('U'); } } else if c == '\\' { match text.get(pos + 1..).unwrap().chars().next() { None => res.push(c), Some('b') => res.push('\x08'), Some('f') => res.push('\x0c'), Some('n') => res.push('\x0a'), Some('r') => res.push('\x0d'), Some('t') => res.push('\x09'), Some('v') => res.push('\x0b'), Some('u') => { if let Some(hex) = text.get(pos + 2..pos + 6) { if let Some(c) = hex_to_char(hex) { res.push(c); skipchars += 4; } else { res.push('u') } } else { res.push('u') } } Some(c @ _) => res.push(c), } skipchars += 1; } else { res.push(c); } pos += c.len_utf8(); prev = Some(c); } if pos != cursor { Some((res, pos)) } else { None } } fn common_length(s1: &str, s2: &str) -> usize { s1.chars() .zip(s2.chars()) .take_while(|(c1, c2)| c1 == c2) .collect::>() .len() } #[derive(Debug, PartialEq)] pub struct DecomposedPair { name: String, modifier: String, separator: String, value: String, } impl Lexer { pub fn new>(text: S) -> Lexer { let text = text.into(); let eos = text.len(); Lexer { text, cursor: 0, eos, attributes: vec![], } } pub fn add_attribute>(&mut self, attribute: S) { self.attributes.push(attribute.into()); } /// This static method tokenizes the input, but discards the type information. 
pub fn split>(text: S) -> Vec { Lexer::new(text).into_iter().map(|(tx, ty)| tx).collect() } pub fn token(&mut self) -> Option<(String, Type)> { // Eat whitespace while let Some(c) = self.text[self.cursor..].chars().next() { if unicode_whitespace(c) { self.cursor += c.len_utf8(); continue; } break; } if self.cursor == self.eos { return None; } // The sequence is specific, and must follow these rules: // - date < duration < uuid < identifier // - dom < uuid // - uuid < hex < number // - url < pair < identifier // - hex < number // - separator < tag < operator // - path < substitution < pattern // - set < number // - word last if let Some(r) = self.is_string("\"'") { return Some(r); } if let Some(r) = self.is_date() { return Some(r); } if let Some(r) = self.is_duration() { return Some(r); } if let Some(r) = self.is_url() { return Some(r); } if let Some(r) = self.is_pair() { return Some(r); } if let Some(r) = self.is_uuid(true) { return Some(r); } if let Some(r) = self.is_set() { return Some(r); } if let Some(r) = self.is_dom() { return Some(r); } if let Some(r) = self.is_hexnumber() { return Some(r); } if let Some(r) = self.is_number() { return Some(r); } if let Some(r) = self.is_separator() { return Some(r); } if let Some(r) = self.is_tag() { return Some(r); } if let Some(r) = self.is_path() { return Some(r); } if let Some(r) = self.is_substitution() { return Some(r); } if let Some(r) = self.is_pattern() { return Some(r); } if let Some(r) = self.is_operator() { return Some(r); } if let Some(r) = self.is_identifier() { return Some(r); } if let Some(r) = self.is_word() { return Some(r); } None } pub fn decompose_pair(text: &str) -> Option { let npos = usize::max_value(); // npos let dot = text.find(".").unwrap_or(npos); // npos let sep_defer = text.find("::").unwrap_or(npos); // npos let sep_eval = text.find(":=").unwrap_or(npos); // 4 let sep_colon = text.find(":").unwrap_or(npos); // npos let sep_equal = text.find("=").unwrap_or(npos); let (sep, sep_end) = if 
sep_defer != npos && sep_defer <= sep_eval && sep_defer <= sep_colon && sep_defer <= sep_equal { (sep_defer, sep_defer + 2) } else if sep_eval != npos && sep_eval <= sep_defer && sep_eval <= sep_colon && sep_eval <= sep_equal { (sep_eval, sep_eval + 2) } else if sep_colon != npos && sep_colon <= sep_defer && sep_colon <= sep_eval && sep_colon <= sep_equal { (sep_colon, sep_colon + 1) } else if sep_equal != npos && sep_equal <= sep_defer && sep_equal <= sep_eval && sep_equal <= sep_colon { (sep_equal, sep_equal + 1) } else { return None; }; let (name, modifier) = if dot != npos && dot < sep { ( text.get(0..dot).unwrap().into(), text.get(dot + 1..sep).unwrap().into(), ) } else { (text.get(0..sep).unwrap().into(), "".into()) }; let separator = text.get(sep..sep_end).unwrap().into(); let value = text.get(sep_end..).unwrap().into(); Some(DecomposedPair { name, modifier, separator, value, }) } // recognizers for the `token` method fn is_string(&mut self, quotes: &str) -> Option<(String, Type)> { if let Some((s, pos)) = read_word_quoted(&self.text, quotes, self.cursor) { self.cursor = pos; return Some((s, Type::String)); } None } fn is_date(&mut self) -> Option<(String, Type)> { let (_, read) = DateTime::parse(&self.text[self.cursor..], "")?; let token = self.text[self.cursor..self.cursor + read].into(); self.cursor += read; Some((token, Type::Date)) } fn is_duration(&mut self) -> Option<(String, Type)> { let marker = self.cursor; if self.is_operator().is_some() { self.cursor = marker; return None; } let (_, read) = Duration::parse(&self.text[self.cursor..], "")?; let token = self.text[self.cursor..self.cursor + read].into(); self.cursor += read; Some((token, Type::Duration)) } fn is_url(&mut self) -> Option<(String, Type)> { let remainder = &self.text[self.cursor..]; if remainder.starts_with("https://") || remainder.starts_with("http://") { if let Some(i) = remainder.find(unicode_whitespace) { let token = &remainder[..i]; self.cursor += i; return Some((token.into(), 
Type::URL)); } else { self.cursor = self.eos; return Some((remainder.into(), Type::URL)); } } None } fn is_pair(&mut self) -> Option<(String, Type)> { let marker = self.cursor; if self.is_identifier().is_some() { let separator = &self.text[self.cursor..]; if separator.starts_with("::") || separator.starts_with(":=") { self.cursor += 2; } else if separator.starts_with(":") || separator.starts_with("=") { self.cursor += 1; } else { self.cursor = marker; return None; } // String, word, or nothing are all valid let marker2 = self.cursor; if let Some((word, end)) = read_word_quoted(&self.text[..], "'\"", self.cursor) { self.cursor = end; return Some(( format!("{}{}", &self.text[marker..marker2], word), Type::Pair, )); } if let Some((word, end)) = read_word_unquoted(&self.text[..], self.cursor) { self.cursor = end; return Some(( format!("{}{}", &self.text[marker..marker2], word), Type::Pair, )); } if self.cursor == self.eos || unicode_whitespace(self.text[self.cursor..].chars().next().unwrap()) { return Some((self.text[marker..self.cursor].into(), Type::Pair)); } } self.cursor = marker; None } fn is_uuid(&mut self, end_boundary: bool) -> Option<(String, Type)> { let mut i = 0; for c in self.text[self.cursor..].chars() { if UUID_PATTERN[i] == b'x' { if !is_unicode_hex_digit(c) { break; } } else { if c != '-' { break; } } i += 1; if i >= UUID_PATTERN.len() { break; } } if i < UUID_MIN_LENGTH { return None; } if end_boundary { let c = self.text[self.cursor + i..].chars().next(); if let Some(c) = c { if !unicode_whitespace(c) && !is_single_char_operator(c) { return None; } } } let token = self.text[self.cursor..self.cursor + i].into(); self.cursor += i; Some((token, Type::Uuid)) } fn is_set(&mut self) -> Option<(String, Type)> { let marker = self.cursor; let mut count = 0; loop { if self.is_integer().is_some() { count += 1; if self.is_literal("-", false, false) { if self.is_integer().is_some() { count += 1; } else { self.cursor = marker; return None; } } } else { self.cursor 
= marker; return None; } if !self.is_literal(",", false, false) { break; } } if count <= 1 { self.cursor = marker; return None; } // -1 is OK here since integers are ASCII let last_char = self.text[self.cursor - 1..].chars().next().unwrap(); // look ahead a bit match self.text[self.cursor..].chars().next() { Some(c) if !unicode_whitespace(c) && !is_hard_boundary(last_char, c) => { self.cursor = marker; return None; } _ => (), } Some((self.text[marker..self.cursor].into(), Type::Set)) } fn is_dom(&mut self) -> Option<(String, Type)> { let marker = self.cursor; // rc. ... if self.is_literal("rc.", false, false) && self.is_word().is_some() { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } else { self.cursor = marker; } // Literals if self.is_one_of( &vec![ "tw.syncneeded", "tw.program", "tw.args", "tw.width", "tw.height", "tw.version", "context.program", "context.args", "context.width", "context.height", "system.version", "system.os", ], false, true, ) { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } // Optional: // . // . if self.is_uuid(false).is_some() || self.is_integer().is_some() { if !self.is_literal(".", false, false) { self.cursor = marker; return None; } } // Any failure after this line should rollback to the checkpoint. let checkpoint = self.cursor; // [prefix]tags. if self.is_literal("tags", false, false) && self.is_literal(".", false, false) && self.is_word().is_some() { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } else { self.cursor = checkpoint; } // [prefix]attribute (bounded) // (have to clone here to avoid double-borrowing self let attributes = self.attributes.clone(); if self.is_one_of(&attributes, false, true) { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } // [prefix]attribute. 
(unbounded) if self.is_one_of(&attributes, false, false) { if self.is_literal(".", false, false) { let attribute = &self.text[checkpoint..self.cursor - 1]; // if attribute type is 'date', then it has sub-elements. if attribute == "date" && self.is_one_of(&DATE_SUBELEMENTS, false, true) { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } self.cursor = checkpoint; } // Lookahead: ! else if !self.text[marker..] .chars() .next() .map_or(false, |c| unicode_latin_alpha(c)) { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } self.cursor = checkpoint; } // [prefix]annotations. if self.is_literal("annotations", true, false) && self.is_literal(".", false, false) { if self.is_literal("count", false, false) { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } if self.is_integer().is_some() { if self.is_literal(".", false, false) { if self.is_literal("description", false, true) { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } else if self.is_literal("entry", false, true) { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } else if self.is_literal("entry", false, false) && self.is_literal(".", false, false) && self.is_one_of(&DATE_SUBELEMENTS, false, true) { return Some((self.text[marker..self.cursor].into(), Type::DOM)); } } } else { self.cursor = checkpoint; } } self.cursor = marker; None } fn is_hexnumber(&mut self) -> Option<(String, Type)> { let remainder = &self.text[self.cursor..]; if !remainder.starts_with("0x") { return None; } let mut end = 2; for (i, c) in remainder[2..].char_indices() { if is_unicode_hex_digit(c) { end = 2 + i + c.len_utf8(); } else { break; } } if end > 2 { self.cursor += end; Some((remainder[..end].into(), Type::Hex)) } else { None } } fn is_number(&mut self) -> Option<(String, Type)> { let remainder = &self.text[self.cursor..]; let mut chars = remainder.char_indices().peekable(); let mut marker = 0; // A hand-rolled regexp. States are as follows: // \d \d* (. 
\d \d*)? ([eE] [+-]? \d \d* (. \d \d*)?)? // 0 1 2 3 4 5 6 7 8 9 10 11 12 let mut state = 0; loop { let c = match chars.peek() { Some((i, c)) => { marker = *i; Some(*c) } None => None, }; match (state, c) { (0, Some(c)) if unicode_latin_digit(c) => state = 1, (1, Some(c)) if unicode_latin_digit(c) => state = 2, (1, Some(c)) if c == '.' => state = 3, (1, Some(c)) if c == 'e' || c == 'E' => state = 6, (1, _) => break, (2, Some(c)) if unicode_latin_digit(c) => state = 2, (2, Some(c)) if c == '.' => state = 3, (2, Some(c)) if c == 'e' || c == 'E' => state = 6, (2, _) => break, (3, Some(c)) if unicode_latin_digit(c) => state = 4, (3, Some(c)) if c == 'e' || c == 'E' => state = 6, (3, _) => break, (4, Some(c)) if unicode_latin_digit(c) => state = 5, (4, Some(c)) if c == 'e' || c == 'E' => state = 6, (4, _) => break, (5, Some(c)) if unicode_latin_digit(c) => state = 5, (5, Some(c)) if c == 'e' || c == 'E' => state = 6, (5, _) => break, (6, Some(c)) if unicode_latin_digit(c) => state = 8, (6, Some(c)) if c == '-' || c == '+' => state = 7, (6, _) => break, (7, Some(c)) if unicode_latin_digit(c) => state = 8, (7, _) => break, (8, Some(c)) if unicode_latin_digit(c) => state = 9, (8, Some(c)) if c == '.' => state = 10, (8, _) => break, (9, Some(c)) if unicode_latin_digit(c) => state = 9, (9, Some(c)) if c == '.' => state = 10, (9, _) => break, (10, Some(c)) if unicode_latin_digit(c) => state = 11, (10, _) => break, (11, Some(c)) if unicode_latin_digit(c) => state = 11, (11, _) => break, _ => return None, }; if let Some((i, c)) = chars.next() { marker = i + c.len_utf8(); } } // lookahead if let Some((_, c)) = chars.peek() { if !unicode_whitespace(*c) && !is_single_char_operator(*c) { return None; } } self.cursor += marker; Some((remainder[..marker].into(), Type::Number)) } fn is_separator(&mut self) -> Option<(String, Type)> { let next_chars = self .text .get(self.cursor..self.cursor + 2)? .chars() .collect::>(); if &next_chars[..] 
== &['-', '-'] { self.cursor += 2; return Some(("--".into(), Type::Separator)); } None } fn is_tag(&mut self) -> Option<(String, Type)> { let mut marker = self.cursor; // Lookbehind: Assert ^ or preceded by whitespace, (, or ). if marker > 0 { // if the previous byte is not a valid character, then it's // not ( or ) if let Some(lookbehind) = self.text.get(self.cursor - 1..) { if let Some(c) = lookbehind.chars().next() { if !unicode_whitespace(c) && c != '(' && c != ')' { return None; } } } else { return None; } } let mut chars = self.text[marker..].chars(); if let Some(c) = chars.next() { if c == '+' || c == '-' { marker += c.len_utf8(); if let Some(c) = chars.next() { if is_identifier_start(c) { marker += c.len_utf8(); while let Some(c) = chars.next() { if !is_identifier_next(c) { break; } marker += c.len_utf8(); } let token = self.text[self.cursor..marker].into(); self.cursor = marker; return Some((token, Type::Tag)); } } } } None } fn is_path(&mut self) -> Option<(String, Type)> { let mut marker = self.cursor; let mut slash_count = 0; let mut chars = self.text[self.cursor..].chars().peekable(); loop { if let Some('/') = chars.next() { marker += 1; slash_count += 1; } else { break; } if let Some(c) = chars.next() { if !unicode_whitespace(c) && c != '/' { marker += 1; while let Some(c) = chars.peek() { if !unicode_whitespace(*c) && *c != '/' { marker += 1; chars.next(); } else { break; } } } else { break; } } else { break; } } if marker > self.cursor && slash_count > 3 { let token = self.text[self.cursor..marker].into(); self.cursor = marker; return Some((token, Type::Path)); } None } fn is_substitution(&mut self) -> Option<(String, Type)> { let marker = self.cursor; if let Some((_, end)) = read_word_quoted(&self.text, "/", self.cursor) { // end-1 to step back over the middle `/` if let Some((_, end)) = read_word_quoted(&self.text, "/", end - 1) { let mut remainder = self.text[end..].chars(); return match remainder.next() { None => { self.cursor = end; 
Some((self.text[marker..self.cursor].into(), Type::Substitution)) } Some('g') => match remainder.next() { None => { self.cursor = end + 1; Some((self.text[marker..self.cursor].into(), Type::Substitution)) } Some(c) if unicode_whitespace(c) => { self.cursor = end + 1; Some((self.text[marker..self.cursor].into(), Type::Substitution)) } _ => None, }, Some(c) if unicode_whitespace(c) => { self.cursor = end; Some((self.text[marker..self.cursor].into(), Type::Substitution)) } _ => None, }; } } None } fn is_pattern(&mut self) -> Option<(String, Type)> { let marker = self.cursor; if let Some((_, end)) = read_word_quoted(&self.text, "/", self.cursor) { if end == self.eos || unicode_whitespace(self.text[end..].chars().next().unwrap()) { self.cursor = end; return Some((self.text[marker..self.cursor].into(), Type::Pattern)); } } None } fn is_operator(&mut self) -> Option<(String, Type)> { let remainder = &self.text[self.cursor..]; // operators that do not require a boundary afterward for strop in &[ // custom stuff "_hastag_", "_notag_", "_neg_", "_pos_", // triple-char "!==", // and, xor below // double-char "==", "!=", "<=", ">=", "||", "&&", "!~", // or below // single-char "+", "-", "*", "/", "(", ")", "<", ">", "^", "!", "%", "=", "~", ] { if remainder.starts_with(strop) { self.cursor += strop.len(); return Some((remainder[..strop.len()].into(), Type::Op)); } } // operators that require a boundary afterward for strop in &["and", "xor", "!==", "or"] { if remainder.starts_with(strop) { if self.cursor + strop.len() == self.eos || is_boundary( remainder[strop.len() - 1..].chars().next().unwrap(), remainder[strop.len()..].chars().next().unwrap(), ) { self.cursor += strop.len(); return Some((remainder[..strop.len()].into(), Type::Op)); } } } None } fn is_identifier(&mut self) -> Option<(String, Type)> { let mut chars = self.text.get(self.cursor..)?.chars(); let start = self.cursor; let mut len = 0; if let Some(c) = chars.next() { if is_identifier_start(c) { len += c.len_utf8(); 
for c in chars { if !is_identifier_next(c) { break; } len += c.len_utf8(); } self.cursor += len; return Some((self.text.get(start..self.cursor)?.into(), Type::Identifier)); } } None } fn is_word(&mut self) -> Option<(String, Type)> { let mut marker = self.cursor; for c in self.text[self.cursor..].chars() { if unicode_whitespace(c) || is_single_char_operator(c) { break; } marker += c.len_utf8(); } if marker > self.cursor { let token = self.text[self.cursor..marker].into(); self.cursor = marker; return Some((token, Type::Word)); } None } // utilities that may modify self fn is_one_of>( &mut self, options: &[S], allow_abbreviations: bool, end_boundary: bool, ) -> bool { for option in options { if self.is_literal(option.as_ref(), allow_abbreviations, end_boundary) { return true; } } false } fn is_literal(&mut self, literal: &str, allow_abbreviations: bool, end_boundary: bool) -> bool { // calculate the number of common characters between the literal and the string being // parsed let common = common_length(literal, &self.text[self.cursor..]); // Without abbreviations, common must equal literal length. 
if !allow_abbreviations && common < literal.len() { return false; } if allow_abbreviations && common < MINIMUM_MATCH_LEN { return false; } if end_boundary { let c = self.text[self.cursor + common..].chars().next(); if let Some(c) = c { if !unicode_whitespace(c) && !is_single_char_operator(c) { return false; } } } self.cursor += common; true } fn is_integer(&mut self) -> Option<(String, Type)> { let mut marker = self.cursor; for c in self.text[self.cursor..].chars() { if !unicode_latin_digit(c) { break; } marker += c.len_utf8(); } if marker > self.cursor { let token = self.text[self.cursor..marker].into(); self.cursor = marker; return Some((token, Type::Number)); } None } } struct LexerIterator(Lexer); impl Iterator for LexerIterator { type Item = (String, Type); fn next(&mut self) -> Option { self.0.token() } } impl IntoIterator for Lexer { type Item = (String, Type); type IntoIter = LexerIterator; fn into_iter(self) -> Self::IntoIter { LexerIterator(self) } } #[cfg(test)] mod test { use super::*; const NONE: Option<(String, Type)> = None; #[test] fn test_is_punctuation_comma() { assert!(is_punctuation(',')); } #[test] fn test_is_punctuation_slash() { assert!(is_punctuation('/')); } #[test] fn test_is_punctuation_at() { assert!(!is_punctuation('@')); } #[test] fn test_is_punctuation_hash() { assert!(!is_punctuation('#')); } #[test] fn test_is_punctuation_dollar() { assert!(!is_punctuation('$')); } #[test] fn test_is_punctuation_underscore() { assert!(!is_punctuation('_')); } #[test] fn test_is_punctuation_space() { assert!(!is_punctuation(' ')); } #[test] fn test_is_punctuation_a() { assert!(!is_punctuation('a')); } #[test] fn test_is_punctuation_9() { assert!(!is_punctuation('9')); } #[test] fn test_is_punctuation_latin() { assert!(!is_punctuation('é')); } #[test] fn test_is_punctuation_euro() { assert!(!is_punctuation('€')); } #[test] fn test_is_punctuation_smile() { assert!(!is_punctuation('☺')); } #[test] fn test_is_punctuation_numeric() { 
assert!(!is_punctuation('¾')); } #[test] fn test_is_boundary() { assert!(is_boundary(' ', 'a')); assert!(is_boundary('a', ' ')); assert!(is_boundary(' ', '+')); assert!(is_boundary(' ', ',')); assert!(!is_boundary('3', '4')); assert!(is_boundary('(', '(')); assert!(!is_boundary('r', 'd')); } #[test] fn test_was_quoted() { assert!(!was_quoted("")); assert!(!was_quoted("foo")); assert!(was_quoted("a b")); assert!(was_quoted("(a)")); } #[test] fn test_dequote() { assert_eq!(dequote("foo", "'\""), "foo"); assert_eq!(dequote("'foo'", "'\""), "foo"); assert_eq!(dequote("\"foo\"", "'\""), "foo"); assert_eq!(dequote("'o\\'clock'", "'\""), "o\\'clock"); // single quote char assert_eq!(dequote("'", "'\""), ""); // multibyte quote char assert_eq!(dequote("éo\\'clocké", "é"), "o\\'clock"); } #[test] fn test_token_empty() { let mut l = Lexer::new(""); assert_eq!(l.token(), NONE); } #[test] fn test_token_tokens() { let mut l = Lexer::new( " one 'two \\'three\\''+456-(1.3*2 - 0x12) 1.2e-3.4 foo.bar and '\\u20ac'", ); assert_eq!(l.token(), Some((String::from("one"), Type::Identifier))); assert_eq!( l.token(), Some((String::from("'two 'three''"), Type::String)) ); assert_eq!(l.token(), Some((String::from("+"), Type::Op))); assert_eq!(l.token(), Some((String::from("456"), Type::Number))); assert_eq!(l.token(), Some((String::from("-"), Type::Op))); assert_eq!(l.token(), Some((String::from("("), Type::Op))); assert_eq!(l.token(), Some((String::from("1.3"), Type::Number))); assert_eq!(l.token(), Some((String::from("*"), Type::Op))); assert_eq!(l.token(), Some((String::from("2"), Type::Number))); assert_eq!(l.token(), Some((String::from("-"), Type::Op))); assert_eq!(l.token(), Some((String::from("0x12"), Type::Hex))); assert_eq!(l.token(), Some((String::from(")"), Type::Op))); assert_eq!(l.token(), Some((String::from("1.2e-3.4"), Type::Number))); assert_eq!(l.token(), Some((String::from("foo.bar"), Type::Identifier))); assert_eq!(l.token(), Some((String::from("and"), Type::Op))); 
assert_eq!(l.token(), Some((String::from("'€'"), Type::String))); assert_eq!(l.token(), None); } #[test] fn test_token_short_numbers() { let mut l = Lexer::new("1 12 123 1234 12345 123456 1234567 123.45e 12.34e+"); assert_eq!(l.token(), Some((String::from("1"), Type::Number))); assert_eq!(l.token(), Some((String::from("12"), Type::Number))); assert_eq!(l.token(), Some((String::from("123"), Type::Number))); assert_eq!(l.token(), Some((String::from("1234"), Type::Number))); assert_eq!(l.token(), Some((String::from("12345"), Type::Number))); assert_eq!(l.token(), Some((String::from("123456"), Type::Number))); assert_eq!(l.token(), Some((String::from("1234567"), Type::Number))); assert_eq!(l.token(), Some((String::from("123.45e"), Type::Number))); assert_eq!(l.token(), Some((String::from("12.34e+"), Type::Number))); assert_eq!(l.token(), None); } #[test] fn test_read_word_quoted_simple() { assert_eq!( read_word_quoted("'one two'", "'\"", 0), Some((String::from("'one two'"), 9)) ); } #[test] fn test_read_word_quoted_unterminated() { assert_eq!( read_word_quoted("'one two", "'\"", 0), None as Option<(String, usize)> ); } #[test] fn test_read_word_quoted_backslash_u() { assert_eq!( read_word_quoted("'pay \\u20a43'", "'\"", 0), Some((String::from("'pay ₤3'"), 13)) ); } #[test] fn test_read_word_quoted_u_plus() { assert_eq!( read_word_quoted("\"pay U+20AC5\"", "'\"", 0), Some((String::from("\"pay €5\""), 13)) ); } #[test] fn test_read_word_unquoted_simple() { assert_eq!( read_word_unquoted("input", 0), Some((String::from("input"), 5)) ); } #[test] fn test_read_word_unquoted_escaped_space() { assert_eq!( read_word_unquoted("one\\ two", 0), Some((String::from("one two"), 8)) ); } #[test] fn test_read_word_unquoted_escaped_quote() { assert_eq!( read_word_unquoted("one\\\"two", 0), Some((String::from("one\"two"), 8)) ); } #[test] fn test_read_word_unquoted_escaped_newline() { assert_eq!( read_word_unquoted("one\\ntwo", 0), Some((String::from("one\x0atwo"), 8)) ); } #[test] fn 
test_read_word_unquoted_escaped_backslash_u() {
        // `\uXXXX` with four hex digits is decoded to the corresponding character.
        assert_eq!(
            read_word_unquoted("pay\\u20a43", 0),
            Some((String::from("pay₤3"), 10))
        );
    }

    // With fewer than four hex digits the backslash is dropped and the rest is kept
    // literally.
    #[test]
    fn test_read_word_unquoted_incomplete_escaped_backslash_u() {
        assert_eq!(
            read_word_unquoted("\\u203", 0),
            Some((String::from("u203"), 5))
        );
    }

    #[test]
    fn test_read_word_unquoted_nonhex_escaped_backslash_u() {
        assert_eq!(
            read_word_unquoted("\\u2fghk", 0),
            Some((String::from("u2fghk"), 7))
        );
    }

    // `U+XXXX` is decoded like `\uXXXX`; an incomplete `U+` sequence is kept verbatim.
    #[test]
    fn test_read_word_unquoted_escaped_u_plus() {
        assert_eq!(
            read_word_unquoted("payU+20AC4", 0),
            Some((String::from("pay€4"), 10))
        );
    }

    #[test]
    fn test_read_word_unquoted_incomplete_u_plus() {
        assert_eq!(
            read_word_unquoted("U+20A", 0),
            Some((String::from("U+20A"), 5))
        );
    }

    #[test]
    fn test_read_word_trailing_whitespace() {
        assert_eq!(
            read_word_unquoted("one ", 0),
            Some((String::from("one"), 3))
        );
    }

    // Reads three consecutive words from one input by restarting at explicit offsets;
    // the escaped space keeps "three four" together as a single word.
    #[test]
    fn test_read_word_unquoted_several_words() {
        let text = "one 'two' three\\ four";
        assert_eq!(read_word_unquoted(text, 0), Some((String::from("one"), 3)));
        assert_eq!(
            read_word_unquoted(text, 4),
            Some((String::from("'two'"), 9))
        );
        assert_eq!(
            read_word_unquoted(text, 10),
            Some((String::from("three four"), 21))
        );
    }

    // `common_length(a, b)` is the length of the shared prefix of the two strings.
    #[test]
    fn test_common_length_empty() {
        assert_eq!(common_length("", ""), 0);
    }

    #[test]
    fn test_common_length_match_one() {
        assert_eq!(common_length("a", "a"), 1);
    }

    #[test]
    fn test_common_length_match_longer() {
        assert_eq!(common_length("abcde", "abcde"), 5);
    }

    #[test]
    fn test_common_length_match_s2_short() {
        assert_eq!(common_length("abc", ""), 0);
    }

    #[test]
    fn test_common_length_match_differ() {
        assert_eq!(common_length("abc", "def"), 0);
    }

    #[test]
    fn test_common_length_match_s2_prefix() {
        assert_eq!(common_length("foobar", "foo"), 3);
    }

    #[test]
    fn test_common_length_match_s1_prefix() {
        assert_eq!(common_length("foo", "foobar"), 3);
    }

    // The `is_*` tests below check both the returned token and where the cursor ends up.
    #[test]
    fn test_is_string() {
        let mut l = Lexer::new("'one'");
        assert_eq!(l.is_string("'\""), Some(("'one'".into(), Type::String)));
        assert_eq!(l.cursor, 5);
    }

    #[test]
    fn
test_is_string_negative() {
        // A failed match must leave the cursor where it started.
        let mut l = Lexer::new("one");
        assert_eq!(l.is_string("'\""), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_string_empty() {
        let mut l = Lexer::new("''");
        assert_eq!(l.is_string("'\""), Some(("''".into(), Type::String)));
        assert_eq!(l.cursor, 2);
    }

    // The input contains a literal tab character, which is passed through unchanged.
    #[test]
    fn test_is_string_escape() {
        let mut l = Lexer::new("'one\ttwo'");
        assert_eq!(
            l.is_string("'\""),
            Some(("'one\ttwo'".into(), Type::String))
        );
        assert_eq!(l.cursor, 9);
    }

    #[test]
    fn test_is_date_year_eos() {
        let mut l = Lexer::new("2015");
        assert_eq!(l.is_date(), Some(("2015".into(), Type::Date)));
        assert_eq!(l.cursor, 4);
    }

    #[test]
    fn test_is_date_epoch() {
        let mut l = Lexer::new("315532800");
        assert_eq!(l.is_date(), Some(("315532800".into(), Type::Date)));
        assert_eq!(l.cursor, 9);
    }

    #[test]
    fn test_is_date_year_ws() {
        let mut l = Lexer::new("2015 ");
        assert_eq!(l.is_date(), Some(("2015".into(), Type::Date)));
        assert_eq!(l.cursor, 4);
    }

    #[test]
    fn test_is_date_year_ident() {
        let mut l = Lexer::new("2015abc");
        assert_eq!(l.is_date(), Some(("2015".into(), Type::Date)));
        assert_eq!(l.cursor, 4);
    }

    #[test]
    fn test_is_date_year_plus() {
        let mut l = Lexer::new("2015+");
        assert_eq!(l.is_date(), Some(("2015".into(), Type::Date)));
        assert_eq!(l.cursor, 4);
    }

    // Unlike '+', a trailing '-' is expected to be consumed as part of the date token.
    #[test]
    fn test_is_date_year_minus() {
        let mut l = Lexer::new("2015-xyz");
        assert_eq!(l.is_date(), Some(("2015-".into(), Type::Date)));
        assert_eq!(l.cursor, 5);
    }

    #[test]
    fn test_is_duration_1w() {
        let mut l = Lexer::new("1w");
        assert_eq!(l.is_duration(), Some(("1w".into(), Type::Duration)));
        assert_eq!(l.cursor, 2);
    }

    #[test]
    fn test_is_duration_op() {
        let mut l = Lexer::new("!!");
        assert_eq!(l.is_duration(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_number_digit() {
        let mut l = Lexer::new("3");
        assert_eq!(l.is_number(), Some(("3".into(), Type::Number)));
        assert_eq!(l.cursor, 1);
    }

    #[test]
    fn test_is_number_integer() {
        let mut l = Lexer::new("13");
        assert_eq!(l.is_number(), Some(("13".into(), Type::Number)));
// Tail of the integer test, the remaining `is_number` cases, and the first `is_set` cases.
        assert_eq!(l.cursor, 2);
    }

    // A trailing '-' ends the number without being consumed.
    #[test]
    fn test_is_number_trailing_minus() {
        let mut l = Lexer::new("13-");
        assert_eq!(l.is_number(), Some(("13".into(), Type::Number)));
        assert_eq!(l.cursor, 2);
    }

    #[test]
    fn test_is_number_decimal() {
        let mut l = Lexer::new("1.3");
        assert_eq!(l.is_number(), Some(("1.3".into(), Type::Number)));
        assert_eq!(l.cursor, 3);
    }

    // A second decimal point rejects the whole input and leaves the cursor untouched.
    #[test]
    fn test_is_number_multiple_decimal() {
        let mut l = Lexer::new("1.3.4");
        assert_eq!(l.is_number(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_number_decimal_no_digits() {
        let mut l = Lexer::new("1.");
        assert_eq!(l.is_number(), Some(("1.".into(), Type::Number)));
        assert_eq!(l.cursor, 2);
    }

    #[test]
    fn test_is_number_decimal_multi_digit() {
        let mut l = Lexer::new("12.32");
        assert_eq!(l.is_number(), Some(("12.32".into(), Type::Number)));
        assert_eq!(l.cursor, 5);
    }

    // Incomplete exponents ("e", "e+") are still accepted as part of the number.
    #[test]
    fn test_is_number_decimal_e_no_exponent() {
        let mut l = Lexer::new("12.32e");
        assert_eq!(l.is_number(), Some(("12.32e".into(), Type::Number)));
        assert_eq!(l.cursor, 6);
    }

    #[test]
    fn test_is_number_decimal_e_plus_no_exponent() {
        let mut l = Lexer::new("12.32e+");
        assert_eq!(l.is_number(), Some(("12.32e+".into(), Type::Number)));
        assert_eq!(l.cursor, 7);
    }

    #[test]
    fn test_is_number_decimal_e_integer_exponent() {
        let mut l = Lexer::new("12.32e-12");
        assert_eq!(l.is_number(), Some(("12.32e-12".into(), Type::Number)));
        assert_eq!(l.cursor, 9);
    }

    // A fractional exponent is accepted as well.
    #[test]
    fn test_is_number_decimal_e_decimal_exponent() {
        let mut l = Lexer::new("12.32e12.34");
        assert_eq!(l.is_number(), Some(("12.32e12.34".into(), Type::Number)));
        assert_eq!(l.cursor, 11);
    }

    // A letter directly after the digits means this is not a number at all.
    #[test]
    fn test_is_number_integer_invalid_lookahead() {
        let mut l = Lexer::new("13a");
        assert_eq!(l.is_number(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_set_singletons() {
        let mut l = Lexer::new("12,13");
        assert_eq!(l.is_set(), Some(("12,13".into(), Type::Set)));
        assert_eq!(l.cursor, 5);
    }

    #[test]
    fn test_is_set_ranges() {
        let mut l = Lexer::new("12-13,19-200");
        assert_eq!(l.is_set(),
Some(("12-13,19-200".into(), Type::Set)));
        assert_eq!(l.cursor, 12);
    }

    // Malformed sets (empty element, trailing comma) are rejected without moving the cursor.
    #[test]
    fn test_is_set_double_comma() {
        let mut l = Lexer::new("12-13,,19-200");
        assert_eq!(l.is_set(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_set_trailing_comma() {
        let mut l = Lexer::new("12-13,");
        assert_eq!(l.is_set(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_set_trailing_ws() {
        let mut l = Lexer::new("12-13 ");
        assert_eq!(l.is_set(), Some(("12-13".into(), Type::Set)));
        assert_eq!(l.cursor, 5);
    }

    #[test]
    fn test_is_set_trailing_non_hard_boundary() {
        let mut l = Lexer::new("12-13abc");
        assert_eq!(l.is_set(), NONE);
        assert_eq!(l.cursor, 0);
    }

    // NOTE(review): starting at cursor 2 and ending at 4 implies two characters precede
    // the `--`; confirm the whitespace inside this literal is intact.
    #[test]
    fn test_is_separator() {
        let mut l = Lexer::new(" -- ");
        l.cursor = 2;
        assert_eq!(l.is_separator(), Some(("--".into(), Type::Separator)));
        assert_eq!(l.cursor, 4);
    }

    #[test]
    fn test_is_separator_negative() {
        let mut l = Lexer::new("- ");
        assert_eq!(l.is_separator(), NONE);
        assert_eq!(l.cursor, 0);
    }

    // Tags: accepted at the start of input, after whitespace, and after either paren;
    // rejected directly after a letter or a non-ASCII character.
    #[test]
    fn test_is_tag_plus() {
        let mut l = Lexer::new("+foo");
        assert_eq!(l.is_tag(), Some(("+foo".into(), Type::Tag)));
        assert_eq!(l.cursor, 4);
    }

    #[test]
    fn test_is_tag_not_after_whitespace() {
        let mut l = Lexer::new("x+y");
        l.cursor = 1;
        assert_eq!(l.is_tag(), NONE);
        assert_eq!(l.cursor, 1);
    }

    #[test]
    fn test_is_tag_after_whitespace() {
        let mut l = Lexer::new(" +y");
        l.cursor = 1;
        assert_eq!(l.is_tag(), Some(("+y".into(), Type::Tag)));
        assert_eq!(l.cursor, 3);
    }

    #[test]
    fn test_is_tag_after_lparen() {
        let mut l = Lexer::new("(+y");
        l.cursor = 1;
        assert_eq!(l.is_tag(), Some(("+y".into(), Type::Tag)));
        assert_eq!(l.cursor, 3);
    }

    #[test]
    fn test_is_tag_after_rparen() {
        let mut l = Lexer::new(")+y");
        l.cursor = 1;
        assert_eq!(l.is_tag(), Some(("+y".into(), Type::Tag)));
        assert_eq!(l.cursor, 3);
    }

    // '€' is three bytes in UTF-8, so cursor 3 sits on the '+' (the cursor appears to be
    // a byte offset).
    #[test]
    fn test_is_tag_after_multibyte_char() {
        let mut l = Lexer::new("€+y");
        l.cursor = 3;
        assert_eq!(l.is_tag(), NONE);
        assert_eq!(l.cursor, 3);
    }

    #[test]
    fn test_is_url_http() {
        let mut l = Lexer::new("http://foo.com/bar");
// Tail of the http URL test, the remaining `is_url` cases, the `is_pair` cases, and the
// start of the `is_uuid` cases.
        assert_eq!(l.is_url(), Some(("http://foo.com/bar".into(), Type::URL)));
        assert_eq!(l.cursor, 18);
    }

    #[test]
    fn test_is_url_https() {
        let mut l = Lexer::new("https://foo.com/bar");
        assert_eq!(l.is_url(), Some(("https://foo.com/bar".into(), Type::URL)));
        assert_eq!(l.cursor, 19);
    }

    #[test]
    fn test_is_url_ws() {
        let mut l = Lexer::new("https://foo.com/bar ");
        assert_eq!(l.is_url(), Some(("https://foo.com/bar".into(), Type::URL)));
        assert_eq!(l.cursor, 19);
    }

    // Operator characters do not terminate a URL; only the whitespace case above does.
    #[test]
    fn test_is_url_with_ops() {
        let mut l = Lexer::new("https://foo.com/bar()+-~");
        assert_eq!(
            l.is_url(),
            Some(("https://foo.com/bar()+-~".into(), Type::URL))
        );
        assert_eq!(l.cursor, 24);
    }

    // Schemes other than http/https are not recognised as URLs here.
    #[test]
    fn test_is_url_negative() {
        let mut l = Lexer::new("file://foo.com/bar");
        assert_eq!(l.is_url(), NONE);
        assert_eq!(l.cursor, 0);
    }

    // Pairs accept the separators `::`, `:=`, `:` and `=`; trailing whitespace is not
    // part of the token.
    #[test]
    fn test_is_pair_double_colon() {
        let mut l = Lexer::new("foo::bar ");
        assert_eq!(l.is_pair(), Some(("foo::bar".into(), Type::Pair)));
        assert_eq!(l.cursor, 8);
    }

    #[test]
    fn test_is_pair_colon_eq() {
        let mut l = Lexer::new("foo:=bar ");
        assert_eq!(l.is_pair(), Some(("foo:=bar".into(), Type::Pair)));
        assert_eq!(l.cursor, 8);
    }

    #[test]
    fn test_is_pair_colon() {
        let mut l = Lexer::new("foo:bar ");
        assert_eq!(l.is_pair(), Some(("foo:bar".into(), Type::Pair)));
        assert_eq!(l.cursor, 7);
    }

    #[test]
    fn test_is_pair_equal() {
        let mut l = Lexer::new("foo=bar");
        assert_eq!(l.is_pair(), Some(("foo=bar".into(), Type::Pair)));
        assert_eq!(l.cursor, 7);
    }

    #[test]
    fn test_is_pair_quoted() {
        let mut l = Lexer::new("foo='abc def'");
        assert_eq!(l.is_pair(), Some(("foo='abc def'".into(), Type::Pair)));
        assert_eq!(l.cursor, 13);
    }

    // The escape in the quoted value is decoded in the token, while the cursor advances
    // over the full (longer) escaped input.
    #[test]
    fn test_is_pair_quoted_escapes() {
        let mut l = Lexer::new("foo='abc\\u20acdef'");
        assert_eq!(l.is_pair(), Some(("foo='abc€def'".into(), Type::Pair)));
        assert_eq!(l.cursor, 18);
    }

    // The UUID inputs in these tests are 34 characters long, i.e. a truncated UUID.
    #[test]
    fn test_is_uuid_long_eof() {
        let u = "ffffffff-ffff-ffff-ffff-ffffffffff";
        let mut l = Lexer::new(u);
        assert_eq!(l.is_uuid(true), Some((u.into(), Type::Uuid)));
        assert_eq!(l.cursor,
34);
    }

    // The boolean passed to `is_uuid` controls whether the character following the UUID
    // must be a boundary: with `true` a trailing '_' rejects the match, with `false` it
    // is ignored.
    #[test]
    fn test_is_uuid_long_ws() {
        let u = "ffffffff-ffff-ffff-ffff-ffffffffff kjdf";
        let mut l = Lexer::new(u);
        assert_eq!(l.is_uuid(true), Some((u[..34].into(), Type::Uuid)));
        assert_eq!(l.cursor, 34);
    }

    #[test]
    fn test_is_uuid_long_op() {
        let u = "ffffffff-ffff-ffff-ffff-ffffffffff+";
        let mut l = Lexer::new(u);
        assert_eq!(l.is_uuid(true), Some((u[..34].into(), Type::Uuid)));
        assert_eq!(l.cursor, 34);
    }

    #[test]
    fn test_is_uuid_long_bad_boundary() {
        let u = "ffffffff-ffff-ffff-ffff-ffffffffff_";
        let mut l = Lexer::new(u);
        assert_eq!(l.is_uuid(true), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_uuid_long_bad_boundary_ignored() {
        let u = "ffffffff-ffff-ffff-ffff-ffffffffff_";
        let mut l = Lexer::new(u);
        assert_eq!(l.is_uuid(false), Some((u[..34].into(), Type::Uuid)));
        assert_eq!(l.cursor, 34);
    }

    #[test]
    fn test_is_uuid_too_short() {
        let u = "ffffff";
        let mut l = Lexer::new(u);
        assert_eq!(l.is_uuid(true), NONE);
        assert_eq!(l.cursor, 0);
    }

    // Paths must start with '/', must not contain an empty component, and "/a/file"
    // is rejected as too short.
    #[test]
    fn test_is_path_simple() {
        let mut l = Lexer::new("/path/to/a/file");
        assert_eq!(l.is_path(), Some(("/path/to/a/file".into(), Type::Path)));
        assert_eq!(l.cursor, 15);
    }

    #[test]
    fn test_is_path_too_short() {
        let mut l = Lexer::new("/a/file");
        assert_eq!(l.is_path(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_path_trailing_slash() {
        let mut l = Lexer::new("/path/to/a/dir/");
        assert_eq!(l.is_path(), Some(("/path/to/a/dir/".into(), Type::Path)));
        assert_eq!(l.cursor, 15);
    }

    #[test]
    fn test_is_path_double_slash() {
        let mut l = Lexer::new("/a//file");
        assert_eq!(l.is_path(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_path_no_initial_slash() {
        let mut l = Lexer::new("a/path/to/a/file");
        assert_eq!(l.is_path(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_substitution_simple() {
        let mut l = Lexer::new("/foo/bar/");
        assert_eq!(
            l.is_substitution(),
            Some(("/foo/bar/".into(), Type::Substitution))
        );
        assert_eq!(l.cursor, 9);
    }

    #[test]
    fn test_is_substitution_simple_ws() {
        let mut l =
Lexer::new("/foo/bar/ ");
        assert_eq!(
            l.is_substitution(),
            Some(("/foo/bar/".into(), Type::Substitution))
        );
        assert_eq!(l.cursor, 9);
    }

    // After the closing '/', only an optional 'g' flag is allowed, and it must be followed
    // by whitespace or end of input.
    #[test]
    fn test_is_substitution_simple_g() {
        let mut l = Lexer::new("/foo/bar/g");
        assert_eq!(
            l.is_substitution(),
            Some(("/foo/bar/g".into(), Type::Substitution))
        );
        assert_eq!(l.cursor, 10);
    }

    #[test]
    fn test_is_substitution_simple_g_ws() {
        let mut l = Lexer::new("/foo/bar/g ");
        assert_eq!(
            l.is_substitution(),
            Some(("/foo/bar/g".into(), Type::Substitution))
        );
        assert_eq!(l.cursor, 10);
    }

    #[test]
    fn test_is_substitution_simple_not_g() {
        let mut l = Lexer::new("/foo/bar/h");
        assert_eq!(l.is_substitution(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_substitution_simple_not_g_op() {
        let mut l = Lexer::new("/foo/bar/+");
        assert_eq!(l.is_substitution(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_substitution_simple_g_but_not_ws() {
        let mut l = Lexer::new("/foo/bar/ghi");
        assert_eq!(l.is_substitution(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_pattern_simple() {
        let mut l = Lexer::new("/foo/");
        assert_eq!(l.is_pattern(), Some(("/foo/".into(), Type::Pattern)));
        assert_eq!(l.cursor, 5);
    }

    // Unlike quoted strings, a pattern token keeps its `\u` escape undecoded.
    #[test]
    fn test_is_pattern_escaped() {
        let mut l = Lexer::new("/f\\u20A4o/");
        assert_eq!(l.is_pattern(), Some(("/f\\u20A4o/".into(), Type::Pattern)));
        assert_eq!(l.cursor, 10);
    }

    #[test]
    fn test_is_pattern_simple_trailing_ws() {
        let mut l = Lexer::new("/foo/\n\t");
        assert_eq!(l.is_pattern(), Some(("/foo/".into(), Type::Pattern)));
        assert_eq!(l.cursor, 5);
    }

    // The operator tests only check the returned token, not the cursor position.
    #[test]
    fn test_is_operator_hastag() {
        let mut l = Lexer::new("_hastag_");
        assert_eq!(l.is_operator(), Some(("_hastag_".into(), Type::Op)));
    }

    #[test]
    fn test_is_operator_notag() {
        let mut l = Lexer::new("_notag_");
        assert_eq!(l.is_operator(), Some(("_notag_".into(), Type::Op)));
    }

    #[test]
    fn test_is_operator_neg() {
        let mut l = Lexer::new("_neg_");
        assert_eq!(l.is_operator(), Some(("_neg_".into(), Type::Op)));
    }

    #[test]
    fn test_is_operator_xor() {
        let mut l =
Lexer::new("xor");
        assert_eq!(l.is_operator(), Some(("xor".into(), Type::Op)));
    }

    #[test]
    fn test_is_identifier_empty() {
        let mut l = Lexer::new("");
        assert_eq!(l.is_identifier(), NONE);
        assert_eq!(l.cursor, 0);
    }

    // '☺' is three bytes in UTF-8, hence the cursor ends at 3.
    #[test]
    fn test_is_identifier_multibyte_nonpunct_first_char() {
        let mut l = Lexer::new("☺");
        assert_eq!(l.is_identifier(), Some(("☺".into(), Type::Identifier)));
        assert_eq!(l.cursor, 3);
    }

    // An identifier may not start with a digit.
    #[test]
    fn test_is_identifier_bad_first_char() {
        let mut l = Lexer::new("1abc");
        assert_eq!(l.is_identifier(), NONE);
        assert_eq!(l.cursor, 0);
    }

    // The identifier stops before the ':' rather than failing.
    #[test]
    fn test_is_identifier_bad_next_char() {
        let mut l = Lexer::new("a:bc");
        assert_eq!(l.is_identifier(), Some(("a".into(), Type::Identifier)));
        assert_eq!(l.cursor, 1);
    }

    #[test]
    fn test_is_identifier_ok() {
        let mut l = Lexer::new("abc");
        assert_eq!(l.is_identifier(), Some(("abc".into(), Type::Identifier)));
        assert_eq!(l.cursor, 3);
    }

    #[test]
    fn test_is_word_no() {
        let mut l = Lexer::new("+");
        assert!(l.is_word().is_none());
        assert_eq!(l.cursor, 0);
    }

    // Words are read from the current cursor position, not from the start of the text.
    #[test]
    fn test_is_word_pending() {
        let mut l = Lexer::new("foo.PENDING");
        l.cursor = 4;
        assert_eq!(l.is_word(), Some(("PENDING".into(), Type::Word)));
        assert_eq!(l.cursor, 11);
    }

    #[test]
    fn test_is_word_to_eof() {
        let mut l = Lexer::new("abc");
        assert_eq!(l.is_word(), Some(("abc".into(), Type::Word)));
        assert_eq!(l.cursor, 3);
    }

    #[test]
    fn test_is_word_nonzero_start() {
        let mut l = Lexer::new("--abc");
        l.cursor = 2;
        assert_eq!(l.is_word(), Some(("abc".into(), Type::Word)));
        assert_eq!(l.cursor, 5);
    }

    #[test]
    fn test_is_word_to_ws() {
        let mut l = Lexer::new("abc def");
        assert_eq!(l.is_word(), Some(("abc".into(), Type::Word)));
        assert_eq!(l.cursor, 3);
    }

    #[test]
    fn test_is_word_to_op() {
        let mut l = Lexer::new("abc*def");
        assert_eq!(l.is_word(), Some(("abc".into(), Type::Word)));
        assert_eq!(l.cursor, 3);
    }

    // `Lexer::split` returns just the token strings, with surrounding whitespace dropped.
    #[test]
    fn test_split_simple() {
        assert_eq!(
            Lexer::split(" ( A or B ) "),
            vec![
                String::from("("),
                String::from("A"),
                String::from("or"),
                String::from("B"),
String::from(")"),
            ]
        );
    }

    // Adjacent operators and an operator between identifiers are split apart, while
    // the exponent number and the quoted string each stay in one piece.
    #[test]
    fn test_split_confusing() {
        assert_eq!(
            Lexer::split(" +-* a+b 12.3e4 'c d'"),
            vec![
                String::from("+"),
                String::from("-"),
                String::from("*"),
                String::from("a"),
                String::from("+"),
                String::from("b"),
                String::from("12.3e4"),
                String::from("'c d'"),
            ]
        );
    }

    // Exhaustively combines every modifier, separator and value (including values that
    // themselves contain separator characters) and checks that `decompose_pair`
    // recovers each component exactly.
    #[test]
    fn test_decompose_pair_combos() {
        let name = "name";
        for modifier in ["", "mod"].iter() {
            for separator in [":", "=", "::", ":="].iter() {
                for value in ["", "value", "a:b", "a::b", "a=b", "a:=b"].iter() {
                    let input = format!(
                        "{}{}{}{}{}",
                        name,
                        // The '.' is only present when there is a modifier to introduce.
                        if !modifier.is_empty() { "." } else { "" },
                        modifier,
                        separator,
                        value
                    );
                    assert_eq!(
                        Lexer::decompose_pair(&input),
                        Some(DecomposedPair {
                            name: name.into(),
                            modifier: String::from(*modifier),
                            separator: String::from(*separator),
                            value: String::from(*value),
                        })
                    );
                }
            }
        }
    }

    // The same input fails with the last flag set to `true` and matches with `false`;
    // the trailing '.' is what distinguishes the two calls.
    #[test]
    fn test_is_one_of() {
        let mut l = Lexer::new("Grumpy.");
        let dwarves = vec![
            "Sneezy", "Doc", "Bashful", "Grumpy", "Happy", "Sleepy", "Dopey",
        ];
        assert!(!l.is_one_of(&dwarves, false, true));
        assert_eq!(l.cursor, 0);
        assert!(l.is_one_of(&dwarves, false, false));
        assert_eq!(l.cursor, 6);
    }

    #[test]
    fn test_is_integer_negative() {
        let mut l = Lexer::new("one");
        assert_eq!(l.is_integer(), NONE);
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_integer_positive() {
        let mut l = Lexer::new("123");
        assert_eq!(l.is_integer(), Some(("123".into(), Type::Number)));
        assert_eq!(l.cursor, 3);
    }

    // `is_integer` stops at the '.', leaving the rest for the caller.
    #[test]
    fn test_is_integer_trailing_dot() {
        let mut l = Lexer::new("123.foo");
        assert_eq!(l.is_integer(), Some(("123".into(), Type::Number)));
        assert_eq!(l.cursor, 3);
    }

    #[test]
    fn test_is_integer_not_at_start() {
        let mut l = Lexer::new("abc.123.foo");
        l.cursor = 4;
        assert_eq!(l.is_integer(), Some(("123".into(), Type::Number)));
        assert_eq!(l.cursor, 7);
    }

    #[test]
    fn test_is_literal_no_match() {
        let mut l = Lexer::new("one.two");
        assert!(!l.is_literal("zero", false, false));
        assert_eq!(l.cursor, 0);
    }

    #[test]
    fn test_is_literal_multi() {
        let mut l = Lexer::new("one.two");
// Tail of the multi-literal test, the abbreviation test, and the start of the `integ`
// module with its shared `lexer_test` helper.
        assert!(l.is_literal("one", false, false));
        assert_eq!(l.cursor, 3);
        assert!(l.is_literal(".", false, false));
        assert_eq!(l.cursor, 4);
        assert!(l.is_literal("two", false, true));
        assert_eq!(l.cursor, 7);
    }

    // "wonder" only matches the literal "wonderful" when the second flag is `true`.
    #[test]
    fn test_is_literal_abbrev() {
        let mut l = Lexer::new("wonder");
        assert!(!l.is_literal("wonderful", false, false));
        assert_eq!(l.cursor, 0);
        assert!(l.is_literal("wonderful", true, false));
        assert_eq!(l.cursor, 6);
    }

    mod integ {
        use super::super::*;

        /// Lexes `input` and asserts that the resulting `(token, type)` sequence
        /// equals `expected`.
        ///
        /// The check runs twice: once on `input` alone ("isolated") and once on
        /// `input` padded with a space on each side ("embedded"). Both runs register
        /// the `due`, `tags` and `description` attributes first. On failure the
        /// message names the run and the input that failed.
        fn lexer_test(input: &str, expected: Vec<(&str, Type)>) {
            // Shared body for both runs so the two cannot drift apart.
            let check = |mut lexer: Lexer, case: &str| {
                lexer.add_attribute("due");
                lexer.add_attribute("tags");
                lexer.add_attribute("description");
                let got: Vec<_> = lexer.into_iter().collect();
                // Borrow the token text so it compares against `expected`'s `&str`s.
                let got_strs: Vec<_> = got.iter().map(|(s, t)| (s.as_ref(), *t)).collect();
                assert_eq!(got_strs, expected, "{} case, input {:?}", case, input);
            };

            // isolated case..
            check(Lexer::new(input), "isolated");

            // embedded case..
            check(Lexer::new(format!(" {} ", input)), "embedded");
        }

        #[test]
        fn test_pattern_foo() {
            lexer_test("/foo/", vec![("/foo/", Type::Pattern)]);
        }

        #[test]
        fn test_pattern_escaped_slash() {
            lexer_test("/a\\/b/", vec![("/a\\/b/", Type::Pattern)]);
        }

        #[test]
        fn test_pattern_quote() {
            lexer_test("/'/", vec![("/'/", Type::Pattern)]);
        }

        // Substitution
        //
        #[test]
        fn test_subst_g() {
            lexer_test("/from/to/g", vec![("/from/to/g", Type::Substitution)]);
        }

        #[test]
        fn test_subst() {
            lexer_test("/from/to/", vec![("/from/to/", Type::Substitution)]);
        }

        // Tag
        //
        #[test]
        fn test_tag_simple() {
            lexer_test("+tag", vec![("+tag", Type::Tag)]);
        }

        #[test]
        fn test_tag_negative() {
            lexer_test("-tag", vec![("-tag", Type::Tag)]);
        }

        #[test]
        fn test_tag_at() {
            lexer_test("+@tag", vec![("+@tag", Type::Tag)]);
        }

        // Path
        //
        #[test]
        fn test_path() {
            lexer_test(
                "/long/path/to/file.txt",
                vec![("/long/path/to/file.txt", Type::Path)],
);
        }

        #[test]
        fn test_path_dir() {
            lexer_test(
                "/long/path/to/dir/",
                vec![("/long/path/to/dir/", Type::Path)],
            );
        }

        // Word
        //
        // A leading digit keeps this from being an Identifier; it lexes as one Word.
        #[test]
        fn test_1_foo_bar() {
            lexer_test("1.foo.bar", vec![("1.foo.bar", Type::Word)]);
        }

        // Identifier
        //
        #[test]
        fn test_foo() {
            lexer_test("foo", vec![("foo", Type::Identifier)]);
        }

        #[test]
        fn test_multibyte_ident() {
            lexer_test("Çirçös", vec![("Çirçös", Type::Identifier)]);
        }

        #[test]
        fn test_multibyte_nonpunctuation_single_char() {
            lexer_test("☺", vec![("☺", Type::Identifier)]);
        }

        #[test]
        fn test_name() {
            lexer_test("name", vec![("name", Type::Identifier)]);
        }

        #[test]
        fn test_f1() {
            lexer_test("f1", vec![("f1", Type::Identifier)]);
        }

        #[test]
        fn test_foo_dot_bar() {
            lexer_test("foo.bar", vec![("foo.bar", Type::Identifier)]);
        }

        // Shaped like a UUID but with underscores as separators, so it must stay an
        // Identifier.
        #[test]
        fn test_long_with_underscore() {
            lexer_test(
                "a1a1a1a1_a1a1_a1a1_a1a1_a1a1a1a1a1a1",
                vec![("a1a1a1a1_a1a1_a1a1_a1a1_a1a1a1a1a1a1", Type::Identifier)],
            );
        }

        // Word that starts with 'or', which is an operator, but should be ignored.
// #[test] fn test_starts_with_or() { lexer_test("ordinary", vec![("ordinary", Type::Identifier)]); } // DOM // #[test] fn test_due() { lexer_test("due", vec![("due", Type::DOM)]); } #[test] fn test_123_tags() { lexer_test("123.tags", vec![("123.tags", Type::DOM)]); } #[test] fn test_123_tags_pending() { lexer_test("123.tags.PENDING", vec![("123.tags.PENDING", Type::DOM)]); } #[test] fn test_123_description() { lexer_test("123.description", vec![("123.description", Type::DOM)]); } #[test] fn test_123_annotations_count() { lexer_test( "123.annotations.count", vec![("123.annotations.count", Type::DOM)], ); } #[test] fn test_123_annotations_1_description() { lexer_test( "123.annotations.1.description", vec![("123.annotations.1.description", Type::DOM)], ); } #[test] fn test_123_annotations_1_entry() { lexer_test( "123.annotations.1.entry", vec![("123.annotations.1.entry", Type::DOM)], ); } #[test] fn test_123_annotations_1_entry_year() { lexer_test( "123.annotations.1.entry.year", vec![("123.annotations.1.entry.year", Type::DOM)], ); } #[test] fn test_uuid_due() { lexer_test( "a360fc44-315c-4366-b70c-ea7e7520b749.due", vec![("a360fc44-315c-4366-b70c-ea7e7520b749.due", Type::DOM)], ); } #[test] fn test_numeric_uuid_due() { lexer_test( "12345678-1234-1234-1234-123456789012.due", vec![("12345678-1234-1234-1234-123456789012.due", Type::DOM)], ); } #[test] fn test_system_os() { lexer_test("system.os", vec![("system.os", Type::DOM)]); } #[test] fn test_rc_foo() { lexer_test("rc.foo", vec![("rc.foo", Type::DOM)]); } // URL // #[test] fn test_lexer_31() { lexer_test( "http://example.com", vec![("http://example.com", Type::URL)], ); } #[test] fn test_lexer_32() { lexer_test( "https://foo.example.com", vec![("https://foo.example.com", Type::URL)], ); } // String // #[test] fn test_quoted_string() { lexer_test("'one two'", vec![("'one two'", Type::String)]); } #[test] fn test_double_quoted_string() { lexer_test("\"three\"", vec![("\"three\"", Type::String)]); } #[test] fn 
test_string_quoted_with_escapes() {
            // The escaped quote is returned unescaped inside the surrounding quotes.
            lexer_test("'\\''", vec![("'''", Type::String)]);
        }

        #[test]
        fn test_string_quoted_quotes() {
            lexer_test("\"\\\"\"", vec![("\"\"\"", Type::String)]);
        }

        #[test]
        fn test_quoted_tabs() {
            lexer_test("\"\tfoo\t\"", vec![("\"\tfoo\t\"", Type::String)]);
        }

        // Both `\uXXXX` and `U+XXXX` are decoded inside quoted strings.
        #[test]
        fn test_multibyte_slash_u() {
            lexer_test("\"\\u20A43\"", vec![("\"₤3\"", Type::String)]);
        }

        #[test]
        fn test_multibyte_u_plus() {
            lexer_test("\"U+20AC4\"", vec![("\"€4\"", Type::String)]);
        }

        // Number
        //
        #[test]
        fn test_one() {
            lexer_test("1", vec![("1", Type::Number)]);
        }

        #[test]
        fn test_pi() {
            lexer_test("3.14", vec![("3.14", Type::Number)]);
        }

        #[test]
        fn test_avogadro() {
            lexer_test("6.02217e23", vec![("6.02217e23", Type::Number)]);
        }

        #[test]
        fn test_expo() {
            lexer_test("1.2e-3.4", vec![("1.2e-3.4", Type::Number)]);
        }

        #[test]
        fn test_hex() {
            lexer_test("0x2f", vec![("0x2f", Type::Hex)]);
        }

        // Set (1,2,4-7,9)
        //
        #[test]
        fn test_set_pair() {
            lexer_test("1,2", vec![("1,2", Type::Set)]);
        }

        #[test]
        fn test_set_range() {
            lexer_test("1-2", vec![("1-2", Type::Set)]);
        }

        #[test]
        fn test_set_range_pair() {
            lexer_test("1-2,4", vec![("1-2,4", Type::Set)]);
        }

        #[test]
        fn test_set_range_pair_ws() {
            lexer_test("1-2,4 ", vec![("1-2,4", Type::Set)]);
        }

        // An opening paren ends the set and is lexed as its own operator token.
        #[test]
        fn test_set_range_pair_paren() {
            lexer_test("1-2,4(", vec![("1-2,4", Type::Set), ("(", Type::Op)]);
        }

        #[test]
        fn test_ranges_and_singletons() {
            lexer_test("1-2,4,6-8", vec![("1-2,4,6-8", Type::Set)]);
        }

        #[test]
        fn test_set_more_ranges_and_singletons() {
            lexer_test("1-2,4,6-8,10-12", vec![("1-2,4,6-8,10-12", Type::Set)]);
        }

        // Pair
        //
        #[test]
        fn test_name_colon_value() {
            lexer_test("name:value", vec![("name:value", Type::Pair)]);
        }

        #[test]
        fn test_name_eq_value() {
            lexer_test("name=value", vec![("name=value", Type::Pair)]);
        }

        #[test]
        fn test_name_colon_eq_value() {
            lexer_test("name:=value", vec![("name:=value", Type::Pair)]);
        }

        #[test]
        fn test_name_dot_mod_colon_value() {
            lexer_test("name.mod:value", vec![("name.mod:value", Type::Pair)]);
}

        #[test]
        fn test_name_dot_mod_eq_value() {
            lexer_test("name.mod=value", vec![("name.mod=value", Type::Pair)]);
        }

        // A pair with an empty value (nothing after the separator) is still a Pair.
        #[test]
        fn test_name_colon() {
            lexer_test("name:", vec![("name:", Type::Pair)]);
        }

        #[test]
        fn test_name_eq() {
            lexer_test("name=", vec![("name=", Type::Pair)]);
        }

        #[test]
        fn test_name_dot_mod_colon() {
            lexer_test("name.mod:", vec![("name.mod:", Type::Pair)]);
        }

        #[test]
        fn test_name_dot_mod_equal() {
            lexer_test("name.mod=", vec![("name.mod=", Type::Pair)]);
        }

        #[test]
        fn test_pro_quoted() {
            lexer_test("pro:'P 1'", vec![("pro:'P 1'", Type::Pair)]);
        }

        // With a separator and value attached, `rc...` is a Pair rather than a DOM reference.
        #[test]
        fn test_rc_colon_x() {
            lexer_test("rc:x", vec![("rc:x", Type::Pair)]);
        }

        #[test]
        fn test_rc_dot_name_colon_value() {
            lexer_test("rc.name:value", vec![("rc.name:value", Type::Pair)]);
        }

        #[test]
        fn test_rc_dot_name_eq_value() {
            lexer_test("rc.name=value", vec![("rc.name=value", Type::Pair)]);
        }

        #[test]
        fn test_rc_dot_name_colon_eq_value() {
            lexer_test("rc.name:=value", vec![("rc.name:=value", Type::Pair)]);
        }

        #[test]
        fn test_due_colon_eq_quoted() {
            lexer_test("due:='eow - 2d'", vec![("due:='eow - 2d'", Type::Pair)]);
        }

        // A literal newline inside the quoted value does not end the pair.
        #[test]
        fn test_name_colon_quoted_with_newline() {
            lexer_test("name:'foo\nbar'", vec![("name:'foo\nbar'", Type::Pair)]);
        }

        // Operator - complete set
        //
        #[test]
        fn test_caret() {
            lexer_test("^", vec![("^", Type::Op)]);
        }

        #[test]
        fn test_bang() {
            lexer_test("!", vec![("!", Type::Op)]);
        }

        #[test]
        fn test_neg() {
            lexer_test("_neg_", vec![("_neg_", Type::Op)]);
        }

        #[test]
        fn test_pos() {
            lexer_test("_pos_", vec![("_pos_", Type::Op)]);
        }

        #[test]
        fn test_hastag() {
            lexer_test("_hastag_", vec![("_hastag_", Type::Op)]);
        }

        #[test]
        fn test_notag() {
            lexer_test("_notag_", vec![("_notag_", Type::Op)]);
        }

        #[test]
        fn test_star() {
            lexer_test("*", vec![("*", Type::Op)]);
        }

        #[test]
        fn test_slash() {
            lexer_test("/", vec![("/", Type::Op)]);
        }

        #[test]
        fn test_percent() {
            lexer_test("%", vec![("%", Type::Op)]);
        }

        #[test]
        fn test_plus() {
            lexer_test("+", vec![("+", Type::Op)]);
        }

        #[test]
        fn test_minus() {
lexer_test("-", vec![("-", Type::Op)]);
        }

        // Multi-character operators must come back as one token, not as their
        // single-character pieces.
        #[test]
        fn test_leq() {
            lexer_test("<=", vec![("<=", Type::Op)]);
        }

        #[test]
        fn test_geq() {
            lexer_test(">=", vec![(">=", Type::Op)]);
        }

        #[test]
        fn test_gt() {
            lexer_test(">", vec![(">", Type::Op)]);
        }

        #[test]
        fn test_lt() {
            lexer_test("<", vec![("<", Type::Op)]);
        }

        #[test]
        fn test_eq() {
            lexer_test("=", vec![("=", Type::Op)]);
        }

        #[test]
        fn test_double_eq() {
            lexer_test("==", vec![("==", Type::Op)]);
        }

        #[test]
        fn test_not_eq() {
            lexer_test("!=", vec![("!=", Type::Op)]);
        }

        #[test]
        fn test_not_double_eq() {
            lexer_test("!==", vec![("!==", Type::Op)]);
        }

        #[test]
        fn test_tilde() {
            lexer_test("~", vec![("~", Type::Op)]);
        }

        #[test]
        fn test_not_tilde() {
            lexer_test("!~", vec![("!~", Type::Op)]);
        }

        #[test]
        fn test_and() {
            lexer_test("and", vec![("and", Type::Op)]);
        }

        #[test]
        fn test_or() {
            lexer_test("or", vec![("or", Type::Op)]);
        }

        #[test]
        fn test_xor() {
            lexer_test("xor", vec![("xor", Type::Op)]);
        }

        #[test]
        fn test_lparen() {
            lexer_test("(", vec![("(", Type::Op)]);
        }

        #[test]
        fn test_rparen() {
            lexer_test(")", vec![(")", Type::Op)]);
        }

        // UUID
        //
        // Full UUIDs and progressively shorter prefixes, down to the first eight hex
        // digits, are all expected to lex as Uuid.
        #[test]
        fn test_uuid_ffs() {
            lexer_test(
                "ffffffff-ffff-ffff-ffff-ffffffffffff",
                vec![("ffffffff-ffff-ffff-ffff-ffffffffffff", Type::Uuid)],
            );
        }

        #[test]
        fn test_uuid_00s() {
            lexer_test(
                "00000000-0000-0000-0000-0000000",
                vec![("00000000-0000-0000-0000-0000000", Type::Uuid)],
            );
        }

        #[test]
        fn test_uuid_shorter() {
            lexer_test(
                "00000000-0000-0000-0000",
                vec![("00000000-0000-0000-0000", Type::Uuid)],
            );
        }

        #[test]
        fn test_uuid_shorter_still() {
            lexer_test(
                "00000000-0000-0000",
                vec![("00000000-0000-0000", Type::Uuid)],
            );
        }

        #[test]
        fn test_uuid_even_shorter() {
            lexer_test("00000000-0000", vec![("00000000-0000", Type::Uuid)]);
        }

        #[test]
        fn test_uuid_only_first_bit() {
            lexer_test("00000000", vec![("00000000", Type::Uuid)]);
        }

        #[test]
        fn test_real_uuid() {
            lexer_test(
                "a360fc44-315c-4366-b70c-ea7e7520b749",
                vec![("a360fc44-315c-4366-b70c-ea7e7520b749", Type::Uuid)],
            );
        }

        #[test]
        fn
test_real_uuid_shorter() {
            // Same shortening sequence as the all-zero UUIDs, with a realistic value.
            lexer_test(
                "a360fc44-315c-4366-b70c-ea7e752",
                vec![("a360fc44-315c-4366-b70c-ea7e752", Type::Uuid)],
            );
        }

        #[test]
        fn test_real_uuid_shorter_still() {
            lexer_test(
                "a360fc44-315c-4366-b70c",
                vec![("a360fc44-315c-4366-b70c", Type::Uuid)],
            );
        }

        #[test]
        fn test_real_uuid_even_shorter() {
            lexer_test(
                "a360fc44-315c-4366",
                vec![("a360fc44-315c-4366", Type::Uuid)],
            );
        }

        #[test]
        fn test_real_uuid_naming_is_hard() {
            lexer_test("a360fc44-315c", vec![("a360fc44-315c", Type::Uuid)]);
        }

        #[test]
        fn test_real_uuid_only_first_bit() {
            lexer_test("a360fc44", vec![("a360fc44", Type::Uuid)]);
        }

        // Date
        //
        #[test]
        fn test_year_week() {
            lexer_test("2015-W01", vec![("2015-W01", Type::Date)]);
        }

        #[test]
        fn test_year_month_day() {
            lexer_test("2015-02-17", vec![("2015-02-17", Type::Date)]);
        }

        #[test]
        fn test_timestamp() {
            lexer_test(
                "2013-11-29T22:58:00Z",
                vec![("2013-11-29T22:58:00Z", Type::Date)],
            );
        }

        #[test]
        fn test_abbrev_timestamp() {
            lexer_test("20131129T225800Z", vec![("20131129T225800Z", Type::Date)]);
        }

        // Ordinals and named days are expected to lex as dates too.
        #[test]
        fn test_9thn() {
            lexer_test("9th", vec![("9th", Type::Date)]);
        }

        #[test]
        fn test_10th() {
            lexer_test("10th", vec![("10th", Type::Date)]);
        }

        #[test]
        fn test_today() {
            lexer_test("today", vec![("today", Type::Date)]);
        }

        // Duration
        //
        #[test]
        fn test_year() {
            lexer_test("year", vec![("year", Type::Duration)]);
        }

        #[test]
        fn test_4weeks() {
            lexer_test("4weeks", vec![("4weeks", Type::Duration)]);
        }

        #[test]
        fn test_pt23h() {
            lexer_test("PT23H", vec![("PT23H", Type::Duration)]);
        }

        #[test]
        fn test_1second() {
            lexer_test("1second", vec![("1second", Type::Duration)]);
        }

        #[test]
        fn test_1s() {
            lexer_test("1s", vec![("1s", Type::Duration)]);
        }

        #[test]
        fn test_1minute() {
            lexer_test("1minute", vec![("1minute", Type::Duration)]);
        }

        #[test]
        fn test_2hour() {
            lexer_test("2hour", vec![("2hour", Type::Duration)]);
        }

        // A space between the count and the unit still yields a single Duration token.
        #[test]
        fn test_3_days() {
            lexer_test("3 days", vec![("3 days", Type::Duration)]);
        }

        #[test]
        fn test_4w() {
            lexer_test("4w", vec![("4w",
Type::Duration)]);
        }

        #[test]
        fn test_5mo() {
            lexer_test("5mo", vec![("5mo", Type::Duration)]);
        }

        #[test]
        fn test_6_years() {
            lexer_test("6 years", vec![("6 years", Type::Duration)]);
        }

        // Durations written with the `P…`/`PT…` designators.
        #[test]
        fn test_p1y() {
            lexer_test("P1Y", vec![("P1Y", Type::Duration)]);
        }

        #[test]
        fn test_pt1h() {
            lexer_test("PT1H", vec![("PT1H", Type::Duration)]);
        }

        #[test]
        fn test_p_full() {
            lexer_test("P1Y1M1DT1H1M1S", vec![("P1Y1M1DT1H1M1S", Type::Duration)]);
        }

        // Misc
        //
        #[test]
        fn test_separator() {
            lexer_test("--", vec![("--", Type::Separator)]);
        }

        #[test]
        fn test_separator_ws() {
            lexer_test(" -- ", vec![("--", Type::Separator)]);
        }

        // `--` directly between two numbers is a separator, not two minus operators.
        #[test]
        fn test_separator_boundaries() {
            lexer_test(
                "123--123 ",
                vec![
                    ("123", Type::Number),
                    ("--", Type::Separator),
                    ("123", Type::Number),
                ],
            );
        }

        // Expression
        //  due:eom-2w
        //  due < eom + 1w + 1d
        //  ( /pattern/ or 8ad2e3db-914d-4832-b0e6-72fa04f6e331,3b6218f9-726a-44fc-aa63-889ff52be442 )
        //
        #[test]
        fn test_expression() {
            lexer_test(
                "(1+2)",
                vec![
                    ("(", Type::Op),
                    ("1", Type::Number),
                    ("+", Type::Op),
                    ("2", Type::Number),
                    (")", Type::Op),
                ],
            );
        }

        // "description" is one of the attributes registered by `lexer_test`, so it lexes
        // as DOM while the right-hand side stays a plain Identifier.
        #[test]
        fn test_expression_dom_tilde() {
            lexer_test(
                "description~pattern",
                vec![
                    ("description", Type::DOM),
                    ("~", Type::Op),
                    ("pattern", Type::Identifier),
                ],
            );
        }

        #[test]
        fn test_expression_paren_tag() {
            lexer_test(
                "(+tag)",
                vec![("(", Type::Op), ("+tag", Type::Tag), (")", Type::Op)],
            );
        }

        #[test]
        fn test_expression_paren_name_value() {
            lexer_test(
                "(name:value)",
                vec![("(", Type::Op), ("name:value", Type::Pair), (")", Type::Op)],
            );
        }
    }
}