Files
taskwarrior-2.x/src/cli/lexer.rs
2020-02-02 15:11:14 -05:00

3099 lines
88 KiB
Rust

use crate::util::datetime::DateTime;
use crate::util::duration::Duration;
use std::convert::TryFrom;
// based on src/Lexer.{h,cpp} in the Taskwarrior code
/// Template for a full UUID: each `x` is a hex-digit position and each `-` is a literal dash.
/// `is_uuid` walks the input against this template one character at a time.
const UUID_PATTERN: &[u8] = b"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
/// Minimum number of leading template positions that must match before `is_uuid`
/// accepts the input as a (possibly truncated) UUID.
const UUID_MIN_LENGTH: usize = 8;
/// Minimum length of the common prefix required by `is_literal` when abbreviations are allowed.
const MINIMUM_MATCH_LEN: usize = 3;
/// Sub-element names that `is_dom` accepts after a date-valued reference
/// (for example `annotations.<n>.entry.<sub-element>`).
const DATE_SUBELEMENTS: &[&str] = &[
"year", "month", "day", "week", "weekday", "julian", "hour", "minute", "second",
];
/// The kind of a token produced by `Lexer::token`. Each variant is emitted by the
/// recognizer method named in its comment.
#[derive(PartialEq, Debug, Clone, Copy)]
enum Type {
// Full or truncated UUID (`is_uuid`).
Uuid,
// Decimal number (`is_number`); also the type reported by the `is_integer` helper.
Number,
// `0x`-prefixed hexadecimal number (`is_hexnumber`).
Hex,
// Quoted string, including its quotes (`is_string`).
String,
// Text starting with `http://` or `https://` (`is_url`).
URL,
// `<identifier><separator>[value]` (`is_pair`).
Pair,
// Multiple integers / integer ranges joined by `,` and `-` (`is_set`).
Set,
// The literal `--` (`is_separator`).
Separator,
// `+name` or `-name` (`is_tag`).
Tag,
// Slash-separated path (`is_path`).
Path,
// `/from/to/` with an optional trailing `g` (`is_substitution`).
Substitution,
// `/pattern/` (`is_pattern`).
Pattern,
// Symbolic or word operator (`is_operator`).
Op,
// DOM reference such as `rc.<name>` (`is_dom`).
DOM,
// Identifier (`is_identifier`).
Identifier,
// Any other run of non-whitespace, non-operator characters (`is_word`).
Word,
// Text accepted by `DateTime::parse` (`is_date`).
Date,
// Text accepted by `Duration::parse` (`is_duration`).
Duration,
}
/// Tokenizer state: the text being lexed plus a cursor into it.
struct Lexer {
// The complete input being tokenized.
text: String,
// Byte offset into `text` of the next unread character.
cursor: usize,
// Byte length of `text`, captured when the lexer is constructed.
eos: usize,
// Attribute names registered through `add_attribute`; consulted by `is_dom`.
attributes: Vec<String>,
}
// TaskWarrior uses some non-standard character definitions, so they are repeated verbatim here,
// rather than defaulting to the unicode functions available on the char type.
/// Returns true if `c` counts as whitespace under TaskWarrior's libshared rules,
/// i.e. it is either horizontal or vertical whitespace.
fn unicode_whitespace(c: char) -> bool {
    if unicode_horizontal_whitespace(c) {
        return true;
    }
    unicode_vertical_whitespace(c)
}
/// Returns true if `c` is horizontal whitespace under TaskWarrior's libshared rules.
///
/// The set is deliberately spelled out instead of deferring to `char::is_whitespace`,
/// because it includes several zero-width characters.
fn unicode_horizontal_whitespace(c: char) -> bool {
    match c {
        '\u{0020}' // space
        | '\u{0009}' // horizontal tab
        | '\u{00A0}' // no-break space
        | '\u{1680}' // ogham space mark
        | '\u{180E}' // mongolian vowel separator
        // en quad, em quad, en space, em space, three-per-em space, four-per-em space,
        // six-per-em space, figure space, punctuation space, thin space, hair space,
        // zero width space, zero width non-joiner, zero width joiner
        | '\u{2000}'..='\u{200D}'
        | '\u{202F}' // narrow no-break space
        | '\u{205F}' // medium mathematical space
        | '\u{2060}' // word joiner
        | '\u{3000}' => true, // ideographic space
        _ => false,
    }
}
/// Returns true if `c` is vertical whitespace under TaskWarrior's libshared rules.
fn unicode_vertical_whitespace(c: char) -> bool {
    match c {
        // line feed, vertical tab, form feed, carriage return
        '\u{000A}'..='\u{000D}' => true,
        // next line (NEL)
        '\u{0085}' => true,
        // line separator, paragraph separator
        '\u{2028}' | '\u{2029}' => true,
        _ => false,
    }
}
/// Returns true only for the ASCII digits `0` through `9`.
fn unicode_latin_digit(c: char) -> bool {
    match c {
        '0'..='9' => true,
        _ => false,
    }
}
/// Returns true only for the ASCII letters `a`-`z` and `A`-`Z`.
fn unicode_latin_alpha(c: char) -> bool {
    match c {
        'a'..='z' | 'A'..='Z' => true,
        _ => false,
    }
}
/// Replicates the C function of the same name (in the "C" locale): returns true for
/// the printable ASCII characters, which are the graphic characters `0x21..=0x7E`
/// plus the space character `0x20`.
///
/// Non-ASCII characters and control characters are never printable.
fn isprint(c: char) -> bool {
    // `is_ascii_graphic` alone excludes the space, which C's `isprint` accepts.
    c == ' ' || c.is_ascii_graphic()
}
/// Returns true if `c` is punctuation: a printable character (per `isprint`) that is
/// neither an ASCII letter nor an ASCII digit, and is not one of the exempted
/// characters space, `@`, `#`, `$` or `_`.
fn is_punctuation(c: char) -> bool {
    if !isprint(c) {
        return false;
    }
    if unicode_latin_digit(c) || unicode_latin_alpha(c) {
        return false;
    }
    match c {
        // printable, but deliberately not treated as punctuation
        ' ' | '@' | '#' | '$' | '_' => false,
        _ => true,
    }
}
/// Returns true if `c` is one of the single-character operators
/// `+ - * / ( ) < > ^ ! % = ~`.
fn is_single_char_operator(c: char) -> bool {
    const OPERATORS: &str = "+-*/()<>^!%=~";
    OPERATORS.contains(c)
}
/// Returns true if `c` may begin an identifier: it must not be whitespace, an ASCII
/// digit, a single-character operator, or punctuation.
fn is_identifier_start(c: char) -> bool {
    let disqualified = unicode_whitespace(c)
        || unicode_latin_digit(c)
        || is_single_char_operator(c)
        || is_punctuation(c);
    !disqualified
}
/// Returns true if `c` may continue an identifier after its first character: anything
/// except `:`, `=`, whitespace, or a single-character operator.
fn is_identifier_next(c: char) -> bool {
    match c {
        ':' | '=' => false,
        _ => !(unicode_whitespace(c) || is_single_char_operator(c)),
    }
}
/// Returns true if a token boundary lies between the adjacent characters `left` and
/// `right`.
///
/// A boundary exists when `right` is NUL, when the two characters differ in being
/// ASCII letters, ASCII digits, or whitespace, or when either one is punctuation.
fn is_boundary(left: char, right: char) -> bool {
    if right == '\0' {
        return true;
    }
    let class_changes = unicode_latin_alpha(left) != unicode_latin_alpha(right)
        || unicode_latin_digit(left) != unicode_latin_digit(right)
        || unicode_whitespace(left) != unicode_whitespace(right);
    class_changes || is_punctuation(left) || is_punctuation(right)
}
/// Returns true if a hard token boundary lies between the adjacent characters `left`
/// and `right`: `right` is NUL, or either character is a parenthesis.
fn is_hard_boundary(left: char, right: char) -> bool {
    let is_paren = |c: char| c == '(' || c == ')';
    right == '\0' || is_paren(left) || is_paren(right)
}
/// Returns true if `s` contains a character (space, tab, `(`, `)`, `<`, `>`, `&` or
/// `~`) that implies the string must have been shell-quoted.
fn was_quoted(s: &str) -> bool {
    s.chars().any(|c| match c {
        ' ' | '\t' | '(' | ')' | '<' | '>' | '&' | '~' => true,
        _ => false,
    })
}
/// Returns true if `c` is an ASCII hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`).
fn is_unicode_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Interprets `hex` as a hexadecimal code point and returns the corresponding `char`.
///
/// Returns `None` if `hex` contains a non-hex character or if the resulting value is
/// not a valid Unicode scalar value. An empty string yields `Some('\0')`.
fn hex_to_char(hex: &str) -> Option<char> {
    let mut code = 0u32;
    for digit in hex.chars().map(|c| c.to_digit(16)) {
        // the shift leaves the low nibble clear, so adding a value <= 15 cannot carry
        code = (code << 4) + digit?;
    }
    char::try_from(code).ok()
}
/// Removes one matching pair of quote characters from the ends of `s`.
///
/// If the first and last characters of `s` are the same character and that character
/// appears in `quotes`, the slice between them is returned; a string consisting solely
/// of the quote (once or twice) yields `""`. Otherwise `s` is returned untouched.
///
/// Unlike the C++ original, this returns a sub-slice of the input instead of modifying
/// its argument.
fn dequote<'a, 'b>(s: &'a str, quotes: &'b str) -> &'a str {
    let (first, last) = match (s.chars().next(), s.chars().next_back()) {
        (Some(first), Some(last)) => (first, last),
        _ => return s,
    };
    if first != last || !quotes.contains(first) {
        return s;
    }
    let width = first.len_utf8();
    if s.len() > 2 * width {
        &s[width..s.len() - width]
    } else {
        ""
    }
}
fn read_word_quoted(text: &str, quotes: &str, cursor: usize) -> Option<(String, usize)> {
let mut pos = cursor;
let mut res = String::new();
let mut skipchars = 0;
let mut chars = text.get(cursor..)?.chars();
let quote = chars.next();
if quote.is_none() {
return None;
}
let quote = quote.unwrap();
if !quotes.contains(quote) {
return None;
}
res.push(quote);
pos += quote.len_utf8();
for c in chars {
if skipchars > 0 {
skipchars -= 1;
pos += c.len_utf8();
continue;
}
if c == quote {
res.push(c);
pos += quote.len_utf8();
return Some((res, pos));
}
if c == 'U' {
if let Some('+') = text.get(pos + 1..).unwrap().chars().next() {
if let Some(hex) = text.get(pos + 2..pos + 6) {
if let Some(c) = hex_to_char(hex) {
res.push(c);
skipchars += 5;
} else {
res.push('U');
}
} else {
res.push('U');
}
} else {
res.push('U');
}
} else if c == '\\' {
match text.get(pos + 1..).unwrap().chars().next() {
None => res.push(c),
Some('b') => res.push('\x08'),
Some('f') => res.push('\x0c'),
Some('n') => res.push('\x0a'),
Some('r') => res.push('\x0d'),
Some('t') => res.push('\x09'),
Some('v') => res.push('\x0b'),
Some('u') => {
if let Some(hex) = text.get(pos + 2..pos + 6) {
if let Some(c) = hex_to_char(hex) {
res.push(c);
skipchars += 4;
} else {
res.push('u')
}
} else {
res.push('u')
}
}
Some(c @ _) => res.push(c),
}
skipchars += 1;
} else {
res.push(c);
}
pos += c.len_utf8();
}
None
}
fn read_word_unquoted(text: &str, cursor: usize) -> Option<(String, usize)> {
let mut pos = cursor;
let mut res = String::new();
let mut prev = None;
let mut skipchars = 0;
for c in text.get(cursor..)?.chars() {
if skipchars > 0 {
skipchars -= 1;
pos += c.len_utf8();
prev = Some(c);
continue;
}
if unicode_whitespace(c) {
break;
}
if let Some(p) = prev {
if is_hard_boundary(p, c) {
break;
}
}
if c == 'U' {
if let Some('+') = text.get(pos + 1..).unwrap().chars().next() {
if let Some(hex) = text.get(pos + 2..pos + 6) {
if let Some(c) = hex_to_char(hex) {
res.push(c);
skipchars += 5;
} else {
res.push('U');
}
} else {
res.push('U');
}
} else {
res.push('U');
}
} else if c == '\\' {
match text.get(pos + 1..).unwrap().chars().next() {
None => res.push(c),
Some('b') => res.push('\x08'),
Some('f') => res.push('\x0c'),
Some('n') => res.push('\x0a'),
Some('r') => res.push('\x0d'),
Some('t') => res.push('\x09'),
Some('v') => res.push('\x0b'),
Some('u') => {
if let Some(hex) = text.get(pos + 2..pos + 6) {
if let Some(c) = hex_to_char(hex) {
res.push(c);
skipchars += 4;
} else {
res.push('u')
}
} else {
res.push('u')
}
}
Some(c @ _) => res.push(c),
}
skipchars += 1;
} else {
res.push(c);
}
pos += c.len_utf8();
prev = Some(c);
}
if pos != cursor {
Some((res, pos))
} else {
None
}
}
/// Returns the length, in bytes, of the longest common prefix of `s1` and `s2`.
///
/// The comparison is made character by character, so the returned length always falls
/// on a character boundary in both strings and can be used directly as a slice offset
/// or compared against `str::len()`. For ASCII input the byte length and the character
/// count coincide.
fn common_length(s1: &str, s2: &str) -> usize {
    s1.chars()
        .zip(s2.chars())
        .take_while(|(c1, c2)| c1 == c2)
        .map(|(c, _)| c.len_utf8())
        .sum()
}
/// The parts of a `name[.modifier]<separator>value` pair, as split by
/// `Lexer::decompose_pair`.
#[derive(Debug, PartialEq)]
pub struct DecomposedPair {
// Text before the separator (and before the first `.`, if one precedes the separator).
name: String,
// Text between the first `.` and the separator; empty when there is no such `.`.
modifier: String,
// The separator itself: `::`, `:=`, `:` or `=`.
separator: String,
// Everything after the separator.
value: String,
}
impl Lexer {
/// Creates a lexer over `text`, positioned at its start and with no registered
/// attributes.
pub fn new<S: Into<String>>(text: S) -> Lexer {
    let text: String = text.into();
    Lexer {
        // `eos` is read from `text` before `text` is moved into the struct
        eos: text.len(),
        cursor: 0,
        attributes: Vec::new(),
        text,
    }
}
/// Registers an attribute name; registered names are consulted by `is_dom`.
pub fn add_attribute<S: Into<String>>(&mut self, attribute: S) {
    let name: String = attribute.into();
    self.attributes.push(name);
}
/// Tokenizes `text` and returns only the token strings, discarding their types.
pub fn split<S: Into<String>>(text: S) -> Vec<String> {
    let mut lexer = Lexer::new(text);
    let mut tokens = Vec::new();
    while let Some((token, _)) = lexer.token() {
        tokens.push(token);
    }
    tokens
}
/// Returns the next token and its type, advancing the cursor past it, or `None` when
/// only whitespace (or nothing) remains.
///
/// Leading whitespace is skipped, then the recognizers are tried in a fixed order and
/// the first one that matches wins.
pub fn token(&mut self) -> Option<(String, Type)> {
    // Eat whitespace
    let skipped: usize = self.text[self.cursor..]
        .chars()
        .take_while(|&c| unicode_whitespace(c))
        .map(char::len_utf8)
        .sum();
    self.cursor += skipped;
    if self.cursor == self.eos {
        return None;
    }
    // The sequence is specific, and must follow these rules:
    // - date < duration < uuid < identifier
    // - dom < uuid
    // - uuid < hex < number
    // - url < pair < identifier
    // - hex < number
    // - separator < tag < operator
    // - path < substitution < pattern
    // - set < number
    // - word last
    self.is_string("\"'")
        .or_else(|| self.is_date())
        .or_else(|| self.is_duration())
        .or_else(|| self.is_url())
        .or_else(|| self.is_pair())
        .or_else(|| self.is_uuid(true))
        .or_else(|| self.is_set())
        .or_else(|| self.is_dom())
        .or_else(|| self.is_hexnumber())
        .or_else(|| self.is_number())
        .or_else(|| self.is_separator())
        .or_else(|| self.is_tag())
        .or_else(|| self.is_path())
        .or_else(|| self.is_substitution())
        .or_else(|| self.is_pattern())
        .or_else(|| self.is_operator())
        .or_else(|| self.is_identifier())
        .or_else(|| self.is_word())
}
/// Splits `text` of the form `name[.modifier]<separator>value` into its parts.
///
/// The separator is whichever of `::`, `:=`, `:` or `=` occurs earliest in `text`;
/// when two candidates start at the same offset, the one listed first wins (so `::`
/// and `:=` take precedence over a bare `:`). If a `.` occurs before the separator,
/// the text after the first `.` becomes the modifier. Returns `None` when `text`
/// contains no separator.
pub fn decompose_pair(text: &str) -> Option<DecomposedPair> {
    // Listed in tie-breaking priority order: `min_by_key` keeps the first minimum.
    const SEPARATORS: [&str; 4] = ["::", ":=", ":", "="];
    let (sep, sep_end) = SEPARATORS
        .iter()
        .filter_map(|candidate| text.find(*candidate).map(|at| (at, at + candidate.len())))
        .min_by_key(|&(at, _)| at)?;
    let (name, modifier) = match text.find('.') {
        Some(dot) if dot < sep => (&text[..dot], &text[dot + 1..sep]),
        _ => (&text[..sep], ""),
    };
    Some(DecomposedPair {
        name: name.into(),
        modifier: modifier.into(),
        separator: text[sep..sep_end].into(),
        value: text[sep_end..].into(),
    })
}
// recognizers for the `token` method
/// Recognizes a string quoted with one of the characters in `quotes` at the cursor.
///
/// On success the cursor moves past the closing quote and the decoded word (quotes
/// included) is returned as `Type::String`; otherwise the cursor is left unchanged.
fn is_string(&mut self, quotes: &str) -> Option<(String, Type)> {
    let (word, end) = read_word_quoted(&self.text, quotes, self.cursor)?;
    self.cursor = end;
    Some((word, Type::String))
}
/// Recognizes a date at the cursor by delegating to `DateTime::parse`.
///
/// On success the cursor advances by the length the parser reports and the consumed
/// text is returned as `Type::Date`; otherwise the cursor is left unchanged.
fn is_date(&mut self) -> Option<(String, Type)> {
    let start = self.cursor;
    let (_, consumed) = DateTime::parse(&self.text[start..], "")?;
    let matched = self.text[start..start + consumed].into();
    self.cursor = start + consumed;
    Some((matched, Type::Date))
}
/// Recognizes a duration at the cursor by delegating to `Duration::parse`.
///
/// Text that starts with an operator is never treated as a duration. On success the
/// cursor advances by the length the parser reports and the consumed text is returned
/// as `Type::Duration`; otherwise the cursor is left unchanged.
fn is_duration(&mut self) -> Option<(String, Type)> {
    let start = self.cursor;
    // Probe for an operator, then undo whatever the probe consumed.
    let starts_with_operator = self.is_operator().is_some();
    self.cursor = start;
    if starts_with_operator {
        return None;
    }
    let (_, consumed) = Duration::parse(&self.text[start..], "")?;
    let matched = self.text[start..start + consumed].into();
    self.cursor = start + consumed;
    Some((matched, Type::Duration))
}
/// Recognizes a URL at the cursor: text beginning with `https://` or `http://` and
/// running up to the next whitespace character or the end of the input.
///
/// On success the cursor moves past the URL, which is returned as `Type::URL`;
/// otherwise the cursor is left unchanged.
fn is_url(&mut self) -> Option<(String, Type)> {
    let rest = &self.text[self.cursor..];
    let has_scheme = ["https://", "http://"]
        .iter()
        .any(|scheme| rest.starts_with(*scheme));
    if !has_scheme {
        return None;
    }
    match rest.find(unicode_whitespace) {
        Some(length) => {
            self.cursor += length;
            Some((rest[..length].into(), Type::URL))
        }
        None => {
            self.cursor = self.eos;
            Some((rest.into(), Type::URL))
        }
    }
}
fn is_pair(&mut self) -> Option<(String, Type)> {
let marker = self.cursor;
if self.is_identifier().is_some() {
let separator = &self.text[self.cursor..];
if separator.starts_with("::") || separator.starts_with(":=") {
self.cursor += 2;
} else if separator.starts_with(":") || separator.starts_with("=") {
self.cursor += 1;
} else {
self.cursor = marker;
return None;
}
// String, word, or nothing are all valid
let marker2 = self.cursor;
if let Some((word, end)) = read_word_quoted(&self.text[..], "'\"", self.cursor) {
self.cursor = end;
return Some((
format!("{}{}", &self.text[marker..marker2], word),
Type::Pair,
));
}
if let Some((word, end)) = read_word_unquoted(&self.text[..], self.cursor) {
self.cursor = end;
return Some((
format!("{}{}", &self.text[marker..marker2], word),
Type::Pair,
));
}
if self.cursor == self.eos
|| unicode_whitespace(self.text[self.cursor..].chars().next().unwrap())
{
return Some((self.text[marker..self.cursor].into(), Type::Pair));
}
}
self.cursor = marker;
None
}
/// Recognizes a full or truncated UUID at the cursor.
///
/// The input is matched position by position against `UUID_PATTERN`; at least
/// `UUID_MIN_LENGTH` positions must match. When `end_boundary` is true, the match must
/// be followed by the end of the input, whitespace, or a single-character operator.
/// On success the cursor moves past the match, which is returned as `Type::Uuid`;
/// otherwise the cursor is left unchanged.
fn is_uuid(&mut self, end_boundary: bool) -> Option<(String, Type)> {
    let rest = &self.text[self.cursor..];
    // Every matched character is ASCII, so this count is also a byte length.
    let matched = rest
        .chars()
        .zip(UUID_PATTERN.iter())
        .take_while(|&(c, &slot)| {
            if slot == b'x' {
                is_unicode_hex_digit(c)
            } else {
                c == '-'
            }
        })
        .count();
    if matched < UUID_MIN_LENGTH {
        return None;
    }
    if end_boundary {
        if let Some(next) = rest[matched..].chars().next() {
            if !(unicode_whitespace(next) || is_single_char_operator(next)) {
                return None;
            }
        }
    }
    let token = rest[..matched].into();
    self.cursor += matched;
    Some((token, Type::Uuid))
}
/// Recognizes a set at the cursor: integers and `a-b` integer ranges separated by
/// commas, containing more than one integer in total (e.g. `1,3` or `2-5`).
///
/// The set must be followed by the end of the input, whitespace, or a hard boundary.
/// On success the cursor moves past the set, which is returned as `Type::Set`;
/// otherwise the cursor is restored.
fn is_set(&mut self) -> Option<(String, Type)> {
    let start = self.cursor;
    let mut integers = 0;
    loop {
        if self.is_integer().is_none() {
            self.cursor = start;
            return None;
        }
        integers += 1;
        // optional upper end of a range
        if self.is_literal("-", false, false) {
            if self.is_integer().is_none() {
                self.cursor = start;
                return None;
            }
            integers += 1;
        }
        if !self.is_literal(",", false, false) {
            break;
        }
    }
    if integers <= 1 {
        self.cursor = start;
        return None;
    }
    // -1 is OK here since integers are ASCII
    let last = self.text[self.cursor - 1..].chars().next().unwrap();
    // look ahead a bit
    if let Some(next) = self.text[self.cursor..].chars().next() {
        if !unicode_whitespace(next) && !is_hard_boundary(last, next) {
            self.cursor = start;
            return None;
        }
    }
    Some((self.text[start..self.cursor].into(), Type::Set))
}
/// Recognizes a DOM reference at the cursor.
///
/// The following forms are tried, in order:
/// - `rc.<word>`
/// - one of a fixed list of `tw.*`, `context.*` and `system.*` names, followed by an
///   end boundary
/// - an optional `<uuid>.` or `<integer>.` prefix, followed by one of:
///   - `tags.<word>`
///   - a registered attribute name followed by an end boundary
///   - a registered attribute name, `.`, and a date sub-element
///   - `annotations.count`, `annotations.<n>.description`, `annotations.<n>.entry`
///     or `annotations.<n>.entry.<date sub-element>`
///
/// On success the cursor is left just past the reference, which is returned as
/// `Type::DOM`. On failure the cursor is restored to where it started and `None` is
/// returned.
fn is_dom(&mut self) -> Option<(String, Type)> {
// `marker` is where the whole reference starts; returned tokens always begin here.
let marker = self.cursor;
// rc. ...
if self.is_literal("rc.", false, false) && self.is_word().is_some() {
return Some((self.text[marker..self.cursor].into(), Type::DOM));
} else {
self.cursor = marker;
}
// Literals
if self.is_one_of(
&vec![
"tw.syncneeded",
"tw.program",
"tw.args",
"tw.width",
"tw.height",
"tw.version",
"context.program",
"context.args",
"context.width",
"context.height",
"system.version",
"system.os",
],
false,
true,
) {
return Some((self.text[marker..self.cursor].into(), Type::DOM));
}
// Optional:
// <uuid>.
// <id>.
// If a uuid or integer is present it must be followed by `.`, otherwise this is not a
// DOM reference at all.
if self.is_uuid(false).is_some() || self.is_integer().is_some() {
if !self.is_literal(".", false, false) {
self.cursor = marker;
return None;
}
}
// Any failure after this line should rollback to the checkpoint.
let checkpoint = self.cursor;
// [prefix]tags.<word>
if self.is_literal("tags", false, false)
&& self.is_literal(".", false, false)
&& self.is_word().is_some()
{
return Some((self.text[marker..self.cursor].into(), Type::DOM));
} else {
self.cursor = checkpoint;
}
// [prefix]attribute (bounded)
// (have to clone here to avoid double-borrowing self
let attributes = self.attributes.clone();
if self.is_one_of(&attributes, false, true) {
return Some((self.text[marker..self.cursor].into(), Type::DOM));
}
// [prefix]attribute. (unbounded)
if self.is_one_of(&attributes, false, false) {
if self.is_literal(".", false, false) {
// The attribute text is everything between the checkpoint and the `.` just consumed.
let attribute = &self.text[checkpoint..self.cursor - 1];
// if attribute type is 'date', then it has sub-elements.
// NOTE(review): this compares the attribute's *name* with "date"; the lexer stores
// no attribute types here, so only an attribute literally named `date` can match.
if attribute == "date" && self.is_one_of(&DATE_SUBELEMENTS, false, true) {
return Some((self.text[marker..self.cursor].into(), Type::DOM));
}
self.cursor = checkpoint;
}
// Lookahead: !<alpha>
// NOTE(review): the character inspected is the one at `marker` (the start of the
// reference), not the one at the current cursor - confirm this is intended.
else if !self.text[marker..]
.chars()
.next()
.map_or(false, |c| unicode_latin_alpha(c))
{
return Some((self.text[marker..self.cursor].into(), Type::DOM));
}
self.cursor = checkpoint;
}
// [prefix]annotations.
// `annotations` may be abbreviated (abbreviations are enabled for this literal only).
if self.is_literal("annotations", true, false) && self.is_literal(".", false, false) {
if self.is_literal("count", false, false) {
return Some((self.text[marker..self.cursor].into(), Type::DOM));
}
// annotations.<n>.description | annotations.<n>.entry[.<date sub-element>]
if self.is_integer().is_some() {
if self.is_literal(".", false, false) {
if self.is_literal("description", false, true) {
return Some((self.text[marker..self.cursor].into(), Type::DOM));
} else if self.is_literal("entry", false, true) {
return Some((self.text[marker..self.cursor].into(), Type::DOM));
} else if self.is_literal("entry", false, false)
&& self.is_literal(".", false, false)
&& self.is_one_of(&DATE_SUBELEMENTS, false, true)
{
return Some((self.text[marker..self.cursor].into(), Type::DOM));
}
}
} else {
self.cursor = checkpoint;
}
}
// Nothing matched: undo everything, including any consumed prefix.
self.cursor = marker;
None
}
/// Recognizes a hexadecimal number at the cursor: `0x` followed by one or more hex
/// digits.
///
/// On success the cursor moves past the number, which is returned as `Type::Hex`;
/// otherwise the cursor is left unchanged.
fn is_hexnumber(&mut self) -> Option<(String, Type)> {
    const PREFIX: &str = "0x";
    let rest = &self.text[self.cursor..];
    if !rest.starts_with(PREFIX) {
        return None;
    }
    // Hex digits are ASCII, so the digit count is also a byte length.
    let digits = rest[PREFIX.len()..]
        .chars()
        .take_while(|&c| is_unicode_hex_digit(c))
        .count();
    if digits == 0 {
        return None;
    }
    let length = PREFIX.len() + digits;
    self.cursor += length;
    Some((rest[..length].into(), Type::Hex))
}
/// Recognizes a decimal number at the cursor: digits, an optional fractional part, and
/// an optional exponent (`e`/`E`, optional sign, digits, optional fractional part).
///
/// The state machine stops at the first character that does not continue the number,
/// so a trailing `.`, `e` or exponent sign is accepted as part of the token (e.g.
/// `123.45e` and `12.34e+`). The number must be followed by the end of the input,
/// whitespace, or a single-character operator. On success the cursor moves past the
/// number, which is returned as `Type::Number`; otherwise the cursor is left unchanged.
fn is_number(&mut self) -> Option<(String, Type)> {
let remainder = &self.text[self.cursor..];
let mut chars = remainder.char_indices().peekable();
// Byte length (within `remainder`) of the text accepted so far.
let mut marker = 0;
// A hand-rolled regexp. States are as follows:
// \d \d* (. \d \d*)? ([eE] [+-]? \d \d* (. \d \d*)?)?
// 0 1 2 3 4 5 6 7 8 9 10 11 12
let mut state = 0;
loop {
// Peek at the next character without consuming it; when there is one, `marker` is
// moved to its offset so that a `break` below excludes it from the token.
let c = match chars.peek() {
Some((i, c)) => {
marker = *i;
Some(*c)
}
None => None,
};
// Each arm either moves to the next state (the character is then consumed below) or
// breaks out, leaving the offending character unconsumed for the lookahead.
match (state, c) {
(0, Some(c)) if unicode_latin_digit(c) => state = 1,
(1, Some(c)) if unicode_latin_digit(c) => state = 2,
(1, Some(c)) if c == '.' => state = 3,
(1, Some(c)) if c == 'e' || c == 'E' => state = 6,
(1, _) => break,
(2, Some(c)) if unicode_latin_digit(c) => state = 2,
(2, Some(c)) if c == '.' => state = 3,
(2, Some(c)) if c == 'e' || c == 'E' => state = 6,
(2, _) => break,
(3, Some(c)) if unicode_latin_digit(c) => state = 4,
(3, Some(c)) if c == 'e' || c == 'E' => state = 6,
(3, _) => break,
(4, Some(c)) if unicode_latin_digit(c) => state = 5,
(4, Some(c)) if c == 'e' || c == 'E' => state = 6,
(4, _) => break,
(5, Some(c)) if unicode_latin_digit(c) => state = 5,
(5, Some(c)) if c == 'e' || c == 'E' => state = 6,
(5, _) => break,
(6, Some(c)) if unicode_latin_digit(c) => state = 8,
(6, Some(c)) if c == '-' || c == '+' => state = 7,
(6, _) => break,
(7, Some(c)) if unicode_latin_digit(c) => state = 8,
(7, _) => break,
(8, Some(c)) if unicode_latin_digit(c) => state = 9,
(8, Some(c)) if c == '.' => state = 10,
(8, _) => break,
(9, Some(c)) if unicode_latin_digit(c) => state = 9,
(9, Some(c)) if c == '.' => state = 10,
(9, _) => break,
(10, Some(c)) if unicode_latin_digit(c) => state = 11,
(10, _) => break,
(11, Some(c)) if unicode_latin_digit(c) => state = 11,
(11, _) => break,
// Only reachable from state 0: the text does not start with a digit.
_ => return None,
};
// Consume the accepted character and move `marker` just past it.
if let Some((i, c)) = chars.next() {
marker = i + c.len_utf8();
}
}
// lookahead
// The character that stopped the state machine (if any) must be a legal terminator.
if let Some((_, c)) = chars.peek() {
if !unicode_whitespace(*c) && !is_single_char_operator(*c) {
return None;
}
}
self.cursor += marker;
Some((remainder[..marker].into(), Type::Number))
}
/// Recognizes the separator `--` at the cursor.
///
/// On success the cursor moves past it and `--` is returned as `Type::Separator`;
/// otherwise the cursor is left unchanged.
fn is_separator(&mut self) -> Option<(String, Type)> {
    const SEPARATOR: &str = "--";
    if !self.text.get(self.cursor..)?.starts_with(SEPARATOR) {
        return None;
    }
    self.cursor += SEPARATOR.len();
    Some((SEPARATOR.into(), Type::Separator))
}
/// Recognizes a tag at the cursor: `+` or `-` followed by an identifier.
///
/// A tag is only recognized at the very start of the input or when the preceding
/// character is whitespace, `(` or `)`. On success the cursor moves past the tag,
/// which is returned (sign included) as `Type::Tag`; otherwise the cursor is left
/// unchanged.
fn is_tag(&mut self) -> Option<(String, Type)> {
    // Lookbehind: Assert ^ or preceded by whitespace, (, or ).
    // The previous *character* is examined (not the previous byte), so multi-byte
    // whitespace such as U+3000 in front of a tag is handled correctly.
    if let Some(previous) = self.text[..self.cursor].chars().next_back() {
        if !unicode_whitespace(previous) && previous != '(' && previous != ')' {
            return None;
        }
    }
    let mut chars = self.text[self.cursor..].chars();
    let sign = chars.next()?;
    if sign != '+' && sign != '-' {
        return None;
    }
    let first = chars.next()?;
    if !is_identifier_start(first) {
        return None;
    }
    let rest_len: usize = chars
        .take_while(|&c| is_identifier_next(c))
        .map(char::len_utf8)
        .sum();
    let end = self.cursor + sign.len_utf8() + first.len_utf8() + rest_len;
    let token = self.text[self.cursor..end].into();
    self.cursor = end;
    Some((token, Type::Tag))
}
/// Recognizes a path at the cursor: repeated `/<segment>` groups, where a segment is
/// a non-empty run of characters that are neither whitespace nor `/`. A final `/`
/// without a segment is included in the token.
///
/// The text only counts as a path when it contains more than three slashes. On
/// success the cursor moves past the path, which is returned as `Type::Path`;
/// otherwise the cursor is left unchanged.
fn is_path(&mut self) -> Option<(String, Type)> {
    // `end` is a byte offset into `self.text`, so it advances by each character's
    // UTF-8 length rather than by one per character.
    let mut end = self.cursor;
    let mut slash_count = 0;
    let mut chars = self.text[self.cursor..].chars().peekable();
    loop {
        match chars.next() {
            Some('/') => {
                end += 1;
                slash_count += 1;
            }
            _ => break,
        }
        // The slash must be followed by at least one segment character to continue.
        match chars.next() {
            Some(c) if !unicode_whitespace(c) && c != '/' => end += c.len_utf8(),
            _ => break,
        }
        while let Some(&c) = chars.peek() {
            if unicode_whitespace(c) || c == '/' {
                break;
            }
            end += c.len_utf8();
            chars.next();
        }
    }
    if end > self.cursor && slash_count > 3 {
        let token = self.text[self.cursor..end].into();
        self.cursor = end;
        return Some((token, Type::Path));
    }
    None
}
/// Recognizes a substitution at the cursor: `/from/to/`, optionally followed by `g`.
///
/// The substitution must be followed by the end of the input or by whitespace. On
/// success the cursor moves past it and the raw text is returned as
/// `Type::Substitution`; otherwise the cursor is left unchanged.
fn is_substitution(&mut self) -> Option<(String, Type)> {
    let start = self.cursor;
    let (_, middle) = read_word_quoted(&self.text, "/", start)?;
    // middle - 1 steps back over the middle `/`, which also opens the second part
    let (_, mut end) = read_word_quoted(&self.text, "/", middle - 1)?;
    let mut following = self.text[end..].chars();
    let mut next = following.next();
    // optional trailing `g` flag
    if next == Some('g') {
        end += 1;
        next = following.next();
    }
    match next {
        None => (),
        Some(c) if unicode_whitespace(c) => (),
        Some(_) => return None,
    }
    self.cursor = end;
    Some((self.text[start..end].into(), Type::Substitution))
}
fn is_pattern(&mut self) -> Option<(String, Type)> {
let marker = self.cursor;
if let Some((_, end)) = read_word_quoted(&self.text, "/", self.cursor) {
if end == self.eos || unicode_whitespace(self.text[end..].chars().next().unwrap()) {
self.cursor = end;
return Some((self.text[marker..self.cursor].into(), Type::Pattern));
}
}
None
}
/// Recognizes an operator at the cursor.
///
/// Symbolic operators (and the `_hastag_`-style markers) match whenever the input
/// starts with them; longer operators are listed before their prefixes so that, for
/// example, `!==` wins over `!=` and `!`. The word operators `and`, `xor` and `or`
/// additionally require a token boundary (see `is_boundary`) or the end of the input
/// right after them. On success the cursor moves past the operator, which is returned
/// as `Type::Op`; otherwise the cursor is left unchanged.
fn is_operator(&mut self) -> Option<(String, Type)> {
    // operators that do not require a boundary afterward
    const SYMBOLIC: &[&str] = &[
        // custom stuff
        "_hastag_", "_notag_", "_neg_", "_pos_",
        // triple-char
        "!==",
        // double-char
        "==", "!=", "<=", ">=", "||", "&&", "!~",
        // single-char
        "+", "-", "*", "/", "(", ")", "<", ">", "^", "!", "%", "=", "~",
    ];
    // operators that require a boundary afterward
    const WORDS: &[&str] = &["and", "xor", "or"];

    let remainder = &self.text[self.cursor..];
    for op in SYMBOLIC {
        if remainder.starts_with(*op) {
            self.cursor += op.len();
            return Some((remainder[..op.len()].into(), Type::Op));
        }
    }
    for word in WORDS {
        if !remainder.starts_with(*word) {
            continue;
        }
        let bounded = match remainder[word.len()..].chars().next() {
            // the operator is the last thing in the input
            None => true,
            Some(right) => word
                .chars()
                .next_back()
                .map_or(false, |left| is_boundary(left, right)),
        };
        if bounded {
            self.cursor += word.len();
            return Some((remainder[..word.len()].into(), Type::Op));
        }
    }
    None
}
/// Recognizes an identifier at the cursor: one character accepted by
/// `is_identifier_start` followed by any number accepted by `is_identifier_next`.
///
/// On success the cursor moves past the identifier, which is returned as
/// `Type::Identifier`; otherwise the cursor is left unchanged.
fn is_identifier(&mut self) -> Option<(String, Type)> {
    let mut chars = self.text.get(self.cursor..)?.chars();
    let first = chars.next()?;
    if !is_identifier_start(first) {
        return None;
    }
    let rest_len: usize = chars
        .take_while(|&c| is_identifier_next(c))
        .map(char::len_utf8)
        .sum();
    let start = self.cursor;
    self.cursor += first.len_utf8() + rest_len;
    Some((self.text.get(start..self.cursor)?.into(), Type::Identifier))
}
/// Recognizes a word at the cursor: a non-empty run of characters that are neither
/// whitespace nor single-character operators.
///
/// On success the cursor moves past the word, which is returned as `Type::Word`;
/// otherwise the cursor is left unchanged.
fn is_word(&mut self) -> Option<(String, Type)> {
    let start = self.cursor;
    let length: usize = self.text[start..]
        .chars()
        .take_while(|&c| !(unicode_whitespace(c) || is_single_char_operator(c)))
        .map(char::len_utf8)
        .sum();
    if length == 0 {
        return None;
    }
    let end = start + length;
    let token = self.text[start..end].into();
    self.cursor = end;
    Some((token, Type::Word))
}
// utilities that may modify self
/// Tries each of `options` in order with `is_literal` and stops at the first that
/// matches, returning true (the cursor has then been advanced by `is_literal`).
/// Returns false, with the cursor unchanged, if none matches.
fn is_one_of<S: AsRef<str>>(
    &mut self,
    options: &[S],
    allow_abbreviations: bool,
    end_boundary: bool,
) -> bool {
    options
        .iter()
        .any(|candidate| self.is_literal(candidate.as_ref(), allow_abbreviations, end_boundary))
}
/// Matches `literal` against the text at the cursor.
///
/// Without abbreviations the whole literal must be present. With abbreviations the
/// common prefix of the literal and the text must be at least `MINIMUM_MATCH_LEN`
/// long. When `end_boundary` is true, the matched text must be followed by the end of
/// the input, whitespace, or a single-character operator. On success the cursor
/// advances past the matched text and true is returned; otherwise the cursor is left
/// unchanged and false is returned.
fn is_literal(&mut self, literal: &str, allow_abbreviations: bool, end_boundary: bool) -> bool {
    let matched = common_length(literal, &self.text[self.cursor..]);
    let required = if allow_abbreviations {
        MINIMUM_MATCH_LEN
    } else {
        literal.len()
    };
    if matched < required {
        return false;
    }
    if end_boundary {
        if let Some(next) = self.text[self.cursor + matched..].chars().next() {
            if !(unicode_whitespace(next) || is_single_char_operator(next)) {
                return false;
            }
        }
    }
    self.cursor += matched;
    true
}
/// Recognizes a non-empty run of ASCII digits at the cursor.
///
/// On success the cursor moves past the digits, which are returned as `Type::Number`;
/// otherwise the cursor is left unchanged.
fn is_integer(&mut self) -> Option<(String, Type)> {
    let start = self.cursor;
    // ASCII digits are one byte each, so the count is also a byte length.
    let digits = self.text[start..]
        .chars()
        .take_while(|&c| unicode_latin_digit(c))
        .count();
    if digits == 0 {
        return None;
    }
    let end = start + digits;
    let token = self.text[start..end].into();
    self.cursor = end;
    Some((token, Type::Number))
}
}
/// Iterator adapter that owns a `Lexer` and yields its tokens one at a time.
struct LexerIterator(Lexer);
impl Iterator for LexerIterator {
type Item = (String, Type);
fn next(&mut self) -> Option<Self::Item> {
self.0.token()
}
}
/// Lets a `Lexer` be consumed directly as an iterator over its tokens.
impl IntoIterator for Lexer {
    type Item = (String, Type);
    type IntoIter = LexerIterator;

    /// Wraps the lexer in a `LexerIterator`, which continues from the current cursor.
    fn into_iter(self) -> Self::IntoIter {
        let lexer = self;
        LexerIterator(lexer)
    }
}
#[cfg(test)]
mod test {
use super::*;
const NONE: Option<(String, Type)> = None;
#[test]
fn test_is_punctuation_comma() {
assert!(is_punctuation(','));
}
#[test]
fn test_is_punctuation_slash() {
assert!(is_punctuation('/'));
}
#[test]
fn test_is_punctuation_at() {
assert!(!is_punctuation('@'));
}
#[test]
fn test_is_punctuation_hash() {
assert!(!is_punctuation('#'));
}
#[test]
fn test_is_punctuation_dollar() {
assert!(!is_punctuation('$'));
}
#[test]
fn test_is_punctuation_underscore() {
assert!(!is_punctuation('_'));
}
#[test]
fn test_is_punctuation_space() {
assert!(!is_punctuation(' '));
}
#[test]
fn test_is_punctuation_a() {
assert!(!is_punctuation('a'));
}
#[test]
fn test_is_punctuation_9() {
assert!(!is_punctuation('9'));
}
#[test]
fn test_is_punctuation_latin() {
assert!(!is_punctuation('é'));
}
#[test]
fn test_is_punctuation_euro() {
assert!(!is_punctuation('€'));
}
#[test]
fn test_is_punctuation_smile() {
assert!(!is_punctuation('☺'));
}
#[test]
fn test_is_punctuation_numeric() {
assert!(!is_punctuation('¾'));
}
#[test]
fn test_is_boundary() {
assert!(is_boundary(' ', 'a'));
assert!(is_boundary('a', ' '));
assert!(is_boundary(' ', '+'));
assert!(is_boundary(' ', ','));
assert!(!is_boundary('3', '4'));
assert!(is_boundary('(', '('));
assert!(!is_boundary('r', 'd'));
}
#[test]
fn test_was_quoted() {
assert!(!was_quoted(""));
assert!(!was_quoted("foo"));
assert!(was_quoted("a b"));
assert!(was_quoted("(a)"));
}
#[test]
fn test_dequote() {
assert_eq!(dequote("foo", "'\""), "foo");
assert_eq!(dequote("'foo'", "'\""), "foo");
assert_eq!(dequote("\"foo\"", "'\""), "foo");
assert_eq!(dequote("'o\\'clock'", "'\""), "o\\'clock");
// single quote char
assert_eq!(dequote("'", "'\""), "");
// multibyte quote char
assert_eq!(dequote("éo\\'clocké", "é"), "o\\'clock");
}
#[test]
fn test_token_empty() {
let mut l = Lexer::new("");
assert_eq!(l.token(), NONE);
}
#[test]
fn test_token_tokens() {
let mut l = Lexer::new(
" one 'two \\'three\\''+456-(1.3*2 - 0x12) 1.2e-3.4 foo.bar and '\\u20ac'",
);
assert_eq!(l.token(), Some((String::from("one"), Type::Identifier)));
assert_eq!(
l.token(),
Some((String::from("'two 'three''"), Type::String))
);
assert_eq!(l.token(), Some((String::from("+"), Type::Op)));
assert_eq!(l.token(), Some((String::from("456"), Type::Number)));
assert_eq!(l.token(), Some((String::from("-"), Type::Op)));
assert_eq!(l.token(), Some((String::from("("), Type::Op)));
assert_eq!(l.token(), Some((String::from("1.3"), Type::Number)));
assert_eq!(l.token(), Some((String::from("*"), Type::Op)));
assert_eq!(l.token(), Some((String::from("2"), Type::Number)));
assert_eq!(l.token(), Some((String::from("-"), Type::Op)));
assert_eq!(l.token(), Some((String::from("0x12"), Type::Hex)));
assert_eq!(l.token(), Some((String::from(")"), Type::Op)));
assert_eq!(l.token(), Some((String::from("1.2e-3.4"), Type::Number)));
assert_eq!(l.token(), Some((String::from("foo.bar"), Type::Identifier)));
assert_eq!(l.token(), Some((String::from("and"), Type::Op)));
assert_eq!(l.token(), Some((String::from("'€'"), Type::String)));
assert_eq!(l.token(), None);
}
#[test]
fn test_token_short_numbers() {
let mut l = Lexer::new("1 12 123 1234 12345 123456 1234567 123.45e 12.34e+");
assert_eq!(l.token(), Some((String::from("1"), Type::Number)));
assert_eq!(l.token(), Some((String::from("12"), Type::Number)));
assert_eq!(l.token(), Some((String::from("123"), Type::Number)));
assert_eq!(l.token(), Some((String::from("1234"), Type::Number)));
assert_eq!(l.token(), Some((String::from("12345"), Type::Number)));
assert_eq!(l.token(), Some((String::from("123456"), Type::Number)));
assert_eq!(l.token(), Some((String::from("1234567"), Type::Number)));
assert_eq!(l.token(), Some((String::from("123.45e"), Type::Number)));
assert_eq!(l.token(), Some((String::from("12.34e+"), Type::Number)));
assert_eq!(l.token(), None);
}
#[test]
fn test_read_word_quoted_simple() {
assert_eq!(
read_word_quoted("'one two'", "'\"", 0),
Some((String::from("'one two'"), 9))
);
}
#[test]
fn test_read_word_quoted_unterminated() {
assert_eq!(
read_word_quoted("'one two", "'\"", 0),
None as Option<(String, usize)>
);
}
#[test]
fn test_read_word_quoted_backslash_u() {
assert_eq!(
read_word_quoted("'pay \\u20a43'", "'\"", 0),
Some((String::from("'pay ₤3'"), 13))
);
}
#[test]
fn test_read_word_quoted_u_plus() {
assert_eq!(
read_word_quoted("\"pay U+20AC5\"", "'\"", 0),
Some((String::from("\"pay €5\""), 13))
);
}
#[test]
fn test_read_word_unquoted_simple() {
assert_eq!(
read_word_unquoted("input", 0),
Some((String::from("input"), 5))
);
}
/// A backslash-escaped space stays inside the word instead of ending it.
#[test]
fn test_read_word_unquoted_escaped_space() {
    let result = read_word_unquoted("one\\ two", 0);
    assert_eq!(result, Some((String::from("one two"), 8)));
}
/// A backslash-escaped double quote is kept as a literal quote in the word.
#[test]
fn test_read_word_unquoted_escaped_quote() {
    let result = read_word_unquoted("one\\\"two", 0);
    assert_eq!(result, Some((String::from("one\"two"), 8)));
}
/// The two-character sequence `\n` is translated into a line feed (0x0a).
#[test]
fn test_read_word_unquoted_escaped_newline() {
    let result = read_word_unquoted("one\\ntwo", 0);
    assert_eq!(result, Some((String::from("one\x0atwo"), 8)));
}
/// A `\uXXXX` escape in an unquoted word is replaced by the corresponding character.
#[test]
fn test_read_word_unquoted_escaped_backslash_u() {
    let result = read_word_unquoted("pay\\u20a43", 0);
    assert_eq!(result, Some((String::from("pay₤3"), 10)));
}
/// A `\u` escape with too few hex digits just drops the backslash and keeps the rest.
#[test]
fn test_read_word_unquoted_incomplete_escaped_backslash_u() {
    let result = read_word_unquoted("\\u203", 0);
    assert_eq!(result, Some((String::from("u203"), 5)));
}
/// A `\u` escape followed by non-hex characters just drops the backslash and keeps the rest.
#[test]
fn test_read_word_unquoted_nonhex_escaped_backslash_u() {
    let result = read_word_unquoted("\\u2fghk", 0);
    assert_eq!(result, Some((String::from("u2fghk"), 7)));
}
/// A `U+XXXX` escape in an unquoted word is replaced by the corresponding character.
#[test]
fn test_read_word_unquoted_escaped_u_plus() {
    let result = read_word_unquoted("payU+20AC4", 0);
    assert_eq!(result, Some((String::from("pay€4"), 10)));
}
/// A `U+` sequence with too few hex digits is passed through literally.
#[test]
fn test_read_word_unquoted_incomplete_u_plus() {
    let result = read_word_unquoted("U+20A", 0);
    assert_eq!(result, Some((String::from("U+20A"), 5)));
}
/// An unquoted word stops at trailing whitespace, which is not part of the result.
#[test]
fn test_read_word_trailing_whitespace() {
    let result = read_word_unquoted("one ", 0);
    assert_eq!(result, Some((String::from("one"), 3)));
}
/// Reads three successive words out of one string by restarting at different offsets.
#[test]
fn test_read_word_unquoted_several_words() {
    let text = "one 'two' three\\ four";

    let first = read_word_unquoted(text, 0);
    assert_eq!(first, Some((String::from("one"), 3)));

    let second = read_word_unquoted(text, 4);
    assert_eq!(second, Some((String::from("'two'"), 9)));

    let third = read_word_unquoted(text, 10);
    assert_eq!(third, Some((String::from("three four"), 21)));
}
/// Two empty strings share a common length of zero.
#[test]
fn test_common_length_empty() {
    let shared = common_length("", "");
    assert_eq!(shared, 0);
}
/// Identical one-character strings share a common length of one.
#[test]
fn test_common_length_match_one() {
    let shared = common_length("a", "a");
    assert_eq!(shared, 1);
}
/// Identical five-character strings share a common length of five.
#[test]
fn test_common_length_match_longer() {
    let shared = common_length("abcde", "abcde");
    assert_eq!(shared, 5);
}
/// An empty second string gives a common length of zero.
#[test]
fn test_common_length_match_s2_short() {
    let shared = common_length("abc", "");
    assert_eq!(shared, 0);
}
/// Strings that differ at the first character share a common length of zero.
#[test]
fn test_common_length_match_differ() {
    let shared = common_length("abc", "def");
    assert_eq!(shared, 0);
}
/// When the second string is a prefix of the first, the common length is its length.
#[test]
fn test_common_length_match_s2_prefix() {
    let shared = common_length("foobar", "foo");
    assert_eq!(shared, 3);
}
/// When the first string is a prefix of the second, the common length is its length.
#[test]
fn test_common_length_match_s1_prefix() {
    let shared = common_length("foo", "foobar");
    assert_eq!(shared, 3);
}
/// A quoted string is recognised as a `String` token and the cursor moves to 5.
#[test]
fn test_is_string() {
    let mut lexer = Lexer::new("'one'");
    let token = lexer.is_string("'\"");
    assert_eq!(token, Some((String::from("'one'"), Type::String)));
    assert_eq!(lexer.cursor, 5);
}
/// Unquoted text is not a string; the cursor stays at 0.
#[test]
fn test_is_string_negative() {
    let mut lexer = Lexer::new("one");
    let token = lexer.is_string("'\"");
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// An empty pair of quotes is still a `String` token and the cursor moves to 2.
#[test]
fn test_is_string_empty() {
    let mut lexer = Lexer::new("''");
    let token = lexer.is_string("'\"");
    assert_eq!(token, Some((String::from("''"), Type::String)));
    assert_eq!(lexer.cursor, 2);
}
/// A literal tab inside a quoted string is preserved in the token.
#[test]
fn test_is_string_escape() {
    let mut lexer = Lexer::new("'one\ttwo'");
    let token = lexer.is_string("'\"");
    assert_eq!(token, Some((String::from("'one\ttwo'"), Type::String)));
    assert_eq!(lexer.cursor, 9);
}
/// A bare four-digit year at end of input is a `Date`.
#[test]
fn test_is_date_year_eos() {
    let mut lexer = Lexer::new("2015");
    let token = lexer.is_date();
    assert_eq!(token, Some((String::from("2015"), Type::Date)));
    assert_eq!(lexer.cursor, 4);
}
/// A nine-digit number is accepted as a `Date`.
#[test]
fn test_is_date_epoch() {
    let mut lexer = Lexer::new("315532800");
    let token = lexer.is_date();
    assert_eq!(token, Some((String::from("315532800"), Type::Date)));
    assert_eq!(lexer.cursor, 9);
}
/// A year followed by whitespace is a `Date`; the whitespace is not consumed.
#[test]
fn test_is_date_year_ws() {
    let mut lexer = Lexer::new("2015 ");
    let token = lexer.is_date();
    assert_eq!(token, Some((String::from("2015"), Type::Date)));
    assert_eq!(lexer.cursor, 4);
}
/// A year immediately followed by letters still yields the year as a `Date`.
#[test]
fn test_is_date_year_ident() {
    let mut lexer = Lexer::new("2015abc");
    let token = lexer.is_date();
    assert_eq!(token, Some((String::from("2015"), Type::Date)));
    assert_eq!(lexer.cursor, 4);
}
/// A year followed by `+` yields the year as a `Date`, leaving the `+` unconsumed.
#[test]
fn test_is_date_year_plus() {
    let mut lexer = Lexer::new("2015+");
    let token = lexer.is_date();
    assert_eq!(token, Some((String::from("2015"), Type::Date)));
    assert_eq!(lexer.cursor, 4);
}
/// A year followed by `-xyz` yields `2015-` (the dash included) as a `Date`.
#[test]
fn test_is_date_year_minus() {
    let mut lexer = Lexer::new("2015-xyz");
    let token = lexer.is_date();
    assert_eq!(token, Some((String::from("2015-"), Type::Date)));
    assert_eq!(lexer.cursor, 5);
}
/// `1w` is recognised as a `Duration`.
#[test]
fn test_is_duration_1w() {
    let mut lexer = Lexer::new("1w");
    let token = lexer.is_duration();
    assert_eq!(token, Some((String::from("1w"), Type::Duration)));
    assert_eq!(lexer.cursor, 2);
}
/// Operator characters are not a duration; the cursor stays at 0.
#[test]
fn test_is_duration_op() {
    let mut lexer = Lexer::new("!!");
    let token = lexer.is_duration();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A single digit is a `Number`.
#[test]
fn test_is_number_digit() {
    let mut lexer = Lexer::new("3");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("3"), Type::Number)));
    assert_eq!(lexer.cursor, 1);
}
/// A multi-digit integer is a `Number`.
#[test]
fn test_is_number_integer() {
    let mut lexer = Lexer::new("13");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("13"), Type::Number)));
    assert_eq!(lexer.cursor, 2);
}
/// A trailing `-` ends the number and is left unconsumed.
#[test]
fn test_is_number_trailing_minus() {
    let mut lexer = Lexer::new("13-");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("13"), Type::Number)));
    assert_eq!(lexer.cursor, 2);
}
/// A number with a fractional part is a `Number`.
#[test]
fn test_is_number_decimal() {
    let mut lexer = Lexer::new("1.3");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("1.3"), Type::Number)));
    assert_eq!(lexer.cursor, 3);
}
/// Two decimal points make the input not a number; the cursor stays at 0.
#[test]
fn test_is_number_multiple_decimal() {
    let mut lexer = Lexer::new("1.3.4");
    let token = lexer.is_number();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A decimal point with no digits after it is still accepted as part of the `Number`.
#[test]
fn test_is_number_decimal_no_digits() {
    let mut lexer = Lexer::new("1.");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("1."), Type::Number)));
    assert_eq!(lexer.cursor, 2);
}
/// Multiple digits on both sides of the decimal point form a single `Number`.
#[test]
fn test_is_number_decimal_multi_digit() {
    let mut lexer = Lexer::new("12.32");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("12.32"), Type::Number)));
    assert_eq!(lexer.cursor, 5);
}
/// A trailing `e` with no exponent digits is still consumed as part of the `Number`.
#[test]
fn test_is_number_decimal_e_no_exponent() {
    let mut lexer = Lexer::new("12.32e");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("12.32e"), Type::Number)));
    assert_eq!(lexer.cursor, 6);
}
/// A trailing `e+` with no exponent digits is still consumed as part of the `Number`.
#[test]
fn test_is_number_decimal_e_plus_no_exponent() {
    let mut lexer = Lexer::new("12.32e+");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("12.32e+"), Type::Number)));
    assert_eq!(lexer.cursor, 7);
}
/// A signed integer exponent is part of the `Number`.
#[test]
fn test_is_number_decimal_e_integer_exponent() {
    let mut lexer = Lexer::new("12.32e-12");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("12.32e-12"), Type::Number)));
    assert_eq!(lexer.cursor, 9);
}
/// An exponent that itself has a fractional part is accepted as part of the `Number`.
#[test]
fn test_is_number_decimal_e_decimal_exponent() {
    let mut lexer = Lexer::new("12.32e12.34");
    let token = lexer.is_number();
    assert_eq!(token, Some((String::from("12.32e12.34"), Type::Number)));
    assert_eq!(lexer.cursor, 11);
}
/// Digits directly followed by a letter are not a number; the cursor stays at 0.
#[test]
fn test_is_number_integer_invalid_lookahead() {
    let mut lexer = Lexer::new("13a");
    let token = lexer.is_number();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// Two comma-separated integers form a `Set`.
#[test]
fn test_is_set_singletons() {
    let mut lexer = Lexer::new("12,13");
    let token = lexer.is_set();
    assert_eq!(token, Some((String::from("12,13"), Type::Set)));
    assert_eq!(lexer.cursor, 5);
}
/// Two comma-separated ranges form a `Set`.
#[test]
fn test_is_set_ranges() {
    let mut lexer = Lexer::new("12-13,19-200");
    let token = lexer.is_set();
    assert_eq!(token, Some((String::from("12-13,19-200"), Type::Set)));
    assert_eq!(lexer.cursor, 12);
}
/// Two consecutive commas make the input not a set; the cursor stays at 0.
#[test]
fn test_is_set_double_comma() {
    let mut lexer = Lexer::new("12-13,,19-200");
    let token = lexer.is_set();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A trailing comma makes the input not a set; the cursor stays at 0.
#[test]
fn test_is_set_trailing_comma() {
    let mut lexer = Lexer::new("12-13,");
    let token = lexer.is_set();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A range followed by whitespace is a `Set`; the whitespace is not consumed.
#[test]
fn test_is_set_trailing_ws() {
    let mut lexer = Lexer::new("12-13 ");
    let token = lexer.is_set();
    assert_eq!(token, Some((String::from("12-13"), Type::Set)));
    assert_eq!(lexer.cursor, 5);
}
/// A range directly followed by letters is not a set; the cursor stays at 0.
#[test]
fn test_is_set_trailing_non_hard_boundary() {
    let mut lexer = Lexer::new("12-13abc");
    let token = lexer.is_set();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// With the cursor placed at offset 2, `--` is read as a `Separator` and the cursor moves to 4.
#[test]
fn test_is_separator() {
    let mut lexer = Lexer::new(" -- ");
    lexer.cursor = 2;
    let token = lexer.is_separator();
    assert_eq!(token, Some((String::from("--"), Type::Separator)));
    assert_eq!(lexer.cursor, 4);
}
/// A single dash is not a separator; the cursor stays at 0.
#[test]
fn test_is_separator_negative() {
    let mut lexer = Lexer::new("- ");
    let token = lexer.is_separator();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// `+foo` at the start of input is a `Tag`.
#[test]
fn test_is_tag_plus() {
    let mut lexer = Lexer::new("+foo");
    let token = lexer.is_tag();
    assert_eq!(token, Some((String::from("+foo"), Type::Tag)));
    assert_eq!(lexer.cursor, 4);
}
/// A `+` directly after a letter does not start a tag; the cursor is left where it was.
#[test]
fn test_is_tag_not_after_whitespace() {
    let mut lexer = Lexer::new("x+y");
    lexer.cursor = 1;
    let token = lexer.is_tag();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 1);
}
/// A `+` directly after a space starts a `Tag`.
#[test]
fn test_is_tag_after_whitespace() {
    let mut lexer = Lexer::new(" +y");
    lexer.cursor = 1;
    let token = lexer.is_tag();
    assert_eq!(token, Some((String::from("+y"), Type::Tag)));
    assert_eq!(lexer.cursor, 3);
}
/// A `+` directly after `(` starts a `Tag`.
#[test]
fn test_is_tag_after_lparen() {
    let mut lexer = Lexer::new("(+y");
    lexer.cursor = 1;
    let token = lexer.is_tag();
    assert_eq!(token, Some((String::from("+y"), Type::Tag)));
    assert_eq!(lexer.cursor, 3);
}
/// A `+` directly after `)` starts a `Tag`.
#[test]
fn test_is_tag_after_rparen() {
    let mut lexer = Lexer::new(")+y");
    lexer.cursor = 1;
    let token = lexer.is_tag();
    assert_eq!(token, Some((String::from("+y"), Type::Tag)));
    assert_eq!(lexer.cursor, 3);
}
/// A `+` directly after a three-byte character does not start a tag; the cursor stays at 3.
#[test]
fn test_is_tag_after_multibyte_char() {
    let mut lexer = Lexer::new("€+y");
    lexer.cursor = 3;
    let token = lexer.is_tag();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 3);
}
/// An `http://` address is a `URL`.
#[test]
fn test_is_url_http() {
    let mut lexer = Lexer::new("http://foo.com/bar");
    let token = lexer.is_url();
    assert_eq!(token, Some((String::from("http://foo.com/bar"), Type::URL)));
    assert_eq!(lexer.cursor, 18);
}
/// An `https://` address is a `URL`.
#[test]
fn test_is_url_https() {
    let mut lexer = Lexer::new("https://foo.com/bar");
    let token = lexer.is_url();
    assert_eq!(token, Some((String::from("https://foo.com/bar"), Type::URL)));
    assert_eq!(lexer.cursor, 19);
}
/// A URL ends at whitespace, which is not consumed.
#[test]
fn test_is_url_ws() {
    let mut lexer = Lexer::new("https://foo.com/bar ");
    let token = lexer.is_url();
    assert_eq!(token, Some((String::from("https://foo.com/bar"), Type::URL)));
    assert_eq!(lexer.cursor, 19);
}
/// Operator characters that directly follow a URL are swallowed into the `URL` token.
#[test]
fn test_is_url_with_ops() {
    let mut lexer = Lexer::new("https://foo.com/bar()+-~");
    let token = lexer.is_url();
    let expected = Some((String::from("https://foo.com/bar()+-~"), Type::URL));
    assert_eq!(token, expected);
    assert_eq!(lexer.cursor, 24);
}
/// A `file://` address is not recognised as a URL; the cursor stays at 0.
#[test]
fn test_is_url_negative() {
    let mut lexer = Lexer::new("file://foo.com/bar");
    let token = lexer.is_url();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// `name::value` is a `Pair`; trailing whitespace is not consumed.
#[test]
fn test_is_pair_double_colon() {
    let mut lexer = Lexer::new("foo::bar ");
    let token = lexer.is_pair();
    assert_eq!(token, Some((String::from("foo::bar"), Type::Pair)));
    assert_eq!(lexer.cursor, 8);
}
/// `name:=value` is a `Pair`; trailing whitespace is not consumed.
#[test]
fn test_is_pair_colon_eq() {
    let mut lexer = Lexer::new("foo:=bar ");
    let token = lexer.is_pair();
    assert_eq!(token, Some((String::from("foo:=bar"), Type::Pair)));
    assert_eq!(lexer.cursor, 8);
}
/// `name:value` is a `Pair`; trailing whitespace is not consumed.
#[test]
fn test_is_pair_colon() {
    let mut lexer = Lexer::new("foo:bar ");
    let token = lexer.is_pair();
    assert_eq!(token, Some((String::from("foo:bar"), Type::Pair)));
    assert_eq!(lexer.cursor, 7);
}
/// `name=value` is a `Pair`.
#[test]
fn test_is_pair_equal() {
    let mut lexer = Lexer::new("foo=bar");
    let token = lexer.is_pair();
    assert_eq!(token, Some((String::from("foo=bar"), Type::Pair)));
    assert_eq!(lexer.cursor, 7);
}
/// A pair whose value is a quoted string containing a space is a single `Pair`.
#[test]
fn test_is_pair_quoted() {
    let mut lexer = Lexer::new("foo='abc def'");
    let token = lexer.is_pair();
    assert_eq!(token, Some((String::from("foo='abc def'"), Type::Pair)));
    assert_eq!(lexer.cursor, 13);
}
/// A `\uXXXX` escape inside a quoted pair value is decoded in the resulting token.
#[test]
fn test_is_pair_quoted_escapes() {
    let mut lexer = Lexer::new("foo='abc\\u20acdef'");
    let token = lexer.is_pair();
    assert_eq!(token, Some((String::from("foo='abc€def'"), Type::Pair)));
    assert_eq!(lexer.cursor, 18);
}
/// A 34-character partial UUID running to end of input is a `Uuid`.
#[test]
fn test_is_uuid_long_eof() {
    let text = "ffffffff-ffff-ffff-ffff-ffffffffff";
    let mut lexer = Lexer::new(text);
    let token = lexer.is_uuid(true);
    assert_eq!(token, Some((String::from(text), Type::Uuid)));
    assert_eq!(lexer.cursor, 34);
}
/// A partial UUID followed by whitespace is a `Uuid`; only the first 34 bytes are taken.
#[test]
fn test_is_uuid_long_ws() {
    let text = "ffffffff-ffff-ffff-ffff-ffffffffff kjdf";
    let mut lexer = Lexer::new(text);
    let token = lexer.is_uuid(true);
    assert_eq!(token, Some((String::from(&text[..34]), Type::Uuid)));
    assert_eq!(lexer.cursor, 34);
}
/// A partial UUID followed by `+` is a `Uuid`; the `+` is not consumed.
#[test]
fn test_is_uuid_long_op() {
    let text = "ffffffff-ffff-ffff-ffff-ffffffffff+";
    let mut lexer = Lexer::new(text);
    let token = lexer.is_uuid(true);
    assert_eq!(token, Some((String::from(&text[..34]), Type::Uuid)));
    assert_eq!(lexer.cursor, 34);
}
/// With `is_uuid(true)`, a trailing `_` makes the input not a UUID; the cursor stays at 0.
#[test]
fn test_is_uuid_long_bad_boundary() {
    let text = "ffffffff-ffff-ffff-ffff-ffffffffff_";
    let mut lexer = Lexer::new(text);
    let token = lexer.is_uuid(true);
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// With `is_uuid(false)`, the same trailing `_` is tolerated and the first 34 bytes are a `Uuid`.
#[test]
fn test_is_uuid_long_bad_boundary_ignored() {
    let text = "ffffffff-ffff-ffff-ffff-ffffffffff_";
    let mut lexer = Lexer::new(text);
    let token = lexer.is_uuid(false);
    assert_eq!(token, Some((String::from(&text[..34]), Type::Uuid)));
    assert_eq!(lexer.cursor, 34);
}
/// Six hex digits are too few to be a UUID; the cursor stays at 0.
#[test]
fn test_is_uuid_too_short() {
    let text = "ffffff";
    let mut lexer = Lexer::new(text);
    let token = lexer.is_uuid(true);
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// An absolute path with several components is a `Path`.
#[test]
fn test_is_path_simple() {
    let mut lexer = Lexer::new("/path/to/a/file");
    let token = lexer.is_path();
    assert_eq!(token, Some((String::from("/path/to/a/file"), Type::Path)));
    assert_eq!(lexer.cursor, 15);
}
/// A path with only two components is rejected; the cursor stays at 0.
#[test]
fn test_is_path_too_short() {
    let mut lexer = Lexer::new("/a/file");
    let token = lexer.is_path();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A path ending in a slash is a `Path`, trailing slash included.
#[test]
fn test_is_path_trailing_slash() {
    let mut lexer = Lexer::new("/path/to/a/dir/");
    let token = lexer.is_path();
    assert_eq!(token, Some((String::from("/path/to/a/dir/"), Type::Path)));
    assert_eq!(lexer.cursor, 15);
}
/// `/a//file` is rejected as a path; the cursor stays at 0.
#[test]
fn test_is_path_double_slash() {
    let mut lexer = Lexer::new("/a//file");
    let token = lexer.is_path();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A path without a leading slash is rejected; the cursor stays at 0.
#[test]
fn test_is_path_no_initial_slash() {
    let mut lexer = Lexer::new("a/path/to/a/file");
    let token = lexer.is_path();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// `/from/to/` is a `Substitution`.
#[test]
fn test_is_substitution_simple() {
    let mut lexer = Lexer::new("/foo/bar/");
    let token = lexer.is_substitution();
    assert_eq!(token, Some((String::from("/foo/bar/"), Type::Substitution)));
    assert_eq!(lexer.cursor, 9);
}
/// A substitution followed by whitespace is a `Substitution`; the whitespace is not consumed.
#[test]
fn test_is_substitution_simple_ws() {
    let mut lexer = Lexer::new("/foo/bar/ ");
    let token = lexer.is_substitution();
    assert_eq!(token, Some((String::from("/foo/bar/"), Type::Substitution)));
    assert_eq!(lexer.cursor, 9);
}
/// A trailing `g` flag is part of the `Substitution` token.
#[test]
fn test_is_substitution_simple_g() {
    let mut lexer = Lexer::new("/foo/bar/g");
    let token = lexer.is_substitution();
    assert_eq!(token, Some((String::from("/foo/bar/g"), Type::Substitution)));
    assert_eq!(lexer.cursor, 10);
}
/// A `g`-flagged substitution followed by whitespace stops before the whitespace.
#[test]
fn test_is_substitution_simple_g_ws() {
    let mut lexer = Lexer::new("/foo/bar/g ");
    let token = lexer.is_substitution();
    assert_eq!(token, Some((String::from("/foo/bar/g"), Type::Substitution)));
    assert_eq!(lexer.cursor, 10);
}
/// A trailing letter other than `g` makes the input not a substitution.
#[test]
fn test_is_substitution_simple_not_g() {
    let mut lexer = Lexer::new("/foo/bar/h");
    let token = lexer.is_substitution();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A trailing `+` makes the input not a substitution.
#[test]
fn test_is_substitution_simple_not_g_op() {
    let mut lexer = Lexer::new("/foo/bar/+");
    let token = lexer.is_substitution();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A `g` flag followed by more letters makes the input not a substitution.
#[test]
fn test_is_substitution_simple_g_but_not_ws() {
    let mut lexer = Lexer::new("/foo/bar/ghi");
    let token = lexer.is_substitution();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// `/foo/` is a `Pattern`.
#[test]
fn test_is_pattern_simple() {
    let mut lexer = Lexer::new("/foo/");
    let token = lexer.is_pattern();
    assert_eq!(token, Some((String::from("/foo/"), Type::Pattern)));
    assert_eq!(lexer.cursor, 5);
}
/// A `\uXXXX` escape inside a pattern is left undecoded in the `Pattern` token.
#[test]
fn test_is_pattern_escaped() {
    let mut lexer = Lexer::new("/f\\u20A4o/");
    let token = lexer.is_pattern();
    assert_eq!(token, Some((String::from("/f\\u20A4o/"), Type::Pattern)));
    assert_eq!(lexer.cursor, 10);
}
/// A pattern followed by a newline and tab stops before that whitespace.
#[test]
fn test_is_pattern_simple_trailing_ws() {
    let mut lexer = Lexer::new("/foo/\n\t");
    let token = lexer.is_pattern();
    assert_eq!(token, Some((String::from("/foo/"), Type::Pattern)));
    assert_eq!(lexer.cursor, 5);
}
/// `_hastag_` is an operator.
#[test]
fn test_is_operator_hastag() {
    let mut lexer = Lexer::new("_hastag_");
    let token = lexer.is_operator();
    assert_eq!(token, Some((String::from("_hastag_"), Type::Op)));
}
/// `_notag_` is an operator.
#[test]
fn test_is_operator_notag() {
    let mut lexer = Lexer::new("_notag_");
    let token = lexer.is_operator();
    assert_eq!(token, Some((String::from("_notag_"), Type::Op)));
}
/// `_neg_` is an operator.
#[test]
fn test_is_operator_neg() {
    let mut lexer = Lexer::new("_neg_");
    let token = lexer.is_operator();
    assert_eq!(token, Some((String::from("_neg_"), Type::Op)));
}
/// `xor` is an operator.
#[test]
fn test_is_operator_xor() {
    let mut lexer = Lexer::new("xor");
    let token = lexer.is_operator();
    assert_eq!(token, Some((String::from("xor"), Type::Op)));
}
/// Empty input contains no identifier; the cursor stays at 0.
#[test]
fn test_is_identifier_empty() {
    let mut lexer = Lexer::new("");
    let token = lexer.is_identifier();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A single multibyte, non-punctuation character is accepted as an identifier.
///
/// The character is written as an explicit escape so it cannot be lost to an editor or
/// encoding round-trip: U+263A occupies three bytes in UTF-8, which is what the cursor
/// assertion below relies on. With an empty literal this test would contradict
/// `test_is_identifier_empty`.
#[test]
fn test_is_identifier_multibyte_nonpunct_first_char() {
    let mut l = Lexer::new("\u{263A}");
    assert_eq!(
        l.is_identifier(),
        Some(("\u{263A}".into(), Type::Identifier))
    );
    assert_eq!(l.cursor, 3);
}
/// An identifier cannot start with a digit; the cursor stays at 0.
#[test]
fn test_is_identifier_bad_first_char() {
    let mut lexer = Lexer::new("1abc");
    let token = lexer.is_identifier();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// An identifier ends at `:`; only the leading `a` is returned.
#[test]
fn test_is_identifier_bad_next_char() {
    let mut lexer = Lexer::new("a:bc");
    let token = lexer.is_identifier();
    assert_eq!(token, Some((String::from("a"), Type::Identifier)));
    assert_eq!(lexer.cursor, 1);
}
/// A plain alphabetic word is an `Identifier`.
#[test]
fn test_is_identifier_ok() {
    let mut lexer = Lexer::new("abc");
    let token = lexer.is_identifier();
    assert_eq!(token, Some((String::from("abc"), Type::Identifier)));
    assert_eq!(lexer.cursor, 3);
}
/// A lone `+` is not a word; the cursor stays at 0.
#[test]
fn test_is_word_no() {
    let mut lexer = Lexer::new("+");
    let token = lexer.is_word();
    assert!(token.is_none());
    assert_eq!(lexer.cursor, 0);
}
/// Starting at offset 4 (just past the dot), `PENDING` is read as a `Word`.
#[test]
fn test_is_word_pending() {
    let mut lexer = Lexer::new("foo.PENDING");
    lexer.cursor = 4;
    let token = lexer.is_word();
    assert_eq!(token, Some((String::from("PENDING"), Type::Word)));
    assert_eq!(lexer.cursor, 11);
}
/// A word may run to the end of input.
#[test]
fn test_is_word_to_eof() {
    let mut lexer = Lexer::new("abc");
    let token = lexer.is_word();
    assert_eq!(token, Some((String::from("abc"), Type::Word)));
    assert_eq!(lexer.cursor, 3);
}
/// A word is read from the current cursor position (offset 2), not from the start of input.
#[test]
fn test_is_word_nonzero_start() {
    let mut lexer = Lexer::new("--abc");
    lexer.cursor = 2;
    let token = lexer.is_word();
    assert_eq!(token, Some((String::from("abc"), Type::Word)));
    assert_eq!(lexer.cursor, 5);
}
/// A word ends at whitespace.
#[test]
fn test_is_word_to_ws() {
    let mut lexer = Lexer::new("abc def");
    let token = lexer.is_word();
    assert_eq!(token, Some((String::from("abc"), Type::Word)));
    assert_eq!(lexer.cursor, 3);
}
/// A word ends at the `*` operator.
#[test]
fn test_is_word_to_op() {
    let mut lexer = Lexer::new("abc*def");
    let token = lexer.is_word();
    assert_eq!(token, Some((String::from("abc"), Type::Word)));
    assert_eq!(lexer.cursor, 3);
}
/// Splitting a whitespace-padded parenthesised expression yields its five tokens as strings.
#[test]
fn test_split_simple() {
    let expected: Vec<String> = ["(", "A", "or", "B", ")"]
        .iter()
        .map(|piece| String::from(*piece))
        .collect();
    assert_eq!(Lexer::split(" ( A or B ) "), expected);
}
/// Splitting input that mixes adjacent operators, an infix `+`, a number with an exponent
/// and a quoted string yields each piece as its own string.
#[test]
fn test_split_confusing() {
    let expected: Vec<String> = ["+", "-", "*", "a", "+", "b", "12.3e4", "'c d'"]
        .iter()
        .map(|piece| String::from(*piece))
        .collect();
    assert_eq!(Lexer::split(" +-* a+b 12.3e4 'c d'"), expected);
}
/// Exercises `Lexer::decompose_pair` over every combination of optional modifier,
/// separator and value, checking that each part is recovered verbatim.
#[test]
fn test_decompose_pair_combos() {
    let name = "name";
    for modifier in ["", "mod"].iter() {
        for separator in [":", "=", "::", ":="].iter() {
            for value in ["", "value", "a:b", "a::b", "a=b", "a:=b"].iter() {
                // The "." joining name and modifier is only present when there is a modifier.
                let dot = if modifier.is_empty() { "" } else { "." };
                let input = format!("{}{}{}{}{}", name, dot, modifier, separator, value);
                // Include the input in the failure message so the failing combination is
                // identifiable without reconstructing it from the loop variables.
                assert_eq!(
                    Lexer::decompose_pair(&input),
                    Some(DecomposedPair {
                        name: name.into(),
                        modifier: String::from(*modifier),
                        separator: String::from(*separator),
                        value: String::from(*value),
                    }),
                    "decomposing {:?}",
                    input
                );
            }
        }
    }
}
/// `is_one_of` rejects "Grumpy." when its last flag is `true` (cursor untouched) and
/// accepts it when that flag is `false`, advancing the cursor past "Grumpy".
#[test]
fn test_is_one_of() {
    let dwarves = vec![
        "Sneezy", "Doc", "Bashful", "Grumpy", "Happy", "Sleepy", "Dopey",
    ];
    let mut lexer = Lexer::new("Grumpy.");

    let matched_strict = lexer.is_one_of(&dwarves, false, true);
    assert!(!matched_strict);
    assert_eq!(lexer.cursor, 0);

    let matched_loose = lexer.is_one_of(&dwarves, false, false);
    assert!(matched_loose);
    assert_eq!(lexer.cursor, 6);
}
/// Alphabetic input is not an integer; the cursor stays at 0.
#[test]
fn test_is_integer_negative() {
    let mut lexer = Lexer::new("one");
    let token = lexer.is_integer();
    assert_eq!(token, NONE);
    assert_eq!(lexer.cursor, 0);
}
/// A run of digits is read as a `Number`.
#[test]
fn test_is_integer_positive() {
    let mut lexer = Lexer::new("123");
    let token = lexer.is_integer();
    assert_eq!(token, Some((String::from("123"), Type::Number)));
    assert_eq!(lexer.cursor, 3);
}
/// An integer stops at a following dot, which is not consumed.
#[test]
fn test_is_integer_trailing_dot() {
    let mut lexer = Lexer::new("123.foo");
    let token = lexer.is_integer();
    assert_eq!(token, Some((String::from("123"), Type::Number)));
    assert_eq!(lexer.cursor, 3);
}
/// An integer is read from the current cursor position (offset 4) and the cursor moves to 7.
#[test]
fn test_is_integer_not_at_start() {
    let mut lexer = Lexer::new("abc.123.foo");
    lexer.cursor = 4;
    let token = lexer.is_integer();
    assert_eq!(token, Some((String::from("123"), Type::Number)));
    assert_eq!(lexer.cursor, 7);
}
/// A literal that does not appear at the cursor is not matched; the cursor stays at 0.
#[test]
fn test_is_literal_no_match() {
    let mut lexer = Lexer::new("one.two");
    let matched = lexer.is_literal("zero", false, false);
    assert!(!matched);
    assert_eq!(lexer.cursor, 0);
}
/// Three consecutive literals are matched one after another, each advancing the cursor.
#[test]
fn test_is_literal_multi() {
    let mut lexer = Lexer::new("one.two");

    let matched_one = lexer.is_literal("one", false, false);
    assert!(matched_one);
    assert_eq!(lexer.cursor, 3);

    let matched_dot = lexer.is_literal(".", false, false);
    assert!(matched_dot);
    assert_eq!(lexer.cursor, 4);

    let matched_two = lexer.is_literal("two", false, true);
    assert!(matched_two);
    assert_eq!(lexer.cursor, 7);
}
/// "wonder" matches the literal "wonderful" only when the second argument is `true`.
#[test]
fn test_is_literal_abbrev() {
    let mut lexer = Lexer::new("wonder");

    let matched_exact = lexer.is_literal("wonderful", false, false);
    assert!(!matched_exact);
    assert_eq!(lexer.cursor, 0);

    let matched_abbrev = lexer.is_literal("wonderful", true, false);
    assert!(matched_abbrev);
    assert_eq!(lexer.cursor, 6);
}
/// End-to-end lexer tests: each case feeds a complete input through the lexer's iterator
/// and compares the full token stream, rather than probing a single `is_*` recogniser.
mod integ {
    use super::super::*;

    /// Lexes `input` and asserts that the resulting `(text, type)` tokens equal `expected`.
    ///
    /// Every input is checked twice: once on its own ("isolated") and once with a space on
    /// either side ("embedded"). Both runs must produce the same tokens. The lexer is given
    /// the attributes `due`, `tags` and `description` before lexing.
    fn lexer_test(input: &str, expected: Vec<(&str, Type)>) {
        // isolated case first, then the embedded case..
        let cases = [input.to_string(), format!(" {} ", input)];
        for text in cases.iter() {
            let mut lexer = Lexer::new(text.as_str());
            lexer.add_attribute("due");
            lexer.add_attribute("tags");
            lexer.add_attribute("description");
            let got: Vec<_> = lexer.into_iter().collect();
            let got_strs: Vec<_> = got.iter().map(|(s, t)| (s.as_ref(), *t)).collect();
            // Report which of the two inputs failed.
            assert_eq!(got_strs, expected, "while lexing {:?}", text);
        }
    }

    // Pattern
    //
    #[test]
    fn test_pattern_foo() {
        lexer_test("/foo/", vec![("/foo/", Type::Pattern)]);
    }
    #[test]
    fn test_pattern_escaped_slash() {
        lexer_test("/a\\/b/", vec![("/a\\/b/", Type::Pattern)]);
    }
    #[test]
    fn test_pattern_quote() {
        lexer_test("/'/", vec![("/'/", Type::Pattern)]);
    }

    // Substitution
    //
    #[test]
    fn test_subst_g() {
        lexer_test("/from/to/g", vec![("/from/to/g", Type::Substitution)]);
    }
    #[test]
    fn test_subst() {
        lexer_test("/from/to/", vec![("/from/to/", Type::Substitution)]);
    }

    // Tag
    //
    #[test]
    fn test_tag_simple() {
        lexer_test("+tag", vec![("+tag", Type::Tag)]);
    }
    #[test]
    fn test_tag_negative() {
        lexer_test("-tag", vec![("-tag", Type::Tag)]);
    }
    #[test]
    fn test_tag_at() {
        lexer_test("+@tag", vec![("+@tag", Type::Tag)]);
    }

    // Path
    //
    #[test]
    fn test_path() {
        lexer_test(
            "/long/path/to/file.txt",
            vec![("/long/path/to/file.txt", Type::Path)],
        );
    }
    #[test]
    fn test_path_dir() {
        lexer_test(
            "/long/path/to/dir/",
            vec![("/long/path/to/dir/", Type::Path)],
        );
    }

    // Word
    //
    #[test]
    fn test_1_foo_bar() {
        lexer_test("1.foo.bar", vec![("1.foo.bar", Type::Word)]);
    }

    // Identifier
    //
    #[test]
    fn test_foo() {
        lexer_test("foo", vec![("foo", Type::Identifier)]);
    }
    #[test]
    fn test_multibyte_ident() {
        lexer_test("Çirçös", vec![("Çirçös", Type::Identifier)]);
    }
    #[test]
    fn test_multibyte_nonpunctuation_single_char() {
        // Written as an explicit escape (U+263A) so the character cannot be lost to an
        // encoding round-trip; an empty input would produce no tokens at all.
        lexer_test("\u{263A}", vec![("\u{263A}", Type::Identifier)]);
    }
    #[test]
    fn test_name() {
        lexer_test("name", vec![("name", Type::Identifier)]);
    }
    #[test]
    fn test_f1() {
        lexer_test("f1", vec![("f1", Type::Identifier)]);
    }
    #[test]
    fn test_foo_dot_bar() {
        lexer_test("foo.bar", vec![("foo.bar", Type::Identifier)]);
    }
    #[test]
    fn test_long_with_underscore() {
        lexer_test(
            "a1a1a1a1_a1a1_a1a1_a1a1_a1a1a1a1a1a1",
            vec![("a1a1a1a1_a1a1_a1a1_a1a1_a1a1a1a1a1a1", Type::Identifier)],
        );
    }

    // Word that starts with 'or', which is an operator, but should be ignored.
    //
    #[test]
    fn test_starts_with_or() {
        lexer_test("ordinary", vec![("ordinary", Type::Identifier)]);
    }

    // DOM
    //
    #[test]
    fn test_due() {
        lexer_test("due", vec![("due", Type::DOM)]);
    }
    #[test]
    fn test_123_tags() {
        lexer_test("123.tags", vec![("123.tags", Type::DOM)]);
    }
    #[test]
    fn test_123_tags_pending() {
        lexer_test("123.tags.PENDING", vec![("123.tags.PENDING", Type::DOM)]);
    }
    #[test]
    fn test_123_description() {
        lexer_test("123.description", vec![("123.description", Type::DOM)]);
    }
    #[test]
    fn test_123_annotations_count() {
        lexer_test(
            "123.annotations.count",
            vec![("123.annotations.count", Type::DOM)],
        );
    }
    #[test]
    fn test_123_annotations_1_description() {
        lexer_test(
            "123.annotations.1.description",
            vec![("123.annotations.1.description", Type::DOM)],
        );
    }
    #[test]
    fn test_123_annotations_1_entry() {
        lexer_test(
            "123.annotations.1.entry",
            vec![("123.annotations.1.entry", Type::DOM)],
        );
    }
    #[test]
    fn test_123_annotations_1_entry_year() {
        lexer_test(
            "123.annotations.1.entry.year",
            vec![("123.annotations.1.entry.year", Type::DOM)],
        );
    }
    #[test]
    fn test_uuid_due() {
        lexer_test(
            "a360fc44-315c-4366-b70c-ea7e7520b749.due",
            vec![("a360fc44-315c-4366-b70c-ea7e7520b749.due", Type::DOM)],
        );
    }
    #[test]
    fn test_numeric_uuid_due() {
        lexer_test(
            "12345678-1234-1234-1234-123456789012.due",
            vec![("12345678-1234-1234-1234-123456789012.due", Type::DOM)],
        );
    }
    #[test]
    fn test_system_os() {
        lexer_test("system.os", vec![("system.os", Type::DOM)]);
    }
    #[test]
    fn test_rc_foo() {
        lexer_test("rc.foo", vec![("rc.foo", Type::DOM)]);
    }

    // URL
    //
    #[test]
    fn test_lexer_31() {
        lexer_test(
            "http://example.com",
            vec![("http://example.com", Type::URL)],
        );
    }
    #[test]
    fn test_lexer_32() {
        lexer_test(
            "https://foo.example.com",
            vec![("https://foo.example.com", Type::URL)],
        );
    }

    // String
    //
    #[test]
    fn test_quoted_string() {
        lexer_test("'one two'", vec![("'one two'", Type::String)]);
    }
    #[test]
    fn test_double_quoted_string() {
        lexer_test("\"three\"", vec![("\"three\"", Type::String)]);
    }
    #[test]
    fn test_string_quoted_with_escapes() {
        lexer_test("'\\''", vec![("'''", Type::String)]);
    }
    #[test]
    fn test_string_quoted_quotes() {
        lexer_test("\"\\\"\"", vec![("\"\"\"", Type::String)]);
    }
    #[test]
    fn test_quoted_tabs() {
        lexer_test("\"\tfoo\t\"", vec![("\"\tfoo\t\"", Type::String)]);
    }
    #[test]
    fn test_multibyte_slash_u() {
        lexer_test("\"\\u20A43\"", vec![("\"₤3\"", Type::String)]);
    }
    #[test]
    fn test_multibyte_u_plus() {
        lexer_test("\"U+20AC4\"", vec![("\"€4\"", Type::String)]);
    }

    // Number
    //
    #[test]
    fn test_one() {
        lexer_test("1", vec![("1", Type::Number)]);
    }
    #[test]
    fn test_pi() {
        lexer_test("3.14", vec![("3.14", Type::Number)]);
    }
    #[test]
    fn test_avogadro() {
        lexer_test("6.02217e23", vec![("6.02217e23", Type::Number)]);
    }
    #[test]
    fn test_expo() {
        lexer_test("1.2e-3.4", vec![("1.2e-3.4", Type::Number)]);
    }
    #[test]
    fn test_hex() {
        lexer_test("0x2f", vec![("0x2f", Type::Hex)]);
    }

    // Set (1,2,4-7,9)
    //
    #[test]
    fn test_set_pair() {
        lexer_test("1,2", vec![("1,2", Type::Set)]);
    }
    #[test]
    fn test_set_range() {
        lexer_test("1-2", vec![("1-2", Type::Set)]);
    }
    #[test]
    fn test_set_range_pair() {
        lexer_test("1-2,4", vec![("1-2,4", Type::Set)]);
    }
    #[test]
    fn test_set_range_pair_ws() {
        lexer_test("1-2,4 ", vec![("1-2,4", Type::Set)]);
    }
    #[test]
    fn test_set_range_pair_paren() {
        lexer_test("1-2,4(", vec![("1-2,4", Type::Set), ("(", Type::Op)]);
    }
    #[test]
    fn test_ranges_and_singletons() {
        lexer_test("1-2,4,6-8", vec![("1-2,4,6-8", Type::Set)]);
    }
    #[test]
    fn test_set_more_ranges_and_singletons() {
        lexer_test("1-2,4,6-8,10-12", vec![("1-2,4,6-8,10-12", Type::Set)]);
    }

    // Pair
    //
    #[test]
    fn test_name_colon_value() {
        lexer_test("name:value", vec![("name:value", Type::Pair)]);
    }
    #[test]
    fn test_name_eq_value() {
        lexer_test("name=value", vec![("name=value", Type::Pair)]);
    }
    #[test]
    fn test_name_colon_eq_value() {
        lexer_test("name:=value", vec![("name:=value", Type::Pair)]);
    }
    #[test]
    fn test_name_dot_mod_colon_value() {
        lexer_test("name.mod:value", vec![("name.mod:value", Type::Pair)]);
    }
    #[test]
    fn test_name_dot_mod_eq_value() {
        lexer_test("name.mod=value", vec![("name.mod=value", Type::Pair)]);
    }
    #[test]
    fn test_name_colon() {
        lexer_test("name:", vec![("name:", Type::Pair)]);
    }
    #[test]
    fn test_name_eq() {
        lexer_test("name=", vec![("name=", Type::Pair)]);
    }
    #[test]
    fn test_name_dot_mod_colon() {
        lexer_test("name.mod:", vec![("name.mod:", Type::Pair)]);
    }
    #[test]
    fn test_name_dot_mod_equal() {
        lexer_test("name.mod=", vec![("name.mod=", Type::Pair)]);
    }
    #[test]
    fn test_pro_quoted() {
        lexer_test("pro:'P 1'", vec![("pro:'P 1'", Type::Pair)]);
    }
    #[test]
    fn test_rc_colon_x() {
        lexer_test("rc:x", vec![("rc:x", Type::Pair)]);
    }
    #[test]
    fn test_rc_dot_name_colon_value() {
        lexer_test("rc.name:value", vec![("rc.name:value", Type::Pair)]);
    }
    #[test]
    fn test_rc_dot_name_eq_value() {
        lexer_test("rc.name=value", vec![("rc.name=value", Type::Pair)]);
    }
    #[test]
    fn test_rc_dot_name_colon_eq_value() {
        lexer_test("rc.name:=value", vec![("rc.name:=value", Type::Pair)]);
    }
    #[test]
    fn test_due_colon_eq_quoted() {
        lexer_test("due:='eow - 2d'", vec![("due:='eow - 2d'", Type::Pair)]);
    }
    #[test]
    fn test_name_colon_quoted_with_newline() {
        lexer_test("name:'foo\nbar'", vec![("name:'foo\nbar'", Type::Pair)]);
    }

    // Operator - complete set
    //
    #[test]
    fn test_caret() {
        lexer_test("^", vec![("^", Type::Op)]);
    }
    #[test]
    fn test_bang() {
        lexer_test("!", vec![("!", Type::Op)]);
    }
    #[test]
    fn test_neg() {
        lexer_test("_neg_", vec![("_neg_", Type::Op)]);
    }
    #[test]
    fn test_pos() {
        lexer_test("_pos_", vec![("_pos_", Type::Op)]);
    }
    #[test]
    fn test_hastag() {
        lexer_test("_hastag_", vec![("_hastag_", Type::Op)]);
    }
    #[test]
    fn test_notag() {
        lexer_test("_notag_", vec![("_notag_", Type::Op)]);
    }
    #[test]
    fn test_star() {
        lexer_test("*", vec![("*", Type::Op)]);
    }
    #[test]
    fn test_slash() {
        lexer_test("/", vec![("/", Type::Op)]);
    }
    #[test]
    fn test_percent() {
        lexer_test("%", vec![("%", Type::Op)]);
    }
    #[test]
    fn test_plus() {
        lexer_test("+", vec![("+", Type::Op)]);
    }
    #[test]
    fn test_minus() {
        lexer_test("-", vec![("-", Type::Op)]);
    }
    #[test]
    fn test_leq() {
        lexer_test("<=", vec![("<=", Type::Op)]);
    }
    #[test]
    fn test_geq() {
        lexer_test(">=", vec![(">=", Type::Op)]);
    }
    #[test]
    fn test_gt() {
        lexer_test(">", vec![(">", Type::Op)]);
    }
    #[test]
    fn test_lt() {
        lexer_test("<", vec![("<", Type::Op)]);
    }
    #[test]
    fn test_eq() {
        lexer_test("=", vec![("=", Type::Op)]);
    }
    #[test]
    fn test_double_eq() {
        lexer_test("==", vec![("==", Type::Op)]);
    }
    #[test]
    fn test_not_eq() {
        lexer_test("!=", vec![("!=", Type::Op)]);
    }
    #[test]
    fn test_not_double_eq() {
        lexer_test("!==", vec![("!==", Type::Op)]);
    }
    #[test]
    fn test_tilde() {
        lexer_test("~", vec![("~", Type::Op)]);
    }
    #[test]
    fn test_not_tilde() {
        lexer_test("!~", vec![("!~", Type::Op)]);
    }
    #[test]
    fn test_and() {
        lexer_test("and", vec![("and", Type::Op)]);
    }
    #[test]
    fn test_or() {
        lexer_test("or", vec![("or", Type::Op)]);
    }
    #[test]
    fn test_xor() {
        lexer_test("xor", vec![("xor", Type::Op)]);
    }
    #[test]
    fn test_lparen() {
        lexer_test("(", vec![("(", Type::Op)]);
    }
    #[test]
    fn test_rparen() {
        lexer_test(")", vec![(")", Type::Op)]);
    }

    // UUID
    //
    #[test]
    fn test_uuid_ffs() {
        lexer_test(
            "ffffffff-ffff-ffff-ffff-ffffffffffff",
            vec![("ffffffff-ffff-ffff-ffff-ffffffffffff", Type::Uuid)],
        );
    }
    #[test]
    fn test_uuid_00s() {
        lexer_test(
            "00000000-0000-0000-0000-0000000",
            vec![("00000000-0000-0000-0000-0000000", Type::Uuid)],
        );
    }
    #[test]
    fn test_uuid_shorter() {
        lexer_test(
            "00000000-0000-0000-0000",
            vec![("00000000-0000-0000-0000", Type::Uuid)],
        );
    }
    #[test]
    fn test_uuid_shorter_still() {
        lexer_test(
            "00000000-0000-0000",
            vec![("00000000-0000-0000", Type::Uuid)],
        );
    }
    #[test]
    fn test_uuid_even_shorter() {
        lexer_test("00000000-0000", vec![("00000000-0000", Type::Uuid)]);
    }
    #[test]
    fn test_uuid_only_first_bit() {
        lexer_test("00000000", vec![("00000000", Type::Uuid)]);
    }
    #[test]
    fn test_real_uuid() {
        lexer_test(
            "a360fc44-315c-4366-b70c-ea7e7520b749",
            vec![("a360fc44-315c-4366-b70c-ea7e7520b749", Type::Uuid)],
        );
    }
    #[test]
    fn test_real_uuid_shorter() {
        lexer_test(
            "a360fc44-315c-4366-b70c-ea7e752",
            vec![("a360fc44-315c-4366-b70c-ea7e752", Type::Uuid)],
        );
    }
    #[test]
    fn test_real_uuid_shorter_still() {
        lexer_test(
            "a360fc44-315c-4366-b70c",
            vec![("a360fc44-315c-4366-b70c", Type::Uuid)],
        );
    }
    #[test]
    fn test_real_uuid_even_shorter() {
        lexer_test(
            "a360fc44-315c-4366",
            vec![("a360fc44-315c-4366", Type::Uuid)],
        );
    }
    #[test]
    fn test_real_uuid_naming_is_hard() {
        lexer_test("a360fc44-315c", vec![("a360fc44-315c", Type::Uuid)]);
    }
    #[test]
    fn test_real_uuid_only_first_bit() {
        lexer_test("a360fc44", vec![("a360fc44", Type::Uuid)]);
    }

    // Date
    //
    #[test]
    fn test_year_week() {
        lexer_test("2015-W01", vec![("2015-W01", Type::Date)]);
    }
    #[test]
    fn test_year_month_day() {
        lexer_test("2015-02-17", vec![("2015-02-17", Type::Date)]);
    }
    #[test]
    fn test_timestamp() {
        lexer_test(
            "2013-11-29T22:58:00Z",
            vec![("2013-11-29T22:58:00Z", Type::Date)],
        );
    }
    #[test]
    fn test_abbrev_timestamp() {
        lexer_test("20131129T225800Z", vec![("20131129T225800Z", Type::Date)]);
    }
    #[test]
    fn test_9thn() {
        lexer_test("9th", vec![("9th", Type::Date)]);
    }
    #[test]
    fn test_10th() {
        lexer_test("10th", vec![("10th", Type::Date)]);
    }
    #[test]
    fn test_today() {
        lexer_test("today", vec![("today", Type::Date)]);
    }

    // Duration
    //
    #[test]
    fn test_year() {
        lexer_test("year", vec![("year", Type::Duration)]);
    }
    #[test]
    fn test_4weeks() {
        lexer_test("4weeks", vec![("4weeks", Type::Duration)]);
    }
    #[test]
    fn test_pt23h() {
        lexer_test("PT23H", vec![("PT23H", Type::Duration)]);
    }
    #[test]
    fn test_1second() {
        lexer_test("1second", vec![("1second", Type::Duration)]);
    }
    #[test]
    fn test_1s() {
        lexer_test("1s", vec![("1s", Type::Duration)]);
    }
    #[test]
    fn test_1minute() {
        lexer_test("1minute", vec![("1minute", Type::Duration)]);
    }
    #[test]
    fn test_2hour() {
        lexer_test("2hour", vec![("2hour", Type::Duration)]);
    }
    #[test]
    fn test_3_days() {
        lexer_test("3 days", vec![("3 days", Type::Duration)]);
    }
    #[test]
    fn test_4w() {
        lexer_test("4w", vec![("4w", Type::Duration)]);
    }
    #[test]
    fn test_5mo() {
        lexer_test("5mo", vec![("5mo", Type::Duration)]);
    }
    #[test]
    fn test_6_years() {
        lexer_test("6 years", vec![("6 years", Type::Duration)]);
    }
    #[test]
    fn test_p1y() {
        lexer_test("P1Y", vec![("P1Y", Type::Duration)]);
    }
    #[test]
    fn test_pt1h() {
        lexer_test("PT1H", vec![("PT1H", Type::Duration)]);
    }
    #[test]
    fn test_p_full() {
        lexer_test("P1Y1M1DT1H1M1S", vec![("P1Y1M1DT1H1M1S", Type::Duration)]);
    }

    // Misc
    //
    #[test]
    fn test_separator() {
        lexer_test("--", vec![("--", Type::Separator)]);
    }
    #[test]
    fn test_separator_ws() {
        lexer_test(" -- ", vec![("--", Type::Separator)]);
    }
    #[test]
    fn test_separator_boundaries() {
        lexer_test(
            "123--123 ",
            vec![
                ("123", Type::Number),
                ("--", Type::Separator),
                ("123", Type::Number),
            ],
        );
    }

    // Expression
    // due:eom-2w
    // due < eom + 1w + 1d
    // ( /pattern/ or 8ad2e3db-914d-4832-b0e6-72fa04f6e331,3b6218f9-726a-44fc-aa63-889ff52be442 )
    //
    #[test]
    fn test_expression() {
        lexer_test(
            "(1+2)",
            vec![
                ("(", Type::Op),
                ("1", Type::Number),
                ("+", Type::Op),
                ("2", Type::Number),
                (")", Type::Op),
            ],
        );
    }
    #[test]
    fn test_expression_dom_tilde() {
        lexer_test(
            "description~pattern",
            vec![
                ("description", Type::DOM),
                ("~", Type::Op),
                ("pattern", Type::Identifier),
            ],
        );
    }
    #[test]
    fn test_expression_paren_tag() {
        lexer_test(
            "(+tag)",
            vec![("(", Type::Op), ("+tag", Type::Tag), (")", Type::Op)],
        );
    }
    #[test]
    fn test_expression_paren_name_value() {
        lexer_test(
            "(name:value)",
            vec![("(", Type::Op), ("name:value", Type::Pair), (")", Type::Op)],
        );
    }
}
}