460 lines
11 KiB
Rust
460 lines
11 KiB
Rust
|
mat!(ascii_literal, r"a", "a", Some((0, 1)));
|
||
|
|
||
|
// Some crazy expressions from regular-expressions.info.
|
||
|
mat!(
|
||
|
match_ranges,
|
||
|
r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
|
||
|
"num: 255",
|
||
|
Some((5, 8))
|
||
|
);
|
||
|
mat!(
|
||
|
match_ranges_not,
|
||
|
r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
|
||
|
"num: 256",
|
||
|
None
|
||
|
);
|
||
|
mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
|
||
|
mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
|
||
|
mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
|
||
|
mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
|
||
|
mat!(
|
||
|
match_email,
|
||
|
r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
|
||
|
"mine is jam.slam@gmail.com ",
|
||
|
Some((8, 26))
|
||
|
);
|
||
|
mat!(
|
||
|
match_email_not,
|
||
|
r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
|
||
|
"mine is jam.slam@gmail ",
|
||
|
None
|
||
|
);
|
||
|
mat!(
|
||
|
match_email_big,
|
||
|
r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
|
||
|
"mine is jam.slam@gmail.com ",
|
||
|
Some((8, 26))
|
||
|
);
|
||
|
mat!(
|
||
|
match_date1,
|
||
|
r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
|
||
|
"1900-01-01",
|
||
|
Some((0, 10))
|
||
|
);
|
||
|
mat!(
|
||
|
match_date2,
|
||
|
r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
|
||
|
"1900-00-01",
|
||
|
None
|
||
|
);
|
||
|
mat!(
|
||
|
match_date3,
|
||
|
r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
|
||
|
"1900-13-01",
|
||
|
None
|
||
|
);
|
||
|
|
||
|
// Do some crazy dancing with the start/end assertions.
|
||
|
matiter!(match_start_end_empty, r"^$", "", (0, 0));
|
||
|
matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0));
|
||
|
matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0));
|
||
|
matiter!(match_start_end_empty_rev, r"$^", "", (0, 0));
|
||
|
matiter!(
|
||
|
match_start_end_empty_rep,
|
||
|
r"(?:^$)*",
|
||
|
"a\nb\nc",
|
||
|
(0, 0),
|
||
|
(1, 1),
|
||
|
(2, 2),
|
||
|
(3, 3),
|
||
|
(4, 4),
|
||
|
(5, 5)
|
||
|
);
|
||
|
matiter!(
|
||
|
match_start_end_empty_rep_rev,
|
||
|
r"(?:$^)*",
|
||
|
"a\nb\nc",
|
||
|
(0, 0),
|
||
|
(1, 1),
|
||
|
(2, 2),
|
||
|
(3, 3),
|
||
|
(4, 4),
|
||
|
(5, 5)
|
||
|
);
|
||
|
|
||
|
// Test negated character classes.
|
||
|
mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3)));
|
||
|
mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3)));
|
||
|
mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3)));
|
||
|
mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3)));
|
||
|
mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2)));
|
||
|
mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3)));
|
||
|
mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3)));
|
||
|
mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));
|
||
|
|
||
|
// Test that repeated empty expressions don't loop forever.
|
||
|
mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
|
||
|
mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
|
||
|
mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
|
||
|
mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
|
||
|
mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
|
||
|
mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
|
||
|
mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
|
||
|
mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
|
||
|
mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
|
||
|
mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
|
||
|
mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
|
||
|
mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
|
||
|
|
||
|
// Test that we handle various flavors of empty expressions.
|
||
|
matiter!(match_empty1, r"", "", (0, 0));
|
||
|
matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
|
||
|
matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3));
|
||
|
matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3));
|
||
|
matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
|
||
|
matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2));
|
||
|
|
||
|
// Test that the DFA can handle pathological cases.
// (This should result in the DFA's cache being flushed too frequently, which
// should cause it to quit and fall back to the NFA algorithm.)
//
// The haystack is 100_000 random binary digits, then a forced '1', then
// exactly 20 more random binary digits, so it always satisfies the
// `1[01]{20}$` tail of the pattern and the assertion must hold for every
// random draw.
#[test]
fn dfa_handles_pathological_case() {
    // Returns a string of exactly `count` characters, each one either '0' or
    // '1' as decided by a `SmallRng` seeded via `from_entropy()`. The
    // contents therefore differ from run to run.
    fn ones_and_zeroes(count: usize) -> String {
        use rand::rngs::SmallRng;
        use rand::{Rng, SeedableRng};

        let mut rng = SmallRng::from_entropy();
        // The final length is known up front (one ASCII byte per digit), so
        // reserve it once instead of growing the buffer repeatedly.
        let mut s = String::with_capacity(count);
        for _ in 0..count {
            let bit: bool = rng.gen();
            s.push(if bit { '1' } else { '0' });
        }
        s
    }

    // `regex!` and `text!` are macros defined outside this section.
    let re = regex!(r"[01]*1[01]{20}$");
    let text = {
        let mut pieces = ones_and_zeroes(100_000);
        pieces.push('1');
        pieces.push_str(&ones_and_zeroes(20));
        pieces
    };
    assert!(re.is_match(text!(&*text)));
}
|
||
|
|
||
|
// Builds one large, heavily nested alternation with `RegexBuilder` using its
// default settings and unwraps the result, so the test fails by panicking if
// the pattern is rejected at build time. The compiled regex is never run
// against any input.
//
// NOTE(review): the test name suggests this guards the parser's nesting
// limit -- confirm against the regex crate's nest-limit handling.
//
// The pattern only sets `(?-u)`; ignore-whitespace (`x`) mode is not enabled
// anywhere in this function, so the line breaks inside the raw string are
// pattern text. Do not re-indent or reflow the literal.
#[test]
fn nest_limit_makes_it_parse() {
    use regex::RegexBuilder;

    RegexBuilder::new(
        r#"(?-u)
2(?:
[45]\d{3}|
7(?:
1[0-267]|
2[0-289]|
3[0-29]|
4[01]|
5[1-3]|
6[013]|
7[0178]|
91
)|
8(?:
0[125]|
[139][1-6]|
2[0157-9]|
41|
6[1-35]|
7[1-5]|
8[1-8]|
90
)|
9(?:
0[0-2]|
1[0-4]|
2[568]|
3[3-6]|
5[5-7]|
6[0167]|
7[15]|
8[0146-9]
)
)\d{4}|
3(?:
12?[5-7]\d{2}|
0(?:
2(?:
[025-79]\d|
[348]\d{1,2}
)|
3(?:
[2-4]\d|
[56]\d?
)
)|
2(?:
1\d{2}|
2(?:
[12]\d|
[35]\d{1,2}|
4\d?
)
)|
3(?:
1\d{2}|
2(?:
[2356]\d|
4\d{1,2}
)
)|
4(?:
1\d{2}|
2(?:
2\d{1,2}|
[47]|
5\d{2}
)
)|
5(?:
1\d{2}|
29
)|
[67]1\d{2}|
8(?:
1\d{2}|
2(?:
2\d{2}|
3|
4\d
)
)
)\d{3}|
4(?:
0(?:
2(?:
[09]\d|
7
)|
33\d{2}
)|
1\d{3}|
2(?:
1\d{2}|
2(?:
[25]\d?|
[348]\d|
[67]\d{1,2}
)
)|
3(?:
1\d{2}(?:
\d{2}
)?|
2(?:
[045]\d|
[236-9]\d{1,2}
)|
32\d{2}
)|
4(?:
[18]\d{2}|
2(?:
[2-46]\d{2}|
3
)|
5[25]\d{2}
)|
5(?:
1\d{2}|
2(?:
3\d|
5
)
)|
6(?:
[18]\d{2}|
2(?:
3(?:
\d{2}
)?|
[46]\d{1,2}|
5\d{2}|
7\d
)|
5(?:
3\d?|
4\d|
[57]\d{1,2}|
6\d{2}|
8
)
)|
71\d{2}|
8(?:
[18]\d{2}|
23\d{2}|
54\d{2}
)|
9(?:
[18]\d{2}|
2[2-5]\d{2}|
53\d{1,2}
)
)\d{3}|
5(?:
02[03489]\d{2}|
1\d{2}|
2(?:
1\d{2}|
2(?:
2(?:
\d{2}
)?|
[457]\d{2}
)
)|
3(?:
1\d{2}|
2(?:
[37](?:
\d{2}
)?|
[569]\d{2}
)
)|
4(?:
1\d{2}|
2[46]\d{2}
)|
5(?:
1\d{2}|
26\d{1,2}
)|
6(?:
[18]\d{2}|
2|
53\d{2}
)|
7(?:
1|
24
)\d{2}|
8(?:
1|
26
)\d{2}|
91\d{2}
)\d{3}|
6(?:
0(?:
1\d{2}|
2(?:
3\d{2}|
4\d{1,2}
)
)|
2(?:
2[2-5]\d{2}|
5(?:
[3-5]\d{2}|
7
)|
8\d{2}
)|
3(?:
1|
2[3478]
)\d{2}|
4(?:
1|
2[34]
)\d{2}|
5(?:
1|
2[47]
)\d{2}|
6(?:
[18]\d{2}|
6(?:
2(?:
2\d|
[34]\d{2}
)|
5(?:
[24]\d{2}|
3\d|
5\d{1,2}
)
)
)|
72[2-5]\d{2}|
8(?:
1\d{2}|
2[2-5]\d{2}
)|
9(?:
1\d{2}|
2[2-6]\d{2}
)
)\d{3}|
7(?:
(?:
02|
[3-589]1|
6[12]|
72[24]
)\d{2}|
21\d{3}|
32
)\d{3}|
8(?:
(?:
4[12]|
[5-7]2|
1\d?
)|
(?:
0|
3[12]|
[5-7]1|
217
)\d
)\d{4}|
9(?:
[35]1|
(?:
[024]2|
81
)\d|
(?:
1|
[24]1
)\d{2}
)\d{3}
"#,
    )
    .build()
    .unwrap();
}
|