Update the libclamav library to version 1.0.0

This commit is contained in:
2023-01-14 18:28:39 +08:00
parent b879ee0b2e
commit 45fe15f472
8531 changed files with 1222046 additions and 177272 deletions


@@ -0,0 +1 @@
{"files":{"CHANGELOG.md":"ee78c17c45b2cb5d50f8631a5e870e59821adc6e62d79562dc61a2f4a3e4a259","Cargo.lock":"b4d8b65e499f30eec76de02be42e8ee4c905b04286f0d7496a14880c6ec562c9","Cargo.toml":"f5b5886c898f5137c388dc46920aeb26700583eeab6857168b374c506636b7d5","LICENSE":"4dbda04344456f09a7a588140455413a9ac59b6b26a1ef7cdf9c800c012d87f0","README.md":"7387399dad4d26aee009ee73d9c171c2fce8a1b015935d263c1d1fc9b12fd61f","doc/nom_recipes.md":"a903a6d8f9e5c935f2a4cbd632f67bc46e41934d1cc8517da9b9e7f3840c9955","src/bits/complete.rs":"cf30854f6e9ed79963408d73c674f8fb5871645c1f991f4e8f1d7026e39681a3","src/bits/mod.rs":"7050e20abffc3d28a41bd1aa3ca3000fb3ee1cb6227662d2fd864b3c9143dd96","src/bits/streaming.rs":"a1ee304ab8c388553927c5c75cf839c6ded397301334bfe2e6653baec233a8fe","src/branch/mod.rs":"78b98e223d5a8104ba203dfc2d78f37d1c11bd050a19859bd3f698653512e642","src/branch/tests.rs":"9a4a7b0c38fc28881d904f8ad6757a23e543f47a4c2d6fd6a9589eeb97209088","src/bytes/complete.rs":"666fa037c63292b1616cbc04c5b414a53c705d0d2ccd8d84399bbe78f573b7e9","src/bytes/mod.rs":"055e264f71a9fa2245be652cc747cfb2c3e34c3c2ba3b75e9611be51fcebea0b","src/bytes/streaming.rs":"e716e6555fbde14bfc2d7358a3edc2191df0522bc55b1f7735f9809ceb235829","src/bytes/tests.rs":"f0d9eb90d72873346e47e5592d30093eb38cbbb5fbf2e769cda776ccfff4f887","src/character/complete.rs":"cf4784f5ecb578ccce4768d44d0a6111a92c18cba874acbbc8ccf935bccc8cd6","src/character/mod.rs":"2fc6a3b19b766a3c37328d62eedbc0c9cb9612aa1d38ececd5cc579b61725fa2","src/character/streaming.rs":"de67ec5663475bc5ffa16f12d121ce9181353b16656b90216704197fca3010fc","src/character/tests.rs":"38958a709f96f077f93a72b32d8ded0a2ad6e488d9aadbe3cf1cfd8adaec06c8","src/combinator/mod.rs":"976a7f19fad4a6685eac9ef77e50f253d59272b0774b9dd5ed74a9d011e49697","src/combinator/tests.rs":"1e56e2c1263d93bfbd244d24160a0bea41731e5158d57382e69c215427770b94","src/error.rs":"9d9bf87e76b47cfd9170f8ae50b6deeb02ff1c296aac3eb4f71ee1474dc0fba5","src/internal.rs":"0614a179d7ae01c9e84812b9a3a379ac6d014972718045f84b91a72f2d18ac01","src/lib.rs":"b20780277606306985201eb9a55d01e022f8905c3f829357b214a5bb4a4e3c9e","src/multi/mod.rs":"e75721268620f20b63d9b10fa56bf74da1b66730c70ec30fd05a913e62197680","src/multi/tests.rs":"806f89f5f347978c22e9b8cc7f8a49ad1d1fe23bff5974725b643a2ceffe8cb0","src/number/complete.rs":"2b9f339b8648884ddb0dc449be861477283d5422c17ca4b69c377cd6d12ce21b","src/number/mod.rs":"ba6eb439ee0befcc0872be7ce43b4836622af45c3dc2fc003b0d909ee42d7b20","src/number/streaming.rs":"bd0e382ad150001da5ef1d5c7f003797b3468d56fa0f577d6d31aa0259157766","src/sequence/mod.rs":"adfa5bc34db12deb9581fbc02a0ecc3e2a16a7bb0d1a65696fcec0262d3cbbff","src/sequence/tests.rs":"832d44097b79056879eca93ef10655143bc8caf6fac6364ff7ff153d3357e4ad","src/str.rs":"f26aa11f43e8a4a300ea0f310d599fab3f809102cfb29033ddf84f299ee8010c","src/traits.rs":"cc8cb6ec294abce16be82c10131552bb37deae56b893db1bdb0e666c0b0a18ff","tests/arithmetic.rs":"725efba4fc6cc811f542f3bcc8c7afd52702a66f64319d4f2796225e2e75d0ca","tests/arithmetic_ast.rs":"c7c28c988640405dd250c86045bbda75fc6ead2a769fb05eafbfbe74d97e0485","tests/css.rs":"36a2198e42e601efc611ebd6b3c6861f3ccb6a63525829ae6a2603bcdc4c2b11","tests/custom_errors.rs":"354d5a82a4f5a24b97901a3b411b4eab038c4d034047971956b9cdc12538e50d","tests/escaped.rs":"c25987ea6d9a7dde74d58a49c332d223da3a495569cb79e3fe921bce51729ead","tests/float.rs":"cdac92fb14afb75cba9d6b8f568e272a630b2cfb9f096b76c91909a3cd016869","tests/fnmut.rs":"dc9b6140eb3405d1497b05675fc4d3050785771a2afa81990d684b2edd0c9746","tests/ini.rs":"f0ce38b90057e9e0fd2329819395c420cbf140045
7f9c4279414301faa38b19c","tests/ini_str.rs":"4c8f6ce3a2a245e8365837b873c25d2d8f24887313b791e2edd09a76a2d98947","tests/issues.rs":"7c348420ed836c0c79bbb4cd18e936b38f6f75d5f0a3122faee13ef3005345a4","tests/json.rs":"8672fca70b889d6243a2f0f4c99389e22200e4363f253e83a3f26620b92f765e","tests/mp4.rs":"db6568ee9ccad70a7b567295831b961b369011f66dc2dd406851208007588600","tests/multiline.rs":"aef9768beaf5042b8629599b2094712646abb23eb11fa662b5a9bf3dfa432547","tests/overflow.rs":"a249ebeebfc5228faf9bfd5241a54a8181df476c4699ef87bb7d8a2161b9fc72","tests/reborrow_fold.rs":"66230bacd8d36e1559f1dc919ae8eab3515963c4aef85a079ec56218c9a6e676"},"package":"a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"}

File diff suppressed because it is too large

clamav/libclamav_rust/.cargo/vendor/nom/Cargo.lock (generated, vendored; 282 lines added)

@@ -0,0 +1,282 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bit-set"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "getrandom"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673"
[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.1"
dependencies = [
"doc-comment",
"memchr",
"minimal-lexical",
"proptest",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "ppv-lite86"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
[[package]]
name = "proptest"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5"
dependencies = [
"bit-set",
"bitflags",
"byteorder",
"lazy_static",
"num-traits",
"quick-error 2.0.1",
"rand",
"rand_chacha",
"rand_xorshift",
"regex-syntax",
"rusty-fork",
"tempfile",
]
[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quick-error"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]]
name = "rand"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_hc",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_hc"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_xorshift"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
dependencies = [
"rand_core",
]
[[package]]
name = "redox_syscall"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [
"bitflags",
]
[[package]]
name = "regex-syntax"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "rusty-fork"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
dependencies = [
"fnv",
"quick-error 1.2.3",
"tempfile",
"wait-timeout",
]
[[package]]
name = "tempfile"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22"
dependencies = [
"cfg-if",
"libc",
"rand",
"redox_syscall",
"remove_dir_all",
"winapi",
]
[[package]]
name = "wait-timeout"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
dependencies = [
"libc",
]
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"


@@ -0,0 +1,129 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2018"
rust-version = "1.48"
name = "nom"
version = "7.1.1"
authors = ["contact@geoffroycouprie.com"]
include = ["CHANGELOG.md", "LICENSE", "README.md", ".gitignore", "Cargo.toml", "src/*.rs", "src/*/*.rs", "tests/*.rs", "doc/nom_recipes.md"]
autoexamples = false
description = "A byte-oriented, zero-copy, parser combinators library"
documentation = "https://docs.rs/nom"
readme = "README.md"
keywords = ["parser", "parser-combinators", "parsing", "streaming", "bit"]
categories = ["parsing"]
license = "MIT"
repository = "https://github.com/Geal/nom"
[package.metadata.docs.rs]
all-features = true
features = ["alloc", "std", "docsrs"]
[profile.bench]
lto = true
codegen-units = 1
debug = true
[[example]]
name = "json"
path = "examples/json.rs"
required-features = ["alloc"]
[[example]]
name = "iterator"
path = "examples/iterator.rs"
[[example]]
name = "s_expression"
path = "examples/s_expression.rs"
required-features = ["alloc"]
[[example]]
name = "string"
path = "examples/string.rs"
required-features = ["alloc"]
[[test]]
name = "arithmetic"
[[test]]
name = "arithmetic_ast"
required-features = ["alloc"]
[[test]]
name = "css"
[[test]]
name = "custom_errors"
[[test]]
name = "float"
[[test]]
name = "ini"
required-features = ["alloc"]
[[test]]
name = "ini_str"
required-features = ["alloc"]
[[test]]
name = "issues"
required-features = ["alloc"]
[[test]]
name = "json"
[[test]]
name = "mp4"
required-features = ["alloc"]
[[test]]
name = "multiline"
required-features = ["alloc"]
[[test]]
name = "overflow"
[[test]]
name = "reborrow_fold"
[[test]]
name = "fnmut"
required-features = ["alloc"]
[dependencies.memchr]
version = "2.3"
default-features = false
[dependencies.minimal-lexical]
version = "0.2.0"
default-features = false
[dev-dependencies.doc-comment]
version = "0.3"
[dev-dependencies.proptest]
version = "1.0.0"
[features]
alloc = []
default = ["std"]
docsrs = []
std = ["alloc", "memchr/std", "minimal-lexical/std"]
[badges.coveralls]
branch = "main"
repository = "Geal/nom"
service = "github"
[badges.maintenance]
status = "actively-developed"
[badges.travis-ci]
repository = "Geal/nom"


@@ -0,0 +1,20 @@
Copyright (c) 2014-2019 Geoffroy Couprie
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


@@ -0,0 +1,311 @@
# nom, eating data byte by byte
[![LICENSE](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
[![Join the chat at https://gitter.im/Geal/nom](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Geal/nom?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Build Status](https://github.com/Geal/nom/actions/workflows/ci.yml/badge.svg)](https://github.com/Geal/nom/actions/workflows/ci.yml)
[![Coverage Status](https://coveralls.io/repos/github/Geal/nom/badge.svg?branch=main)](https://coveralls.io/github/Geal/nom?branch=main)
[![Crates.io Version](https://img.shields.io/crates/v/nom.svg)](https://crates.io/crates/nom)
[![Minimum rustc version](https://img.shields.io/badge/rustc-1.48.0+-lightgray.svg)](#rust-version-requirements-msrv)
nom is a parser combinator library written in Rust. Its goal is to provide tools
to build safe parsers without compromising speed or memory consumption. To
that end, it makes extensive use of Rust's *strong typing* and *memory safety* to produce
fast and correct parsers, and it provides functions, macros and traits to abstract most of the
error-prone plumbing.
![nom logo in CC0 license, by Ange Albertini](https://raw.githubusercontent.com/Geal/nom/main/assets/nom.png)
*nom will happily take a byte out of your files :)*
## Example
[Hexadecimal color](https://developer.mozilla.org/en-US/docs/Web/CSS/color) parser:
```rust
extern crate nom;
use nom::{
IResult,
bytes::complete::{tag, take_while_m_n},
combinator::map_res,
sequence::tuple
};
#[derive(Debug,PartialEq)]
pub struct Color {
pub red: u8,
pub green: u8,
pub blue: u8,
}
fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
u8::from_str_radix(input, 16)
}
fn is_hex_digit(c: char) -> bool {
c.is_digit(16)
}
fn hex_primary(input: &str) -> IResult<&str, u8> {
map_res(
take_while_m_n(2, 2, is_hex_digit),
from_hex
)(input)
}
fn hex_color(input: &str) -> IResult<&str, Color> {
let (input, _) = tag("#")(input)?;
let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?;
Ok((input, Color { red, green, blue }))
}
fn main() {}
#[test]
fn parse_color() {
assert_eq!(hex_color("#2F14DF"), Ok(("", Color {
red: 47,
green: 20,
blue: 223,
})));
}
```
## Documentation
- [Reference documentation](https://docs.rs/nom)
- [Various design documents and tutorials](https://github.com/Geal/nom/tree/main/doc)
- [List of combinators and their behaviour](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md)
If you need any help developing your parsers, please ping `geal` on IRC (libera, geeknode, oftc), join `#nom-parsers` on Libera IRC, or ask in the [Gitter chat room](https://gitter.im/Geal/nom).
## Why use nom
If you want to write:
### Binary format parsers
nom was designed to properly parse binary formats from the beginning. Compared
to the usual handwritten C parsers, nom parsers are just as fast, free from
buffer overflow vulnerabilities, and handle common patterns for you:
- [TLV](https://en.wikipedia.org/wiki/Type-length-value)
- Bit level parsing
- Hexadecimal viewer in the debugging macros for easy data analysis
- Streaming parsers for network formats and huge files
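For instance, the TLV pattern could be sketched like this (a hypothetical one-byte type / one-byte length framing, not taken from any real protocol):

```rust
use nom::{bytes::complete::take, number::complete::u8 as any_byte, IResult};

/// Hypothetical TLV frame: one type byte, one length byte, then `length` value bytes.
fn tlv(input: &[u8]) -> IResult<&[u8], (u8, &[u8])> {
    let (input, typ) = any_byte(input)?;             // T: the type byte
    let (input, len) = any_byte(input)?;             // L: the length byte
    let (input, value) = take(len as usize)(input)?; // V: exactly `len` value bytes
    Ok((input, (typ, value)))
}

fn main() {
    // type 0x01, length 3, three value bytes; the trailing 0xAA is left unparsed
    let data = [0x01, 0x03, 0xDE, 0xAD, 0xBE, 0xAA];
    assert_eq!(tlv(&data), Ok((&data[5..], (0x01, &data[2..5]))));
}
```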
Example projects:
- [FLV parser](https://github.com/rust-av/flavors)
- [Matroska parser](https://github.com/rust-av/matroska)
- [tar parser](https://github.com/Keruspe/tar-parser.rs)
### Text format parsers
While nom was made for binary formats at first, it soon grew to work just as
well with text formats. From line-based formats like CSV to more complex, nested
formats such as JSON, nom can manage them, and provides you with useful tools:
- Fast case insensitive comparison
- Recognizers for escaped strings
- Regular expressions can be embedded in nom parsers to represent complex character patterns succinctly
- Special care has been given to managing non ASCII characters properly
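As a rough, hypothetical sketch of two of those tools, `tag_no_case` for case-insensitive keywords and `escaped` for escaped strings (the helper names below are made up for the example):

```rust
use nom::{
    bytes::complete::{escaped, is_not, tag_no_case},
    character::complete::one_of,
    IResult,
};

/// Case-insensitive keyword match; the matched slice keeps its original casing.
fn select_keyword(input: &str) -> IResult<&str, &str> {
    tag_no_case("select")(input)
}

/// A string body where `\` escapes a quote or a backslash.
fn escaped_body(input: &str) -> IResult<&str, &str> {
    escaped(is_not("\\\""), '\\', one_of("\"\\"))(input)
}

fn main() {
    assert_eq!(select_keyword("SeLeCt *"), Ok((" *", "SeLeCt")));
    assert_eq!(escaped_body(r#"ab\"cd""#), Ok(("\"", r#"ab\"cd"#)));
}
```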
Example projects:
- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser)
- [TOML parser](https://github.com/joelself/tomllib)
### Programming language parsers
While programming language parsers are usually written manually for more
flexibility and performance, nom can be (and has been successfully) used
as a prototyping parser for a language.
nom will get you started quickly with powerful custom error types that you
can leverage with [nom_locate](https://github.com/fflorent/nom_locate) to
pinpoint the exact line and column of the error. No need for separate
tokenizing, lexing and parsing phases: nom can automatically handle whitespace
parsing, and construct an AST in place.
Example projects:
- [PHP VM](https://github.com/tagua-vm/parser)
- eve language prototype
- [xshade shading language](https://github.com/xshade-lang/xshade/)
### Streaming formats
While a lot of formats (and the code handling them) assume that they can fit
the complete data in memory, there are formats for which we only get a part
of the data at once, like network formats, or huge files.
nom has been designed for a correct behaviour with partial data: If there is
not enough data to decide, nom will tell you it needs more instead of silently
returning a wrong result. Whether your data arrives all at once or in chunks, the
result should be the same.
It allows you to build powerful, deterministic state machines for your protocols.
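A minimal sketch of that difference, comparing the `streaming` and `complete` variants of `tag` on a buffer that is too short (the `HELLO` token is made up for the example):

```rust
use nom::{bytes, Err, IResult};

fn main() {
    // The streaming variant reports that it needs more input to decide.
    let partial: IResult<&[u8], &[u8]> = bytes::streaming::tag("HELLO")(&b"HEL"[..]);
    assert!(matches!(partial, Err(Err::Incomplete(_))));

    // The complete variant assumes it already has all the data, so the short buffer is a hard error.
    let short: IResult<&[u8], &[u8]> = bytes::complete::tag("HELLO")(&b"HEL"[..]);
    assert!(matches!(short, Err(Err::Error(_))));
}
```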
Example projects:
- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser)
- [Using nom with generators](https://github.com/Geal/generator_nom)
## Parser combinators
Parser combinators are an approach to parsers that is very different from
software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and
[yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar
in a separate file and generating the corresponding code, you use very
small functions with very specific purpose, like "take 5 bytes", or
"recognize the word 'HTTP'", and assemble them in meaningful patterns
like "recognize 'HTTP', then a space, then a version".
The resulting code is small, and looks like the grammar you would have
written with other parser approaches.
This has a few advantages:
- The parsers are small and easy to write
- The parsers components are easy to reuse (if they're general enough, please add them to nom!)
- The parsers components are easy to test separately (unit tests and property-based tests)
- The parser combination code looks close to the grammar you would have written
- You can build partial parsers, specific to the data you need at the moment, and ignore the rest
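As a rough illustration of that composition style, here is a hypothetical parser for the "recognize 'HTTP', then a space, then a version" example above (not taken from nom's own examples):

```rust
use nom::{
    bytes::complete::tag,
    character::complete::{char, digit1},
    sequence::separated_pair,
    IResult,
};

/// Recognizes "HTTP", then a space, then a "major.minor" version such as "1.1".
fn http_version(input: &str) -> IResult<&str, (&str, &str)> {
    let (input, _) = tag("HTTP")(input)?;
    let (input, _) = char(' ')(input)?;
    separated_pair(digit1, char('.'), digit1)(input)
}

fn main() {
    assert_eq!(http_version("HTTP 1.1"), Ok(("", ("1", "1"))));
}
```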
## Technical features
nom parsers are for:
- [x] **byte-oriented**: The basic type is `&[u8]` and parsers will work as much as possible on byte array slices (but are not limited to them)
- [x] **bit-oriented**: nom can address a byte slice as a bit stream
- [x] **string-oriented**: The same kind of combinators can apply on UTF-8 strings as well
- [x] **zero-copy**: If a parser returns a subset of its input data, it will return a slice of that input, without copying
- [x] **streaming**: nom can work on partial data and detect when it needs more data to produce a correct result
- [x] **descriptive errors**: The parsers can aggregate a list of error codes with pointers to the incriminated input slice. Those error lists can be pattern matched to provide useful messages.
- [x] **custom error types**: You can provide a specific type to improve errors returned by parsers
- [x] **safe parsing**: nom leverages Rust's safe memory handling and powerful types, and parsers are routinely fuzzed and tested with real world data. So far, the only flaws found by fuzzing were in code written outside of nom
- [x] **speed**: Benchmarks have shown that nom parsers often outperform many parser combinator libraries like Parsec and attoparsec, some regular expression engines and even handwritten C parsers
Some benchmarks are available on [Github](https://github.com/Geal/nom_benchmarks).
## Rust version requirements (MSRV)
The 7.0 series of nom supports **Rustc version 1.48 or greater**. It is known to work properly on Rust 1.41.1 but there is no guarantee it will stay the case through this major release.
The current policy is that this will only be updated in the next major nom release.
## Installation
nom is available on [crates.io](https://crates.io/crates/nom) and can be included in your Cargo enabled project like this:
```toml
[dependencies]
nom = "7"
```
There are a few compilation features:
* `alloc`: (activated by default) if disabled, nom can work in `no_std` builds without memory allocators. If enabled, combinators that allocate (like `many0`) will be available
* `std`: (activated by default, activates `alloc` too) if disabled, nom can work in `no_std` builds
You can configure those features like this:
```toml
[dependencies.nom]
version = "7"
default-features = false
features = ["alloc"]
```
# Related projects
- [Get line and column info in nom's input type](https://github.com/fflorent/nom_locate)
- [Using nom as lexer and parser](https://github.com/Rydgel/monkey-rust)
# Parsers written with nom
Here is a (non exhaustive) list of known projects using nom:
- Text file formats: [Ceph Crush](https://github.com/cholcombe973/crushtool),
[Cronenberg](https://github.com/ayrat555/cronenberg),
[XFS Runtime Stats](https://github.com/ChrisMacNaughton/xfs-rs),
[CSV](https://github.com/GuillaumeGomez/csv-parser),
[FASTA](https://github.com/TianyiShi2001/nom-fasta),
[FASTQ](https://github.com/elij/fastq.rs),
[INI](https://github.com/Geal/nom/blob/main/tests/ini.rs),
[ISO 8601 dates](https://github.com/badboy/iso8601),
[libconfig-like configuration file format](https://github.com/filipegoncalves/rust-config),
[Web archive](https://github.com/sbeckeriv/warc_nom_parser),
[PDB](https://github.com/TianyiShi2001/nom-pdb),
[proto files](https://github.com/tafia/protobuf-parser),
[Fountain screenplay markup](https://github.com/adamchalmers/fountain-rs),
[vimwiki](https://github.com/chipsenkbeil/vimwiki-server/tree/master/vimwiki) & [vimwiki_macros](https://github.com/chipsenkbeil/vimwiki-server/tree/master/vimwiki_macros)
- Programming languages:
[PHP](https://github.com/tagua-vm/parser),
[Basic Calculator](https://github.com/balajisivaraman/basic_calculator_rs),
[GLSL](https://github.com/phaazon/glsl),
[Lua](https://github.com/doomrobo/nom-lua53),
[Python](https://github.com/ProgVal/rust-python-parser),
[SQL](https://github.com/ms705/nom-sql),
[Elm](https://github.com/cout970/Elm-interpreter),
[SystemVerilog](https://github.com/dalance/sv-parser),
[Turtle](https://github.com/vandenoever/rome/tree/master/src/io/turtle),
[CSML](https://github.com/CSML-by-Clevy/csml-interpreter),
[Wasm](https://github.com/Strytyp/wasm-nom),
[Pseudocode](https://github.com/Gungy2/pseudocode)
[Filter for MeiliSearch](https://github.com/meilisearch/meilisearch)
- Interface definition formats: [Thrift](https://github.com/thehydroimpulse/thrust)
- Audio, video and image formats:
[GIF](https://github.com/Geal/gif.rs),
[MagicaVoxel .vox](https://github.com/davidedmonds/dot_vox),
[midi](https://github.com/derekdreery/nom-midi-rs),
[SWF](https://github.com/open-flash/swf-parser),
[WAVE](http://github.com/noise-Labs/wave),
[Matroska (MKV)](https://github.com/rust-av/matroska)
- Document formats:
[TAR](https://github.com/Keruspe/tar-parser.rs),
[GZ](https://github.com/nharward/nom-gzip),
[GDSII](https://github.com/erihsu/gds2-io)
- Cryptographic formats:
[X.509](https://github.com/rusticata/x509-parser)
- Network protocol formats:
[Bencode](https://github.com/jbaum98/bencode.rs),
[D-Bus](https://github.com/toshokan/misato),
[DHCP](https://github.com/rusticata/dhcp-parser),
[HTTP](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http),
[URI](https://github.com/santifa/rrp/blob/master/src/uri.rs),
[IMAP](https://github.com/djc/tokio-imap),
[IRC](https://github.com/Detegr/RBot-parser),
[Pcap-NG](https://github.com/richo/pcapng-rs),
[Pcap](https://github.com/ithinuel/pcap-rs),
[Pcap + PcapNG](https://github.com/rusticata/pcap-parser),
[IKEv2](https://github.com/rusticata/ipsec-parser),
[NTP](https://github.com/rusticata/ntp-parser),
[SNMP](https://github.com/rusticata/snmp-parser),
[Kerberos v5](https://github.com/rusticata/kerberos-parser),
[DER](https://github.com/rusticata/der-parser),
[TLS](https://github.com/rusticata/tls-parser),
[IPFIX / Netflow v10](https://github.com/dominotree/rs-ipfix),
[GTP](https://github.com/fuerstenau/gorrosion-gtp),
[SIP](https://github.com/armatusmiles/sipcore/tree/master/crates/sipmsg),
[Prometheus](https://github.com/timberio/vector/blob/master/lib/prometheus-parser/src/line.rs)
- Language specifications:
[BNF](https://github.com/snewt/bnf)
- Misc formats:
[Gameboy ROM](https://github.com/MarkMcCaskey/gameboy-rom-parser),
[ANT FIT](https://github.com/stadelmanma/fitparse-rs),
[Version Numbers](https://github.com/fosskers/rs-versions),
[Telcordia/Bellcore SR-4731 SOR OTDR files](https://github.com/JamesHarrison/otdrs),
[MySQL binary log](https://github.com/PrivateRookie/boxercrab),
[URI](https://github.com/Skasselbard/nom-uri),
[Furigana](https://github.com/sachaarbonel/furigana.rs)
Want to create a new parser using `nom`? A list of not yet implemented formats is available [here](https://github.com/Geal/nom/issues/14).
Want to add your parser here? Create a pull request for it!
# Contributors
nom is the fruit of the work of many contributors over the years, many thanks for your help!
<a href="https://github.com/geal/nom/graphs/contributors">
<img src="https://contributors-img.web.app/image?repo=geal/nom" />
</a>


@@ -0,0 +1,395 @@
# Nom Recipes
These are short recipes for accomplishing common tasks with nom.
* [Whitespace](#whitespace)
+ [Wrapper combinators that eat whitespace before and after a parser](#wrapper-combinators-that-eat-whitespace-before-and-after-a-parser)
* [Comments](#comments)
+ [`// C++/EOL-style comments`](#-ceol-style-comments)
+ [`/* C-style comments */`](#-c-style-comments-)
* [Identifiers](#identifiers)
+ [`Rust-Style Identifiers`](#rust-style-identifiers)
* [Literal Values](#literal-values)
+ [Escaped Strings](#escaped-strings)
+ [Integers](#integers)
- [Hexadecimal](#hexadecimal)
- [Octal](#octal)
- [Binary](#binary)
- [Decimal](#decimal)
+ [Floating Point Numbers](#floating-point-numbers)
## Whitespace
### Wrapper combinators that eat whitespace before and after a parser
```rust
use nom::{
IResult,
error::ParseError,
combinator::value,
sequence::delimited,
character::complete::multispace0,
};
/// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and
/// trailing whitespace, returning the output of `inner`.
fn ws<'a, F: 'a, O, E: ParseError<&'a str>>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
where
F: Fn(&'a str) -> IResult<&'a str, O, E>,
{
delimited(
multispace0,
inner,
multispace0
)
}
```
To eat only trailing whitespace, replace `delimited(...)` with `terminated(&inner, multispace0)`.
Likewise, to eat only leading whitespace, replace `delimited(...)` with `preceded(multispace0,
&inner)`. You can use your own parser instead of `multispace0` if you want to skip a different set
of lexemes.
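For illustration, the two variants mentioned above can be spelled out like this (hypothetical `trailing_ws`/`leading_ws` helpers, same assumptions as `ws`):

```rust
use nom::{
    bytes::complete::tag,
    character::complete::multispace0,
    error::ParseError,
    sequence::{preceded, terminated},
    IResult,
};

/// Consumes only trailing whitespace, returning the output of `inner`.
fn trailing_ws<'a, F: 'a, O, E: ParseError<&'a str>>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
where
    F: FnMut(&'a str) -> IResult<&'a str, O, E>,
{
    terminated(inner, multispace0)
}

/// Consumes only leading whitespace, returning the output of `inner`.
fn leading_ws<'a, F: 'a, O, E: ParseError<&'a str>>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
where
    F: FnMut(&'a str) -> IResult<&'a str, O, E>,
{
    preceded(multispace0, inner)
}

fn main() {
    let trailing: IResult<&str, &str> = trailing_ws(tag("hi"))("hi   world");
    assert_eq!(trailing, Ok(("world", "hi")));

    let leading: IResult<&str, &str> = leading_ws(tag("hi"))("   hi world");
    assert_eq!(leading, Ok((" world", "hi")));
}
```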
## Comments
### `// C++/EOL-style comments`
This version uses `%` to start a comment, does not consume the newline character, and returns an
output of `()`.
```rust
use nom::{
IResult,
error::ParseError,
combinator::value,
sequence::pair,
bytes::complete::is_not,
character::complete::char,
};
pub fn peol_comment<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, (), E>
{
value(
(), // Output is thrown away.
pair(char('%'), is_not("\n\r"))
)(i)
}
```
### `/* C-style comments */`
Inline comments surrounded with sentinel tags `(*` and `*)`. This version returns an output of `()`
and does not handle nested comments.
```rust
use nom::{
IResult,
error::ParseError,
combinator::value,
sequence::tuple,
bytes::complete::{tag, take_until},
};
pub fn pinline_comment<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, (), E> {
value(
(), // Output is thrown away.
tuple((
tag("(*"),
take_until("*)"),
tag("*)")
))
)(i)
}
```
## Identifiers
### `Rust-Style Identifiers`
Identifiers that may start with a letter (or underscore) and may contain underscores,
letters and numbers can be parsed like this:
```rust
use nom::{
IResult,
branch::alt,
multi::many0_count,
combinator::recognize,
sequence::pair,
character::complete::{alpha1, alphanumeric1},
bytes::complete::tag,
};
pub fn identifier(input: &str) -> IResult<&str, &str> {
recognize(
pair(
alt((alpha1, tag("_"))),
many0_count(alt((alphanumeric1, tag("_"))))
)
)(input)
}
```
Let's say we apply this to the identifier `hello_world123abc`. The first `alt` parser would
recognize `h`. The `pair` combinator ensures that `ello_world123abc` will be piped to the next
parser, `many0_count(alt((alphanumeric1, tag("_"))))`, which recognizes every remaining character. However, the `pair` combinator
returns a tuple of the results of its sub-parsers. The `recognize` parser produces a `&str` of the
input text that was parsed, which in this case is the entire `&str` `hello_world123abc`.
## Literal Values
### Escaped Strings
This is [one of the examples](https://github.com/Geal/nom/blob/main/examples/string.rs) in the
examples directory.
### Integers
The following recipes all return string slices rather than integer values. How to obtain an
integer value instead is demonstrated for hexadecimal integers. The others are similar.
The parsers allow the grouping character `_`, which allows one to group the digits by byte, for
example: `0xA4_3F_11_28`. If you prefer to exclude the `_` character, the lambda to convert from a
string slice to an integer value is slightly simpler. You can also strip the `_` from the string
slice that is returned, which is demonstrated in the second hexadecimal number parser.
If you wish to limit the number of digits in a valid integer literal, replace `many1` with
`many_m_n` in the recipes.
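For example, to cap a literal at eight digits, the `many1` in the recipes below can be swapped for `many_m_n` like this (hypothetical helper; `_` grouping omitted for brevity; `many_m_n` needs the default `alloc` feature):

```rust
use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::one_of,
    combinator::recognize,
    multi::many_m_n,
    sequence::preceded,
    IResult,
};

/// At most eight hex digits after the `0x`/`0X` prefix.
fn hexadecimal_limited(input: &str) -> IResult<&str, &str> {
    preceded(
        alt((tag("0x"), tag("0X"))),
        recognize(many_m_n(1, 8, one_of("0123456789abcdefABCDEF"))),
    )(input)
}

fn main() {
    // Only the first eight digits are taken; the rest is left in the input.
    assert_eq!(hexadecimal_limited("0xCAFEBABE99"), Ok(("99", "CAFEBABE")));
}
```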
#### Hexadecimal
The parser outputs the string slice of the digits without the leading `0x`/`0X`.
```rust
use nom::{
IResult,
branch::alt,
multi::{many0, many1},
combinator::recognize,
sequence::{preceded, terminated},
character::complete::{char, one_of},
bytes::complete::tag,
};
fn hexadecimal(input: &str) -> IResult<&str, &str> { // <'a, E: ParseError<&'a str>>
preceded(
alt((tag("0x"), tag("0X"))),
recognize(
many1(
terminated(one_of("0123456789abcdefABCDEF"), many0(char('_')))
)
)
)(input)
}
```
If you want it to return the integer value instead, use `map_res`:
```rust
use nom::{
IResult,
branch::alt,
multi::{many0, many1},
combinator::{map_res, recognize},
sequence::{preceded, terminated},
character::complete::{char, one_of},
bytes::complete::tag,
};
fn hexadecimal_value(input: &str) -> IResult<&str, i64> {
map_res(
preceded(
alt((tag("0x"), tag("0X"))),
recognize(
many1(
terminated(one_of("0123456789abcdefABCDEF"), many0(char('_')))
)
)
),
|out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16)
)(input)
}
```
#### Octal
```rust
use nom::{
IResult,
branch::alt,
multi::{many0, many1},
combinator::recognize,
sequence::{preceded, terminated},
character::complete::{char, one_of},
bytes::complete::tag,
};
fn octal(input: &str) -> IResult<&str, &str> {
preceded(
alt((tag("0o"), tag("0O"))),
recognize(
many1(
terminated(one_of("01234567"), many0(char('_')))
)
)
)(input)
}
```
#### Binary
```rust
use nom::{
IResult,
branch::alt,
multi::{many0, many1},
combinator::recognize,
sequence::{preceded, terminated},
character::complete::{char, one_of},
bytes::complete::tag,
};
fn binary(input: &str) -> IResult<&str, &str> {
preceded(
alt((tag("0b"), tag("0B"))),
recognize(
many1(
terminated(one_of("01"), many0(char('_')))
)
)
)(input)
}
```
#### Decimal
```rust
use nom::{
IResult,
multi::{many0, many1},
combinator::recognize,
sequence::terminated,
character::complete::{char, one_of},
};
fn decimal(input: &str) -> IResult<&str, &str> {
recognize(
many1(
terminated(one_of("0123456789"), many0(char('_')))
)
)(input)
}
```
### Floating Point Numbers
The following is adapted from [the Python parser by Valentin Lorentz (ProgVal)](https://github.com/ProgVal/rust-python-parser/blob/master/src/numbers.rs).
```rust
use nom::{
IResult,
branch::alt,
multi::{many0, many1},
combinator::{opt, recognize},
sequence::{preceded, terminated, tuple},
character::complete::{char, one_of},
};
fn float(input: &str) -> IResult<&str, &str> {
alt((
// Case one: .42
recognize(
tuple((
char('.'),
decimal,
opt(tuple((
one_of("eE"),
opt(one_of("+-")),
decimal
)))
))
)
, // Case two: 42e42 and 42.42e42
recognize(
tuple((
decimal,
opt(preceded(
char('.'),
decimal,
)),
one_of("eE"),
opt(one_of("+-")),
decimal
))
)
, // Case three: 42. and 42.42
recognize(
tuple((
decimal,
char('.'),
opt(decimal)
))
)
))(input)
}
fn decimal(input: &str) -> IResult<&str, &str> {
recognize(
many1(
terminated(one_of("0123456789"), many0(char('_')))
)
)(input)
}
```
## Implementing FromStr
The [FromStr trait](https://doc.rust-lang.org/std/str/trait.FromStr.html) provides
a common interface to parse from a string.
```rust
use nom::{
IResult, Finish, error::Error,
bytes::complete::{tag, take_while},
};
use std::str::FromStr;
// will recognize the name in "Hello, name!"
fn parse_name(input: &str) -> IResult<&str, &str> {
let (i, _) = tag("Hello, ")(input)?;
let (i, name) = take_while(|c:char| c.is_alphabetic())(i)?;
let (i, _) = tag("!")(i)?;
Ok((i, name))
}
// with FromStr, the result cannot be a reference to the input, it must be owned
#[derive(Debug)]
pub struct Name(pub String);
impl FromStr for Name {
// the error must be owned as well
type Err = Error<String>;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match parse_name(s).finish() {
Ok((_remaining, name)) => Ok(Name(name.to_string())),
Err(Error { input, code }) => Err(Error {
input: input.to_string(),
code,
})
}
}
}
fn main() {
// parsed: Ok(Name("nom"))
println!("parsed: {:?}", "Hello, nom!".parse::<Name>());
// parsed: Err(Error { input: "123!", code: Tag })
println!("parsed: {:?}", "Hello, 123!".parse::<Name>());
}
```


@@ -0,0 +1,150 @@
//! Bit level parsers
//!
use crate::error::{ErrorKind, ParseError};
use crate::internal::{Err, IResult};
use crate::lib::std::ops::{AddAssign, Div, RangeFrom, Shl, Shr};
use crate::traits::{InputIter, InputLength, Slice, ToUsize};
/// Generates a parser taking `count` bits
///
/// # Example
/// ```rust
/// # use nom::bits::complete::take;
/// # use nom::IResult;
/// # use nom::error::{Error, ErrorKind};
/// // Input is a tuple of (input: I, bit_offset: usize)
/// fn parser(input: (&[u8], usize), count: usize) -> IResult<(&[u8], usize), u8> {
/// take(count)(input)
/// }
///
/// // Consumes 0 bits, returns 0
/// assert_eq!(parser(([0b00010010].as_ref(), 0), 0), Ok((([0b00010010].as_ref(), 0), 0)));
///
/// // Consumes 4 bits, returns their value and increases the offset to 4
/// assert_eq!(parser(([0b00010010].as_ref(), 0), 4), Ok((([0b00010010].as_ref(), 4), 0b00000001)));
///
/// // Consumes 4 bits while the offset is 4, returns their value and moves the offset to 0 of the next byte
/// assert_eq!(parser(([0b00010010].as_ref(), 4), 4), Ok((([].as_ref(), 0), 0b00000010)));
///
/// // Tries to consume 12 bits but only 8 are available
/// assert_eq!(parser(([0b00010010].as_ref(), 0), 12), Err(nom::Err::Error(Error{input: ([0b00010010].as_ref(), 0), code: ErrorKind::Eof })));
/// ```
pub fn take<I, O, C, E: ParseError<(I, usize)>>(
count: C,
) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E>
where
I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength,
C: ToUsize,
O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O>,
{
let count = count.to_usize();
move |(input, bit_offset): (I, usize)| {
if count == 0 {
Ok(((input, bit_offset), 0u8.into()))
} else {
let cnt = (count + bit_offset).div(8);
if input.input_len() * 8 < count + bit_offset {
Err(Err::Error(E::from_error_kind(
(input, bit_offset),
ErrorKind::Eof,
)))
} else {
let mut acc: O = 0_u8.into();
let mut offset: usize = bit_offset;
let mut remaining: usize = count;
let mut end_offset: usize = 0;
for byte in input.iter_elements().take(cnt + 1) {
if remaining == 0 {
break;
}
let val: O = if offset == 0 {
byte.into()
} else {
((byte << offset) as u8 >> offset).into()
};
if remaining < 8 - offset {
acc += val >> (8 - offset - remaining);
end_offset = remaining + offset;
break;
} else {
acc += val << (remaining - (8 - offset));
remaining -= 8 - offset;
offset = 0;
}
}
Ok(((input.slice(cnt..), end_offset), acc))
}
}
}
}
/// Generates a parser taking `count` bits and comparing them to `pattern`
pub fn tag<I, O, C, E: ParseError<(I, usize)>>(
pattern: O,
count: C,
) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E>
where
I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength + Clone,
C: ToUsize,
O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O> + PartialEq,
{
let count = count.to_usize();
move |input: (I, usize)| {
let inp = input.clone();
take(count)(input).and_then(|(i, o)| {
if pattern == o {
Ok((i, o))
} else {
Err(Err::Error(error_position!(inp, ErrorKind::TagBits)))
}
})
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_take_0() {
let input = [0b00010010].as_ref();
let count = 0usize;
assert_eq!(count, 0usize);
let offset = 0usize;
let result: crate::IResult<(&[u8], usize), usize> = take(count)((input, offset));
assert_eq!(result, Ok(((input, offset), 0)));
}
#[test]
fn test_take_eof() {
let input = [0b00010010].as_ref();
let result: crate::IResult<(&[u8], usize), usize> = take(1usize)((input, 8));
assert_eq!(
result,
Err(crate::Err::Error(crate::error::Error {
input: (input, 8),
code: ErrorKind::Eof
}))
)
}
#[test]
fn test_take_span_over_multiple_bytes() {
let input = [0b00010010, 0b00110100, 0b11111111, 0b11111111].as_ref();
let result: crate::IResult<(&[u8], usize), usize> = take(24usize)((input, 4));
assert_eq!(
result,
Ok((([0b11111111].as_ref(), 4), 0b1000110100111111111111))
);
}
}


@@ -0,0 +1,179 @@
//! Bit level parsers
//!
pub mod complete;
pub mod streaming;
use crate::error::{ErrorKind, ParseError};
use crate::internal::{Err, IResult, Needed};
use crate::lib::std::ops::RangeFrom;
use crate::traits::{ErrorConvert, Slice};
/// Converts a byte-level input to a bit-level input, for consumption by a parser that uses bits.
///
/// Afterwards, the input is converted back to a byte-level parser, with any remaining bits thrown
/// away.
///
/// # Example
/// ```
/// use nom::bits::{bits, streaming::take};
/// use nom::error::Error;
/// use nom::sequence::tuple;
/// use nom::IResult;
///
/// fn parse(input: &[u8]) -> IResult<&[u8], (u8, u8)> {
/// bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple((take(4usize), take(8usize))))(input)
/// }
///
/// let input = &[0x12, 0x34, 0xff, 0xff];
///
/// let output = parse(input).expect("We take 1.5 bytes and the input is longer than 2 bytes");
///
/// // The first byte is consumed, the second byte is partially consumed and dropped.
/// let remaining = output.0;
/// assert_eq!(remaining, [0xff, 0xff]);
///
/// let parsed = output.1;
/// assert_eq!(parsed.0, 0x01);
/// assert_eq!(parsed.1, 0x23);
/// ```
pub fn bits<I, O, E1, E2, P>(mut parser: P) -> impl FnMut(I) -> IResult<I, O, E2>
where
E1: ParseError<(I, usize)> + ErrorConvert<E2>,
E2: ParseError<I>,
I: Slice<RangeFrom<usize>>,
P: FnMut((I, usize)) -> IResult<(I, usize), O, E1>,
{
move |input: I| match parser((input, 0)) {
Ok(((rest, offset), result)) => {
// If the next byte has been partially read, it will be sliced away as well.
// The parser functions might already slice away all fully read bytes.
// That's why `offset / 8` isn't necessarily needed at all times.
let remaining_bytes_index = offset / 8 + if offset % 8 == 0 { 0 } else { 1 };
Ok((rest.slice(remaining_bytes_index..), result))
}
Err(Err::Incomplete(n)) => Err(Err::Incomplete(n.map(|u| u.get() / 8 + 1))),
Err(Err::Error(e)) => Err(Err::Error(e.convert())),
Err(Err::Failure(e)) => Err(Err::Failure(e.convert())),
}
}
/// Counterpart to `bits`, `bytes` transforms its bit stream input into a byte slice for the underlying
/// parser, allowing byte-slice parsers to work on bit streams.
///
/// A partial byte remaining in the input will be ignored and the given parser will start parsing
/// at the next full byte.
///
/// ```
/// use nom::bits::{bits, bytes, streaming::take};
/// use nom::combinator::rest;
/// use nom::error::Error;
/// use nom::sequence::tuple;
/// use nom::IResult;
///
/// fn parse(input: &[u8]) -> IResult<&[u8], (u8, u8, &[u8])> {
/// bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple((
/// take(4usize),
/// take(8usize),
/// bytes::<_, _, Error<&[u8]>, _, _>(rest)
/// )))(input)
/// }
///
/// let input = &[0x12, 0x34, 0xff, 0xff];
///
/// assert_eq!(parse( input ), Ok(( &[][..], (0x01, 0x23, &[0xff, 0xff][..]) )));
/// ```
pub fn bytes<I, O, E1, E2, P>(mut parser: P) -> impl FnMut((I, usize)) -> IResult<(I, usize), O, E2>
where
E1: ParseError<I> + ErrorConvert<E2>,
E2: ParseError<(I, usize)>,
I: Slice<RangeFrom<usize>> + Clone,
P: FnMut(I) -> IResult<I, O, E1>,
{
move |(input, offset): (I, usize)| {
let inner = if offset % 8 != 0 {
input.slice((1 + offset / 8)..)
} else {
input.slice((offset / 8)..)
};
let i = (input, offset);
match parser(inner) {
Ok((rest, res)) => Ok(((rest, 0), res)),
Err(Err::Incomplete(Needed::Unknown)) => Err(Err::Incomplete(Needed::Unknown)),
Err(Err::Incomplete(Needed::Size(sz))) => Err(match sz.get().checked_mul(8) {
Some(v) => Err::Incomplete(Needed::new(v)),
None => Err::Failure(E2::from_error_kind(i, ErrorKind::TooLarge)),
}),
Err(Err::Error(e)) => Err(Err::Error(e.convert())),
Err(Err::Failure(e)) => Err(Err::Failure(e.convert())),
}
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::bits::streaming::take;
use crate::error::Error;
use crate::sequence::tuple;
#[test]
/// Take the `bits` function and assert that remaining bytes are correctly returned, if the
/// previous bytes are fully consumed
fn test_complete_byte_consumption_bits() {
let input = &[0x12, 0x34, 0x56, 0x78];
// Take 3 bit slices with sizes [4, 8, 4].
let result: IResult<&[u8], (u8, u8, u8)> =
bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple((take(4usize), take(8usize), take(4usize))))(
input,
);
let output = result.expect("We take 2 bytes and the input is longer than 2 bytes");
let remaining = output.0;
assert_eq!(remaining, [0x56, 0x78]);
let parsed = output.1;
assert_eq!(parsed.0, 0x01);
assert_eq!(parsed.1, 0x23);
assert_eq!(parsed.2, 0x04);
}
#[test]
/// Take the `bits` function and assert that remaining bytes are correctly returned, if the
/// previous bytes are NOT fully consumed. Partially consumed bytes are supposed to be dropped.
/// I.e. if we consume 1.5 bytes of 4 bytes, 2 bytes will be returned, bits 13-16 will be
/// dropped.
fn test_partial_byte_consumption_bits() {
let input = &[0x12, 0x34, 0x56, 0x78];
// Take bit slices with sizes [4, 8].
let result: IResult<&[u8], (u8, u8)> =
bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple((take(4usize), take(8usize))))(input);
let output = result.expect("We take 1.5 bytes and the input is longer than 2 bytes");
let remaining = output.0;
assert_eq!(remaining, [0x56, 0x78]);
let parsed = output.1;
assert_eq!(parsed.0, 0x01);
assert_eq!(parsed.1, 0x23);
}
#[test]
#[cfg(feature = "std")]
/// Ensure that an Incomplete error is returned if too few bytes are passed for a given parser.
fn test_incomplete_bits() {
let input = &[0x12];
// Take bit slices with sizes [4, 8].
let result: IResult<&[u8], (u8, u8)> =
bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple((take(4usize), take(8usize))))(input);
assert!(result.is_err());
let error = result.err().unwrap();
assert_eq!("Parsing requires 2 bytes/chars", error.to_string());
}
}


@@ -0,0 +1,129 @@
//! Bit level parsers
//!
use crate::error::{ErrorKind, ParseError};
use crate::internal::{Err, IResult, Needed};
use crate::lib::std::ops::{AddAssign, Div, RangeFrom, Shl, Shr};
use crate::traits::{InputIter, InputLength, Slice, ToUsize};
/// Generates a parser taking `count` bits
pub fn take<I, O, C, E: ParseError<(I, usize)>>(
count: C,
) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E>
where
I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength,
C: ToUsize,
O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O>,
{
let count = count.to_usize();
move |(input, bit_offset): (I, usize)| {
if count == 0 {
Ok(((input, bit_offset), 0u8.into()))
} else {
let cnt = (count + bit_offset).div(8);
if input.input_len() * 8 < count + bit_offset {
Err(Err::Incomplete(Needed::new(count as usize)))
} else {
let mut acc: O = 0_u8.into();
let mut offset: usize = bit_offset;
let mut remaining: usize = count;
let mut end_offset: usize = 0;
for byte in input.iter_elements().take(cnt + 1) {
if remaining == 0 {
break;
}
let val: O = if offset == 0 {
byte.into()
} else {
((byte << offset) as u8 >> offset).into()
};
if remaining < 8 - offset {
acc += val >> (8 - offset - remaining);
end_offset = remaining + offset;
break;
} else {
acc += val << (remaining - (8 - offset));
remaining -= 8 - offset;
offset = 0;
}
}
Ok(((input.slice(cnt..), end_offset), acc))
}
}
}
}
/// Generates a parser taking `count` bits and comparing them to `pattern`
pub fn tag<I, O, C, E: ParseError<(I, usize)>>(
pattern: O,
count: C,
) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E>
where
I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength + Clone,
C: ToUsize,
O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O> + PartialEq,
{
let count = count.to_usize();
move |input: (I, usize)| {
let inp = input.clone();
take(count)(input).and_then(|(i, o)| {
if pattern == o {
Ok((i, o))
} else {
Err(Err::Error(error_position!(inp, ErrorKind::TagBits)))
}
})
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_take_0() {
let input = [].as_ref();
let count = 0usize;
assert_eq!(count, 0usize);
let offset = 0usize;
let result: crate::IResult<(&[u8], usize), usize> = take(count)((input, offset));
assert_eq!(result, Ok(((input, offset), 0)));
}
#[test]
fn test_tag_ok() {
let input = [0b00011111].as_ref();
let offset = 0usize;
let bits_to_take = 4usize;
let value_to_tag = 0b0001;
let result: crate::IResult<(&[u8], usize), usize> =
tag(value_to_tag, bits_to_take)((input, offset));
assert_eq!(result, Ok(((input, bits_to_take), value_to_tag)));
}
#[test]
fn test_tag_err() {
let input = [0b00011111].as_ref();
let offset = 0usize;
let bits_to_take = 4usize;
let value_to_tag = 0b1111;
let result: crate::IResult<(&[u8], usize), usize> =
tag(value_to_tag, bits_to_take)((input, offset));
assert_eq!(
result,
Err(crate::Err::Error(crate::error::Error {
input: (input, offset),
code: ErrorKind::TagBits
}))
);
}
}


@@ -0,0 +1,291 @@
//! Choice combinators
macro_rules! succ (
(0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*));
(1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*));
(2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*));
(3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*));
(4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*));
(5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*));
(6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*));
(7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*));
(8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*));
(9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*));
(10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*));
(11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*));
(12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*));
(13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*));
(14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*));
(15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*));
(16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*));
(17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*));
(18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*));
(19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*));
(20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*));
);
#[cfg(test)]
mod tests;
use crate::error::ErrorKind;
use crate::error::ParseError;
use crate::internal::{Err, IResult, Parser};
/// Helper trait for the [alt()] combinator.
///
/// This trait is implemented for tuples of up to 21 elements
pub trait Alt<I, O, E> {
/// Tests each parser in the tuple and returns the result of the first one that succeeds
fn choice(&mut self, input: I) -> IResult<I, O, E>;
}
/// Tests a list of parsers one by one until one succeeds.
///
/// It takes as argument a tuple of parsers. There is a maximum of 21
/// parsers. If you need more, it is possible to nest them in other `alt` calls,
/// like this: `alt((parser_a, alt((parser_b, parser_c))))`
///
/// ```rust
/// # use nom::error_position;
/// # use nom::{Err,error::ErrorKind, Needed, IResult};
/// use nom::character::complete::{alpha1, digit1};
/// use nom::branch::alt;
/// # fn main() {
/// fn parser(input: &str) -> IResult<&str, &str> {
/// alt((alpha1, digit1))(input)
/// };
///
/// // the first parser, alpha1, recognizes the input
/// assert_eq!(parser("abc"), Ok(("", "abc")));
///
/// // the first parser returns an error, so alt tries the second one
/// assert_eq!(parser("123456"), Ok(("", "123456")));
///
/// // both parsers failed, and with the default error type, alt will return the last error
/// assert_eq!(parser(" "), Err(Err::Error(error_position!(" ", ErrorKind::Digit))));
/// # }
/// ```
///
/// With a custom error type, it is possible to have alt return the error of the parser
/// that went the farthest in the input data
pub fn alt<I: Clone, O, E: ParseError<I>, List: Alt<I, O, E>>(
mut l: List,
) -> impl FnMut(I) -> IResult<I, O, E> {
move |i: I| l.choice(i)
}
/// Helper trait for the [permutation()] combinator.
///
/// This trait is implemented for tuples of up to 21 elements
pub trait Permutation<I, O, E> {
/// Tries to apply all parsers in the tuple in various orders until all of them succeed
fn permutation(&mut self, input: I) -> IResult<I, O, E>;
}
/// Applies a list of parsers in any order.
///
/// Permutation will succeed if all of the child parsers succeeded.
/// It takes as argument a tuple of parsers, and returns a
/// tuple of the parser results.
///
/// ```rust
/// # use nom::{Err,error::{Error, ErrorKind}, Needed, IResult};
/// use nom::character::complete::{alpha1, digit1};
/// use nom::branch::permutation;
/// # fn main() {
/// fn parser(input: &str) -> IResult<&str, (&str, &str)> {
/// permutation((alpha1, digit1))(input)
/// }
///
/// // permutation recognizes alphabetic characters then digit
/// assert_eq!(parser("abc123"), Ok(("", ("abc", "123"))));
///
/// // but also in inverse order
/// assert_eq!(parser("123abc"), Ok(("", ("abc", "123"))));
///
/// // it will fail if one of the parsers failed
/// assert_eq!(parser("abc;"), Err(Err::Error(Error::new(";", ErrorKind::Digit))));
/// # }
/// ```
///
/// The parsers are applied greedily: if there are multiple unapplied parsers
/// that could parse the next slice of input, the first one is used.
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, IResult};
/// use nom::branch::permutation;
/// use nom::character::complete::{anychar, char};
///
/// fn parser(input: &str) -> IResult<&str, (char, char)> {
/// permutation((anychar, char('a')))(input)
/// }
///
/// // anychar parses 'b', then char('a') parses 'a'
/// assert_eq!(parser("ba"), Ok(("", ('b', 'a'))));
///
/// // anychar parses 'a', then char('a') fails on 'b',
/// // even though char('a') followed by anychar would succeed
/// assert_eq!(parser("ab"), Err(Err::Error(Error::new("b", ErrorKind::Char))));
/// ```
///
pub fn permutation<I: Clone, O, E: ParseError<I>, List: Permutation<I, O, E>>(
mut l: List,
) -> impl FnMut(I) -> IResult<I, O, E> {
move |i: I| l.permutation(i)
}
macro_rules! alt_trait(
($first:ident $second:ident $($id: ident)+) => (
alt_trait!(__impl $first $second; $($id)+);
);
(__impl $($current:ident)*; $head:ident $($id: ident)+) => (
alt_trait_impl!($($current)*);
alt_trait!(__impl $($current)* $head; $($id)+);
);
(__impl $($current:ident)*; $head:ident) => (
alt_trait_impl!($($current)*);
alt_trait_impl!($($current)* $head);
);
);
macro_rules! alt_trait_impl(
($($id:ident)+) => (
impl<
Input: Clone, Output, Error: ParseError<Input>,
$($id: Parser<Input, Output, Error>),+
> Alt<Input, Output, Error> for ( $($id),+ ) {
fn choice(&mut self, input: Input) -> IResult<Input, Output, Error> {
match self.0.parse(input.clone()) {
Err(Err::Error(e)) => alt_trait_inner!(1, self, input, e, $($id)+),
res => res,
}
}
}
);
);
macro_rules! alt_trait_inner(
($it:tt, $self:expr, $input:expr, $err:expr, $head:ident $($id:ident)+) => (
match $self.$it.parse($input.clone()) {
Err(Err::Error(e)) => {
let err = $err.or(e);
succ!($it, alt_trait_inner!($self, $input, err, $($id)+))
}
res => res,
}
);
($it:tt, $self:expr, $input:expr, $err:expr, $head:ident) => (
Err(Err::Error(Error::append($input, ErrorKind::Alt, $err)))
);
);
alt_trait!(A B C D E F G H I J K L M N O P Q R S T U);
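// Illustrative sketch (not the literal macro expansion): for a 2-tuple the
// generated impl behaves roughly like this:
//
//   impl<I: Clone, O, E: ParseError<I>, A, B> Alt<I, O, E> for (A, B)
//   where
//     A: Parser<I, O, E>,
//     B: Parser<I, O, E>,
//   {
//     fn choice(&mut self, input: I) -> IResult<I, O, E> {
//       match self.0.parse(input.clone()) {
//         Err(Err::Error(e1)) => match self.1.parse(input.clone()) {
//           // both branches failed: merge the errors with `or` and tag the
//           // result with ErrorKind::Alt
//           Err(Err::Error(e2)) => Err(Err::Error(E::append(input, ErrorKind::Alt, e1.or(e2)))),
//           res => res,
//         },
//         // success, or a Failure/Incomplete that must not be masked
//         res => res,
//       }
//     }
//   }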
// Manually implement Alt for (A,), the 1-tuple type
impl<Input, Output, Error: ParseError<Input>, A: Parser<Input, Output, Error>>
Alt<Input, Output, Error> for (A,)
{
fn choice(&mut self, input: Input) -> IResult<Input, Output, Error> {
self.0.parse(input)
}
}
macro_rules! permutation_trait(
(
$name1:ident $ty1:ident $item1:ident
$name2:ident $ty2:ident $item2:ident
$($name3:ident $ty3:ident $item3:ident)*
) => (
permutation_trait!(__impl $name1 $ty1 $item1, $name2 $ty2 $item2; $($name3 $ty3 $item3)*);
);
(
__impl $($name:ident $ty:ident $item:ident),+;
$name1:ident $ty1:ident $item1:ident $($name2:ident $ty2:ident $item2:ident)*
) => (
permutation_trait_impl!($($name $ty $item),+);
permutation_trait!(__impl $($name $ty $item),+ , $name1 $ty1 $item1; $($name2 $ty2 $item2)*);
);
(__impl $($name:ident $ty:ident $item:ident),+;) => (
permutation_trait_impl!($($name $ty $item),+);
);
);
macro_rules! permutation_trait_impl(
($($name:ident $ty:ident $item:ident),+) => (
impl<
Input: Clone, $($ty),+ , Error: ParseError<Input>,
$($name: Parser<Input, $ty, Error>),+
> Permutation<Input, ( $($ty),+ ), Error> for ( $($name),+ ) {
fn permutation(&mut self, mut input: Input) -> IResult<Input, ( $($ty),+ ), Error> {
let mut res = ($(Option::<$ty>::None),+);
loop {
let mut err: Option<Error> = None;
permutation_trait_inner!(0, self, input, res, err, $($name)+);
// If we reach here, every parser has either been applied before,
// or errored on the remaining input
if let Some(err) = err {
// There are remaining parsers, and all errored on the remaining input
return Err(Err::Error(Error::append(input, ErrorKind::Permutation, err)));
}
// All parsers were applied
match res {
($(Some($item)),+) => return Ok((input, ($($item),+))),
_ => unreachable!(),
}
}
}
}
);
);
macro_rules! permutation_trait_inner(
($it:tt, $self:expr, $input:ident, $res:expr, $err:expr, $head:ident $($id:ident)*) => (
if $res.$it.is_none() {
match $self.$it.parse($input.clone()) {
Ok((i, o)) => {
$input = i;
$res.$it = Some(o);
continue;
}
Err(Err::Error(e)) => {
$err = Some(match $err {
Some(err) => err.or(e),
None => e,
});
}
Err(e) => return Err(e),
};
}
succ!($it, permutation_trait_inner!($self, $input, $res, $err, $($id)*));
);
($it:tt, $self:expr, $input:ident, $res:expr, $err:expr,) => ();
);
permutation_trait!(
FnA A a
FnB B b
FnC C c
FnD D d
FnE E e
FnF F f
FnG G g
FnH H h
FnI I i
FnJ J j
FnK K k
FnL L l
FnM M m
FnN N n
FnO O o
FnP P p
FnQ Q q
FnR R r
FnS S s
FnT T t
FnU U u
);
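// Illustrative sketch (not the literal macro expansion): for a 2-tuple the
// generated `permutation` impl keeps retrying the parsers that have not yet
// succeeded, roughly like this:
//
//   let mut res = (None, None);
//   loop {
//     let mut err = None;
//     // for every slot that is still empty, try its parser on `input`:
//     //   - on success, store the output, advance `input` and restart the loop
//     //   - on Err::Error, remember the error (merged with `or`) and try the next slot
//     //   - on Err::Failure or Err::Incomplete, return immediately
//     if let Some(err) = err {
//       // some parsers are still unapplied and all of them failed on the remaining input
//       return Err(Err::Error(Error::append(input, ErrorKind::Permutation, err)));
//     }
//     // every slot is filled: return the outputs in declaration order
//     return Ok((input, (res.0.unwrap(), res.1.unwrap())));
//   }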

View File

@@ -0,0 +1,142 @@
use crate::branch::{alt, permutation};
use crate::bytes::streaming::tag;
use crate::error::ErrorKind;
use crate::internal::{Err, IResult, Needed};
#[cfg(feature = "alloc")]
use crate::{
error::ParseError,
lib::std::{
fmt::Debug,
string::{String, ToString},
},
};
#[cfg(feature = "alloc")]
#[derive(Debug, Clone, PartialEq)]
pub struct ErrorStr(String);
#[cfg(feature = "alloc")]
impl From<u32> for ErrorStr {
fn from(i: u32) -> Self {
ErrorStr(format!("custom error code: {}", i))
}
}
#[cfg(feature = "alloc")]
impl<'a> From<&'a str> for ErrorStr {
fn from(i: &'a str) -> Self {
ErrorStr(format!("custom error message: {}", i))
}
}
#[cfg(feature = "alloc")]
impl<I: Debug> ParseError<I> for ErrorStr {
fn from_error_kind(input: I, kind: ErrorKind) -> Self {
ErrorStr(format!("custom error message: ({:?}, {:?})", input, kind))
}
fn append(input: I, kind: ErrorKind, other: Self) -> Self {
ErrorStr(format!(
"custom error message: ({:?}, {:?}) - {:?}",
input, kind, other
))
}
}
#[cfg(feature = "alloc")]
#[test]
fn alt_test() {
fn work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> {
Ok((&b""[..], input))
}
#[allow(unused_variables)]
fn dont_work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> {
Err(Err::Error(ErrorStr("abcd".to_string())))
}
fn work2(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> {
Ok((input, &b""[..]))
}
fn alt1(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> {
alt((dont_work, dont_work))(i)
}
fn alt2(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> {
alt((dont_work, work))(i)
}
fn alt3(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> {
alt((dont_work, dont_work, work2, dont_work))(i)
}
//named!(alt1, alt!(dont_work | dont_work));
//named!(alt2, alt!(dont_work | work));
//named!(alt3, alt!(dont_work | dont_work | work2 | dont_work));
let a = &b"abcd"[..];
assert_eq!(
alt1(a),
Err(Err::Error(error_node_position!(
a,
ErrorKind::Alt,
ErrorStr("abcd".to_string())
)))
);
assert_eq!(alt2(a), Ok((&b""[..], a)));
assert_eq!(alt3(a), Ok((a, &b""[..])));
fn alt4(i: &[u8]) -> IResult<&[u8], &[u8]> {
alt((tag("abcd"), tag("efgh")))(i)
}
let b = &b"efgh"[..];
assert_eq!(alt4(a), Ok((&b""[..], a)));
assert_eq!(alt4(b), Ok((&b""[..], b)));
}
#[test]
fn alt_incomplete() {
fn alt1(i: &[u8]) -> IResult<&[u8], &[u8]> {
alt((tag("a"), tag("bc"), tag("def")))(i)
}
let a = &b""[..];
assert_eq!(alt1(a), Err(Err::Incomplete(Needed::new(1))));
let a = &b"b"[..];
assert_eq!(alt1(a), Err(Err::Incomplete(Needed::new(1))));
let a = &b"bcd"[..];
assert_eq!(alt1(a), Ok((&b"d"[..], &b"bc"[..])));
let a = &b"cde"[..];
assert_eq!(alt1(a), Err(Err::Error(error_position!(a, ErrorKind::Tag))));
let a = &b"de"[..];
assert_eq!(alt1(a), Err(Err::Incomplete(Needed::new(1))));
let a = &b"defg"[..];
assert_eq!(alt1(a), Ok((&b"g"[..], &b"def"[..])));
}
#[test]
fn permutation_test() {
fn perm(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8], &[u8])> {
permutation((tag("abcd"), tag("efg"), tag("hi")))(i)
}
let expected = (&b"abcd"[..], &b"efg"[..], &b"hi"[..]);
let a = &b"abcdefghijk"[..];
assert_eq!(perm(a), Ok((&b"jk"[..], expected)));
let b = &b"efgabcdhijk"[..];
assert_eq!(perm(b), Ok((&b"jk"[..], expected)));
let c = &b"hiefgabcdjk"[..];
assert_eq!(perm(c), Ok((&b"jk"[..], expected)));
let d = &b"efgxyzabcdefghi"[..];
assert_eq!(
perm(d),
Err(Err::Error(error_node_position!(
&b"efgxyzabcdefghi"[..],
ErrorKind::Permutation,
error_position!(&b"xyzabcdefghi"[..], ErrorKind::Tag)
)))
);
let e = &b"efgabc"[..];
assert_eq!(perm(e), Err(Err::Incomplete(Needed::new(1))));
}

View File

@@ -0,0 +1,756 @@
//! Parsers recognizing bytes streams, complete input version
use crate::error::ErrorKind;
use crate::error::ParseError;
use crate::internal::{Err, IResult, Parser};
use crate::lib::std::ops::RangeFrom;
use crate::lib::std::result::Result::*;
use crate::traits::{
Compare, CompareResult, FindSubstring, FindToken, InputIter, InputLength, InputTake,
InputTakeAtPosition, Slice, ToUsize,
};
/// Recognizes a pattern.
///
/// The input data will be compared to the tag combinator's argument and will return the part of
/// the input that matches the argument
///
/// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, &str> {
/// tag("Hello")(s)
/// }
///
/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello")));
/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// ```
pub fn tag<T, Input, Error: ParseError<Input>>(
tag: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + Compare<T>,
T: InputLength + Clone,
{
move |i: Input| {
let tag_len = tag.input_len();
let t = tag.clone();
let res: IResult<_, _, Error> = match i.compare(t) {
CompareResult::Ok => Ok(i.take_split(tag_len)),
_ => {
let e: ErrorKind = ErrorKind::Tag;
Err(Err::Error(Error::from_error_kind(i, e)))
}
};
res
}
}
/// Recognizes a case insensitive pattern.
///
/// The input data will be compared to the tag combinator's argument and will return the part of
/// the input that matches the argument with no regard to case.
///
/// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::tag_no_case;
///
/// fn parser(s: &str) -> IResult<&str, &str> {
/// tag_no_case("hello")(s)
/// }
///
/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello")));
/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello")));
/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO")));
/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// ```
pub fn tag_no_case<T, Input, Error: ParseError<Input>>(
tag: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + Compare<T>,
T: InputLength + Clone,
{
move |i: Input| {
let tag_len = tag.input_len();
let t = tag.clone();
let res: IResult<_, _, Error> = match (i).compare_no_case(t) {
CompareResult::Ok => Ok(i.take_split(tag_len)),
_ => {
let e: ErrorKind = ErrorKind::Tag;
Err(Err::Error(Error::from_error_kind(i, e)))
}
};
res
}
}
/// Parse till certain characters are met.
///
/// The parser will return the longest slice till one of the characters of the combinator's argument is met.
///
/// It doesn't consume the matched character.
///
/// It will return an `Err::Error(("", ErrorKind::IsNot))` if the pattern wasn't met.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::is_not;
///
/// fn not_space(s: &str) -> IResult<&str, &str> {
/// is_not(" \t\r\n")(s)
/// }
///
/// assert_eq!(not_space("Hello, World!"), Ok((" World!", "Hello,")));
/// assert_eq!(not_space("Sometimes\t"), Ok(("\t", "Sometimes")));
/// assert_eq!(not_space("Nospace"), Ok(("", "Nospace")));
/// assert_eq!(not_space(""), Err(Err::Error(Error::new("", ErrorKind::IsNot))));
/// ```
pub fn is_not<T, Input, Error: ParseError<Input>>(
arr: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
T: FindToken<<Input as InputTakeAtPosition>::Item>,
{
move |i: Input| {
let e: ErrorKind = ErrorKind::IsNot;
i.split_at_position1_complete(|c| arr.find_token(c), e)
}
}
/// Returns the longest slice that matches the pattern.
///
/// The parser will return the longest slice consisting of the characters provided in the
/// combinator's argument.
///
/// It will return a `Err(Err::Error((_, ErrorKind::IsA)))` if the pattern wasn't met.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::is_a;
///
/// fn hex(s: &str) -> IResult<&str, &str> {
/// is_a("1234567890ABCDEF")(s)
/// }
///
/// assert_eq!(hex("123 and voila"), Ok((" and voila", "123")));
/// assert_eq!(hex("DEADBEEF and others"), Ok((" and others", "DEADBEEF")));
/// assert_eq!(hex("BADBABEsomething"), Ok(("something", "BADBABE")));
/// assert_eq!(hex("D15EA5E"), Ok(("", "D15EA5E")));
/// assert_eq!(hex(""), Err(Err::Error(Error::new("", ErrorKind::IsA))));
/// ```
pub fn is_a<T, Input, Error: ParseError<Input>>(
arr: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
T: FindToken<<Input as InputTakeAtPosition>::Item>,
{
move |i: Input| {
let e: ErrorKind = ErrorKind::IsA;
i.split_at_position1_complete(|c| !arr.find_token(c), e)
}
}
/// Returns the longest input slice (if any) that matches the predicate.
///
/// The parser will return the longest slice that matches the given predicate *(a function that
/// takes the input and returns a bool)*.
/// # Example
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::bytes::complete::take_while;
/// use nom::character::is_alphabetic;
///
/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
/// take_while(is_alphabetic)(s)
/// }
///
/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
/// assert_eq!(alpha(b"12345"), Ok((&b"12345"[..], &b""[..])));
/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..])));
/// assert_eq!(alpha(b""), Ok((&b""[..], &b""[..])));
/// ```
pub fn take_while<F, Input, Error: ParseError<Input>>(
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
F: Fn(<Input as InputTakeAtPosition>::Item) -> bool,
{
move |i: Input| i.split_at_position_complete(|c| !cond(c))
}
/// Returns the longest (at least 1) input slice that matches the predicate.
///
/// The parser will return the longest slice that matches the given predicate *(a function that
/// takes the input and returns a bool)*.
///
/// It will return an `Err(Err::Error((_, ErrorKind::TakeWhile1)))` if the pattern wasn't met.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::take_while1;
/// use nom::character::is_alphabetic;
///
/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
/// take_while1(is_alphabetic)(s)
/// }
///
/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..])));
/// assert_eq!(alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhile1))));
/// ```
pub fn take_while1<F, Input, Error: ParseError<Input>>(
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
F: Fn(<Input as InputTakeAtPosition>::Item) -> bool,
{
move |i: Input| {
let e: ErrorKind = ErrorKind::TakeWhile1;
i.split_at_position1_complete(|c| !cond(c), e)
}
}
/// Returns the longest (m <= len <= n) input slice that matches the predicate.
///
/// The parser will return the longest slice that matches the given predicate *(a function that
/// takes the input and returns a bool)*.
///
/// It will return an `Err::Error((_, ErrorKind::TakeWhileMN))` if the pattern wasn't met or the
/// matched length is out of range (m <= len <= n).
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::take_while_m_n;
/// use nom::character::is_alphabetic;
///
/// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
/// take_while_m_n(3, 6, is_alphabetic)(s)
/// }
///
/// assert_eq!(short_alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
/// assert_eq!(short_alpha(b"lengthy"), Ok((&b"y"[..], &b"length"[..])));
/// assert_eq!(short_alpha(b"latin"), Ok((&b""[..], &b"latin"[..])));
/// assert_eq!(short_alpha(b"ed"), Err(Err::Error(Error::new(&b"ed"[..], ErrorKind::TakeWhileMN))));
/// assert_eq!(short_alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhileMN))));
/// ```
pub fn take_while_m_n<F, Input, Error: ParseError<Input>>(
m: usize,
n: usize,
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + InputIter + InputLength + Slice<RangeFrom<usize>>,
F: Fn(<Input as InputIter>::Item) -> bool,
{
move |i: Input| {
let input = i;
match input.position(|c| !cond(c)) {
Some(idx) => {
if idx >= m {
if idx <= n {
let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(idx) {
Ok(input.take_split(index))
} else {
Err(Err::Error(Error::from_error_kind(
input,
ErrorKind::TakeWhileMN,
)))
};
res
} else {
let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(n) {
Ok(input.take_split(index))
} else {
Err(Err::Error(Error::from_error_kind(
input,
ErrorKind::TakeWhileMN,
)))
};
res
}
} else {
let e = ErrorKind::TakeWhileMN;
Err(Err::Error(Error::from_error_kind(input, e)))
}
}
None => {
let len = input.input_len();
if len >= n {
match input.slice_index(n) {
Ok(index) => Ok(input.take_split(index)),
Err(_needed) => Err(Err::Error(Error::from_error_kind(
input,
ErrorKind::TakeWhileMN,
))),
}
} else if len >= m && len <= n {
let res: IResult<_, _, Error> = Ok((input.slice(len..), input));
res
} else {
let e = ErrorKind::TakeWhileMN;
Err(Err::Error(Error::from_error_kind(input, e)))
}
}
}
}
}
/// Returns the longest input slice (if any) till a predicate is met.
///
/// The parser will return the longest slice till the given predicate *(a function that
/// takes the input and returns a bool)*.
/// # Example
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::bytes::complete::take_till;
///
/// fn till_colon(s: &str) -> IResult<&str, &str> {
/// take_till(|c| c == ':')(s)
/// }
///
/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed
/// assert_eq!(till_colon("12345"), Ok(("", "12345")));
/// assert_eq!(till_colon(""), Ok(("", "")));
/// ```
pub fn take_till<F, Input, Error: ParseError<Input>>(
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
F: Fn(<Input as InputTakeAtPosition>::Item) -> bool,
{
move |i: Input| i.split_at_position_complete(|c| cond(c))
}
/// Returns the longest (at least 1) input slice till a predicate is met.
///
/// The parser will return the longest slice till the given predicate *(a function that
/// takes the input and returns a bool)*.
///
/// It will return `Err(Err::Error((_, ErrorKind::TakeTill1)))` if the input is empty or the
/// predicate matches the first input.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::take_till1;
///
/// fn till_colon(s: &str) -> IResult<&str, &str> {
/// take_till1(|c| c == ':')(s)
/// }
///
/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
/// assert_eq!(till_colon(":empty matched"), Err(Err::Error(Error::new(":empty matched", ErrorKind::TakeTill1))));
/// assert_eq!(till_colon("12345"), Ok(("", "12345")));
/// assert_eq!(till_colon(""), Err(Err::Error(Error::new("", ErrorKind::TakeTill1))));
/// ```
pub fn take_till1<F, Input, Error: ParseError<Input>>(
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
F: Fn(<Input as InputTakeAtPosition>::Item) -> bool,
{
move |i: Input| {
let e: ErrorKind = ErrorKind::TakeTill1;
i.split_at_position1_complete(|c| cond(c), e)
}
}
/// Returns an input slice containing the first N input elements (Input[..N]).
///
/// It will return `Err(Err::Error((_, ErrorKind::Eof)))` if the input is shorter than the argument.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::take;
///
/// fn take6(s: &str) -> IResult<&str, &str> {
/// take(6usize)(s)
/// }
///
/// assert_eq!(take6("1234567"), Ok(("7", "123456")));
/// assert_eq!(take6("things"), Ok(("", "things")));
/// assert_eq!(take6("short"), Err(Err::Error(Error::new("short", ErrorKind::Eof))));
/// assert_eq!(take6(""), Err(Err::Error(Error::new("", ErrorKind::Eof))));
/// ```
///
/// The units that are taken will depend on the input type. For example, for a
/// `&str` it will take a number of `char`'s, whereas for a `&[u8]` it will
/// take that many `u8`'s:
///
/// ```rust
/// use nom::error::Error;
/// use nom::bytes::complete::take;
///
/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙"), Ok(("", "💙")));
/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙".as_bytes()), Ok((b"\x9F\x92\x99".as_ref(), b"\xF0".as_ref())));
/// ```
pub fn take<C, Input, Error: ParseError<Input>>(
count: C,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputIter + InputTake,
C: ToUsize,
{
let c = count.to_usize();
move |i: Input| match i.slice_index(c) {
Err(_needed) => Err(Err::Error(Error::from_error_kind(i, ErrorKind::Eof))),
Ok(index) => Ok(i.take_split(index)),
}
}
/// Returns the input slice up to the first occurrence of the pattern.
///
/// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))`
/// if the pattern wasn't met.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::take_until;
///
/// fn until_eof(s: &str) -> IResult<&str, &str> {
/// take_until("eof")(s)
/// }
///
/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
/// assert_eq!(until_eof("hello, world"), Err(Err::Error(Error::new("hello, world", ErrorKind::TakeUntil))));
/// assert_eq!(until_eof(""), Err(Err::Error(Error::new("", ErrorKind::TakeUntil))));
/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1")));
/// ```
pub fn take_until<T, Input, Error: ParseError<Input>>(
tag: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + FindSubstring<T>,
T: InputLength + Clone,
{
move |i: Input| {
let t = tag.clone();
let res: IResult<_, _, Error> = match i.find_substring(t) {
None => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))),
Some(index) => Ok(i.take_split(index)),
};
res
}
}
/// Returns the non-empty input slice up to the first occurrence of the pattern.
///
/// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))`
/// if the pattern wasn't met.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::complete::take_until1;
///
/// fn until_eof(s: &str) -> IResult<&str, &str> {
/// take_until1("eof")(s)
/// }
///
/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
/// assert_eq!(until_eof("hello, world"), Err(Err::Error(Error::new("hello, world", ErrorKind::TakeUntil))));
/// assert_eq!(until_eof(""), Err(Err::Error(Error::new("", ErrorKind::TakeUntil))));
/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1")));
/// assert_eq!(until_eof("eof"), Err(Err::Error(Error::new("eof", ErrorKind::TakeUntil))));
/// ```
pub fn take_until1<T, Input, Error: ParseError<Input>>(
tag: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + FindSubstring<T>,
T: InputLength + Clone,
{
move |i: Input| {
let t = tag.clone();
let res: IResult<_, _, Error> = match i.find_substring(t) {
None => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))),
Some(0) => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))),
Some(index) => Ok(i.take_split(index)),
};
res
}
}
/// Matches a byte string with escaped characters.
///
/// * The first argument matches the normal characters (it must not accept the control character)
/// * The second argument is the control character (like `\` in most languages)
/// * The third argument matches the escaped characters
/// # Example
/// ```
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// # use nom::character::complete::digit1;
/// use nom::bytes::complete::escaped;
/// use nom::character::complete::one_of;
///
/// fn esc(s: &str) -> IResult<&str, &str> {
/// escaped(digit1, '\\', one_of(r#""n\"#))(s)
/// }
///
/// assert_eq!(esc("123;"), Ok((";", "123")));
/// assert_eq!(esc(r#"12\"34;"#), Ok((";", r#"12\"34"#)));
/// ```
///
pub fn escaped<'a, Input: 'a, Error, F, G, O1, O2>(
mut normal: F,
control_char: char,
mut escapable: G,
) -> impl FnMut(Input) -> IResult<Input, Input, Error>
where
Input: Clone
+ crate::traits::Offset
+ InputLength
+ InputTake
+ InputTakeAtPosition
+ Slice<RangeFrom<usize>>
+ InputIter,
<Input as InputIter>::Item: crate::traits::AsChar,
F: Parser<Input, O1, Error>,
G: Parser<Input, O2, Error>,
Error: ParseError<Input>,
{
use crate::traits::AsChar;
move |input: Input| {
let mut i = input.clone();
while i.input_len() > 0 {
let current_len = i.input_len();
match normal.parse(i.clone()) {
Ok((i2, _)) => {
// return if we consumed everything or if the normal parser
// does not consume anything
if i2.input_len() == 0 {
return Ok((input.slice(input.input_len()..), input));
} else if i2.input_len() == current_len {
let index = input.offset(&i2);
return Ok(input.take_split(index));
} else {
i = i2;
}
}
Err(Err::Error(_)) => {
// unwrap() should be safe here since index < $i.input_len()
if i.iter_elements().next().unwrap().as_char() == control_char {
let next = control_char.len_utf8();
if next >= i.input_len() {
return Err(Err::Error(Error::from_error_kind(
input,
ErrorKind::Escaped,
)));
} else {
match escapable.parse(i.slice(next..)) {
Ok((i2, _)) => {
if i2.input_len() == 0 {
return Ok((input.slice(input.input_len()..), input));
} else {
i = i2;
}
}
Err(e) => return Err(e),
}
}
} else {
let index = input.offset(&i);
if index == 0 {
return Err(Err::Error(Error::from_error_kind(
input,
ErrorKind::Escaped,
)));
}
return Ok(input.take_split(index));
}
}
Err(e) => {
return Err(e);
}
}
}
Ok((input.slice(input.input_len()..), input))
}
}
/// Matches a byte string with escaped characters.
///
/// * The first argument matches the normal characters (it must not match the control character)
/// * The second argument is the control character (like `\` in most languages)
/// * The third argument matches the escaped characters and transforms them
///
/// As an example, the string `abc\tdef` could be transformed into `abc def` (the transform also consumes the control character)
///
/// ```
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// # use std::str::from_utf8;
/// use nom::bytes::complete::{escaped_transform, tag};
/// use nom::character::complete::alpha1;
/// use nom::branch::alt;
/// use nom::combinator::value;
///
/// fn parser(input: &str) -> IResult<&str, String> {
/// escaped_transform(
/// alpha1,
/// '\\',
/// alt((
/// value("\\", tag("\\")),
/// value("\"", tag("\"")),
/// value("\n", tag("n")),
/// ))
/// )(input)
/// }
///
/// assert_eq!(parser("ab\\\"cd"), Ok(("", String::from("ab\"cd"))));
/// assert_eq!(parser("ab\\ncd"), Ok(("", String::from("ab\ncd"))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn escaped_transform<Input, Error, F, G, O1, O2, ExtendItem, Output>(
mut normal: F,
control_char: char,
mut transform: G,
) -> impl FnMut(Input) -> IResult<Input, Output, Error>
where
Input: Clone
+ crate::traits::Offset
+ InputLength
+ InputTake
+ InputTakeAtPosition
+ Slice<RangeFrom<usize>>
+ InputIter,
Input: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
O1: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
O2: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
<Input as InputIter>::Item: crate::traits::AsChar,
F: Parser<Input, O1, Error>,
G: Parser<Input, O2, Error>,
Error: ParseError<Input>,
{
use crate::traits::AsChar;
move |input: Input| {
let mut index = 0;
let mut res = input.new_builder();
let i = input.clone();
while index < i.input_len() {
let current_len = i.input_len();
let remainder = i.slice(index..);
match normal.parse(remainder.clone()) {
Ok((i2, o)) => {
o.extend_into(&mut res);
if i2.input_len() == 0 {
return Ok((i.slice(i.input_len()..), res));
} else if i2.input_len() == current_len {
return Ok((remainder, res));
} else {
index = input.offset(&i2);
}
}
Err(Err::Error(_)) => {
// unwrap() should be safe here since index < $i.input_len()
if remainder.iter_elements().next().unwrap().as_char() == control_char {
let next = index + control_char.len_utf8();
let input_len = input.input_len();
if next >= input_len {
return Err(Err::Error(Error::from_error_kind(
remainder,
ErrorKind::EscapedTransform,
)));
} else {
match transform.parse(i.slice(next..)) {
Ok((i2, o)) => {
o.extend_into(&mut res);
if i2.input_len() == 0 {
return Ok((i.slice(i.input_len()..), res));
} else {
index = input.offset(&i2);
}
}
Err(e) => return Err(e),
}
}
} else {
if index == 0 {
return Err(Err::Error(Error::from_error_kind(
remainder,
ErrorKind::EscapedTransform,
)));
}
return Ok((remainder, res));
}
}
Err(e) => return Err(e),
}
}
Ok((input.slice(index..), res))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn complete_take_while_m_n_utf8_all_matching() {
let result: IResult<&str, &str> =
super::take_while_m_n(1, 4, |c: char| c.is_alphabetic())("øn");
assert_eq!(result, Ok(("", "øn")));
}
#[test]
fn complete_take_while_m_n_utf8_all_matching_substring() {
let result: IResult<&str, &str> =
super::take_while_m_n(1, 1, |c: char| c.is_alphabetic())("øn");
assert_eq!(result, Ok(("n", "ø")));
}
// issue #1336 "escaped hangs if normal parser accepts empty"
fn escaped_string(input: &str) -> IResult<&str, &str> {
use crate::character::complete::{alpha0, one_of};
escaped(alpha0, '\\', one_of("n"))(input)
}
// issue #1336 "escaped hangs if normal parser accepts empty"
#[test]
fn escaped_hang() {
escaped_string("7").unwrap();
escaped_string("a7").unwrap();
}
// issue #1118: escaped does not work with an empty string
fn unquote<'a>(input: &'a str) -> IResult<&'a str, &'a str> {
use crate::bytes::complete::*;
use crate::character::complete::*;
use crate::combinator::opt;
use crate::sequence::delimited;
delimited(
char('"'),
escaped(opt(none_of(r#"\""#)), '\\', one_of(r#"\"rnt"#)),
char('"'),
)(input)
}
#[test]
fn escaped_hang_1118() {
assert_eq!(unquote(r#""""#), Ok(("", "")));
}
}

View File

@@ -0,0 +1,6 @@
//! Parsers recognizing bytes streams
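//!
//! As a rough sketch of the difference between the two submodules (taken from the `tag`
//! behaviour documented in each): `complete` parsers treat their input as the whole data
//! and report an error on a partial match, while `streaming` parsers may instead ask for
//! more input.
//!
//! ```rust
//! use nom::error::{Error, ErrorKind};
//! use nom::{Err, Needed};
//!
//! // "He" is only a prefix of "Hello": the complete parser reports an error...
//! assert_eq!(
//!   nom::bytes::complete::tag::<_, _, Error<_>>("Hello")("He"),
//!   Err(Err::Error(Error::new("He", ErrorKind::Tag)))
//! );
//!
//! // ...while the streaming parser asks for the 3 missing bytes.
//! assert_eq!(
//!   nom::bytes::streaming::tag::<_, _, Error<_>>("Hello")("He"),
//!   Err(Err::Incomplete(Needed::new(3)))
//! );
//! ```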
pub mod complete;
pub mod streaming;
#[cfg(test)]
mod tests;

View File

@@ -0,0 +1,700 @@
//! Parsers recognizing bytes streams, streaming version
use crate::error::ErrorKind;
use crate::error::ParseError;
use crate::internal::{Err, IResult, Needed, Parser};
use crate::lib::std::ops::RangeFrom;
use crate::lib::std::result::Result::*;
use crate::traits::{
Compare, CompareResult, FindSubstring, FindToken, InputIter, InputLength, InputTake,
InputTakeAtPosition, Slice, ToUsize,
};
/// Recognizes a pattern.
///
/// The input data will be compared to the tag combinator's argument and will return the part of
/// the input that matches the argument.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::streaming::tag;
///
/// fn parser(s: &str) -> IResult<&str, &str> {
/// tag("Hello")(s)
/// }
///
/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello")));
/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag))));
/// assert_eq!(parser("S"), Err(Err::Error(Error::new("S", ErrorKind::Tag))));
/// assert_eq!(parser("H"), Err(Err::Incomplete(Needed::new(4))));
/// ```
pub fn tag<T, Input, Error: ParseError<Input>>(
tag: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + InputLength + Compare<T>,
T: InputLength + Clone,
{
move |i: Input| {
let tag_len = tag.input_len();
let t = tag.clone();
let res: IResult<_, _, Error> = match i.compare(t) {
CompareResult::Ok => Ok(i.take_split(tag_len)),
CompareResult::Incomplete => Err(Err::Incomplete(Needed::new(tag_len - i.input_len()))),
CompareResult::Error => {
let e: ErrorKind = ErrorKind::Tag;
Err(Err::Error(Error::from_error_kind(i, e)))
}
};
res
}
}
/// Recognizes a case insensitive pattern.
///
/// The input data will be compared to the tag combinator's argument and will return the part of
/// the input that matches the argument with no regard to case.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::streaming::tag_no_case;
///
/// fn parser(s: &str) -> IResult<&str, &str> {
/// tag_no_case("hello")(s)
/// }
///
/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello")));
/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello")));
/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO")));
/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag))));
/// assert_eq!(parser(""), Err(Err::Incomplete(Needed::new(5))));
/// ```
pub fn tag_no_case<T, Input, Error: ParseError<Input>>(
tag: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + InputLength + Compare<T>,
T: InputLength + Clone,
{
move |i: Input| {
let tag_len = tag.input_len();
let t = tag.clone();
let res: IResult<_, _, Error> = match (i).compare_no_case(t) {
CompareResult::Ok => Ok(i.take_split(tag_len)),
CompareResult::Incomplete => Err(Err::Incomplete(Needed::new(tag_len - i.input_len()))),
CompareResult::Error => {
let e: ErrorKind = ErrorKind::Tag;
Err(Err::Error(Error::from_error_kind(i, e)))
}
};
res
}
}
/// Parse till certain characters are met.
///
/// The parser will return the longest slice till one of the characters of the combinator's argument is met.
///
/// It doesn't consume the matched character.
///
/// It will return a `Err::Incomplete(Needed::new(1))` if the pattern wasn't met.
/// # Example
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::bytes::streaming::is_not;
///
/// fn not_space(s: &str) -> IResult<&str, &str> {
/// is_not(" \t\r\n")(s)
/// }
///
/// assert_eq!(not_space("Hello, World!"), Ok((" World!", "Hello,")));
/// assert_eq!(not_space("Sometimes\t"), Ok(("\t", "Sometimes")));
/// assert_eq!(not_space("Nospace"), Err(Err::Incomplete(Needed::new(1))));
/// assert_eq!(not_space(""), Err(Err::Incomplete(Needed::new(1))));
/// ```
pub fn is_not<T, Input, Error: ParseError<Input>>(
arr: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
T: FindToken<<Input as InputTakeAtPosition>::Item>,
{
move |i: Input| {
let e: ErrorKind = ErrorKind::IsNot;
i.split_at_position1(|c| arr.find_token(c), e)
}
}
/// Returns the longest slice that matches the pattern.
///
/// The parser will return the longest slice consisting of the characters provided in the
/// combinator's argument.
///
/// # Streaming specific
/// *Streaming version* will return a `Err::Incomplete(Needed::new(1))` if the pattern wasn't met
/// or if the pattern reaches the end of the input.
/// # Example
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::bytes::streaming::is_a;
///
/// fn hex(s: &str) -> IResult<&str, &str> {
/// is_a("1234567890ABCDEF")(s)
/// }
///
/// assert_eq!(hex("123 and voila"), Ok((" and voila", "123")));
/// assert_eq!(hex("DEADBEEF and others"), Ok((" and others", "DEADBEEF")));
/// assert_eq!(hex("BADBABEsomething"), Ok(("something", "BADBABE")));
/// assert_eq!(hex("D15EA5E"), Err(Err::Incomplete(Needed::new(1))));
/// assert_eq!(hex(""), Err(Err::Incomplete(Needed::new(1))));
/// ```
pub fn is_a<T, Input, Error: ParseError<Input>>(
arr: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
T: FindToken<<Input as InputTakeAtPosition>::Item>,
{
move |i: Input| {
let e: ErrorKind = ErrorKind::IsA;
i.split_at_position1(|c| !arr.find_token(c), e)
}
}
/// Returns the longest input slice (if any) that matches the predicate.
///
/// The parser will return the longest slice that matches the given predicate *(a function that
/// takes the input and returns a bool)*.
///
/// # Streaming Specific
/// *Streaming version* will return a `Err::Incomplete(Needed::new(1))` if the pattern reaches the end of the input.
/// # Example
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::bytes::streaming::take_while;
/// use nom::character::is_alphabetic;
///
/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
/// take_while(is_alphabetic)(s)
/// }
///
/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
/// assert_eq!(alpha(b"12345"), Ok((&b"12345"[..], &b""[..])));
/// assert_eq!(alpha(b"latin"), Err(Err::Incomplete(Needed::new(1))));
/// assert_eq!(alpha(b""), Err(Err::Incomplete(Needed::new(1))));
/// ```
pub fn take_while<F, Input, Error: ParseError<Input>>(
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
F: Fn(<Input as InputTakeAtPosition>::Item) -> bool,
{
move |i: Input| i.split_at_position(|c| !cond(c))
}
/// Returns the longest (at least 1) input slice that matches the predicate.
///
/// The parser will return the longest slice that matches the given predicate *(a function that
/// takes the input and returns a bool)*.
///
/// It will return an `Err(Err::Error((_, ErrorKind::TakeWhile1)))` if the pattern wasn't met.
///
/// # Streaming Specific
/// *Streaming version* will return an `Err::Incomplete(Needed::new(1))` if the pattern reaches the end of the input.
///
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::streaming::take_while1;
/// use nom::character::is_alphabetic;
///
/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
/// take_while1(is_alphabetic)(s)
/// }
///
/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
/// assert_eq!(alpha(b"latin"), Err(Err::Incomplete(Needed::new(1))));
/// assert_eq!(alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhile1))));
/// ```
pub fn take_while1<F, Input, Error: ParseError<Input>>(
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
F: Fn(<Input as InputTakeAtPosition>::Item) -> bool,
{
move |i: Input| {
let e: ErrorKind = ErrorKind::TakeWhile1;
i.split_at_position1(|c| !cond(c), e)
}
}
/// Returns the longest (m <= len <= n) input slice that matches the predicate.
///
/// The parser will return the longest slice that matches the given predicate *(a function that
/// takes the input and returns a bool)*.
///
/// It will return an `Err::Error((_, ErrorKind::TakeWhileMN))` if the pattern wasn't met.
/// # Streaming Specific
/// *Streaming version* will return a `Err::Incomplete(Needed::new(1))` if the pattern reaches the end of the input or is too short.
///
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::streaming::take_while_m_n;
/// use nom::character::is_alphabetic;
///
/// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> {
/// take_while_m_n(3, 6, is_alphabetic)(s)
/// }
///
/// assert_eq!(short_alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..])));
/// assert_eq!(short_alpha(b"lengthy"), Ok((&b"y"[..], &b"length"[..])));
/// assert_eq!(short_alpha(b"latin"), Err(Err::Incomplete(Needed::new(1))));
/// assert_eq!(short_alpha(b"ed"), Err(Err::Incomplete(Needed::new(1))));
/// assert_eq!(short_alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhileMN))));
/// ```
pub fn take_while_m_n<F, Input, Error: ParseError<Input>>(
m: usize,
n: usize,
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + InputIter + InputLength,
F: Fn(<Input as InputIter>::Item) -> bool,
{
move |i: Input| {
let input = i;
match input.position(|c| !cond(c)) {
Some(idx) => {
if idx >= m {
if idx <= n {
let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(idx) {
Ok(input.take_split(index))
} else {
Err(Err::Error(Error::from_error_kind(
input,
ErrorKind::TakeWhileMN,
)))
};
res
} else {
let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(n) {
Ok(input.take_split(index))
} else {
Err(Err::Error(Error::from_error_kind(
input,
ErrorKind::TakeWhileMN,
)))
};
res
}
} else {
let e = ErrorKind::TakeWhileMN;
Err(Err::Error(Error::from_error_kind(input, e)))
}
}
None => {
let len = input.input_len();
if len >= n {
match input.slice_index(n) {
Ok(index) => Ok(input.take_split(index)),
Err(_needed) => Err(Err::Error(Error::from_error_kind(
input,
ErrorKind::TakeWhileMN,
))),
}
} else {
let needed = if m > len { m - len } else { 1 };
Err(Err::Incomplete(Needed::new(needed)))
}
}
}
}
}
/// Returns the longest input slice (if any) till a predicate is met.
///
/// The parser will return the longest slice till the given predicate *(a function that
/// takes the input and returns a bool)*.
///
/// # Streaming Specific
/// *Streaming version* will return an `Err::Incomplete(Needed::new(1))` if the match reaches the
/// end of input or if there was no match.
///
/// # Example
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::bytes::streaming::take_till;
///
/// fn till_colon(s: &str) -> IResult<&str, &str> {
/// take_till(|c| c == ':')(s)
/// }
///
/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed
/// assert_eq!(till_colon("12345"), Err(Err::Incomplete(Needed::new(1))));
/// assert_eq!(till_colon(""), Err(Err::Incomplete(Needed::new(1))));
/// ```
pub fn take_till<F, Input, Error: ParseError<Input>>(
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
F: Fn(<Input as InputTakeAtPosition>::Item) -> bool,
{
move |i: Input| i.split_at_position(|c| cond(c))
}
/// Returns the longest (at least 1) input slice till a predicate is met.
///
/// The parser will return the longest slice till the given predicate *(a function that
/// takes the input and returns a bool)*.
///
/// # Streaming Specific
/// *Streaming version* will return an `Err::Incomplete(Needed::new(1))` if the match reaches the
/// end of input or if there was no match.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::streaming::take_till1;
///
/// fn till_colon(s: &str) -> IResult<&str, &str> {
/// take_till1(|c| c == ':')(s)
/// }
///
/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
/// assert_eq!(till_colon(":empty matched"), Err(Err::Error(Error::new(":empty matched", ErrorKind::TakeTill1))));
/// assert_eq!(till_colon("12345"), Err(Err::Incomplete(Needed::new(1))));
/// assert_eq!(till_colon(""), Err(Err::Incomplete(Needed::new(1))));
/// ```
pub fn take_till1<F, Input, Error: ParseError<Input>>(
cond: F,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTakeAtPosition,
F: Fn(<Input as InputTakeAtPosition>::Item) -> bool,
{
move |i: Input| {
let e: ErrorKind = ErrorKind::TakeTill1;
i.split_at_position1(|c| cond(c), e)
}
}
/// Returns an input slice containing the first N input elements (Input[..N]).
///
/// # Streaming Specific
/// *Streaming version*: if the input has fewer than N elements, `take` will
/// return an `Err::Incomplete(Needed::new(M))`, where M is the number of
/// additional bytes the parser would need to succeed.
/// This is well defined for `&[u8]`, as the number of elements is the byte size,
/// but for types like `&str` we cannot know how many bytes correspond to
/// the next few chars, so the result will be `Err::Incomplete(Needed::Unknown)`;
/// both cases are shown in the examples below.
///
/// # Example
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::bytes::streaming::take;
///
/// fn take6(s: &str) -> IResult<&str, &str> {
/// take(6usize)(s)
/// }
///
/// assert_eq!(take6("1234567"), Ok(("7", "123456")));
/// assert_eq!(take6("things"), Ok(("", "things")));
/// assert_eq!(take6("short"), Err(Err::Incomplete(Needed::Unknown)));
/// ```
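///
/// For a byte slice, the number of missing elements is known, so the
/// `Needed::new(M)` case described above applies (a small illustrative sketch,
/// mirroring the example above):
///
/// ```rust
/// # use nom::{Err, Needed, IResult};
/// use nom::bytes::streaming::take;
///
/// fn take6b(s: &[u8]) -> IResult<&[u8], &[u8]> {
///   take(6usize)(s)
/// }
///
/// assert_eq!(take6b(b"1234567"), Ok((&b"7"[..], &b"123456"[..])));
/// // 5 bytes available, 6 requested: one more byte is needed
/// assert_eq!(take6b(b"short"), Err(Err::Incomplete(Needed::new(1))));
/// ```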
pub fn take<C, Input, Error: ParseError<Input>>(
count: C,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputIter + InputTake + InputLength,
C: ToUsize,
{
let c = count.to_usize();
move |i: Input| match i.slice_index(c) {
Err(i) => Err(Err::Incomplete(i)),
Ok(index) => Ok(i.take_split(index)),
}
}
/// Returns the input slice up to the first occurrence of the pattern.
///
/// It doesn't consume the pattern.
///
/// # Streaming Specific
/// *Streaming version* will return an `Err::Incomplete(Needed::Unknown)` if the input doesn't
/// contain the pattern or if the input is smaller than the pattern.
/// # Example
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::bytes::streaming::take_until;
///
/// fn until_eof(s: &str) -> IResult<&str, &str> {
/// take_until("eof")(s)
/// }
///
/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
/// assert_eq!(until_eof("hello, world"), Err(Err::Incomplete(Needed::Unknown)));
/// assert_eq!(until_eof("hello, worldeo"), Err(Err::Incomplete(Needed::Unknown)));
/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1")));
/// ```
pub fn take_until<T, Input, Error: ParseError<Input>>(
tag: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + InputLength + FindSubstring<T>,
T: Clone,
{
move |i: Input| {
let t = tag.clone();
let res: IResult<_, _, Error> = match i.find_substring(t) {
None => Err(Err::Incomplete(Needed::Unknown)),
Some(index) => Ok(i.take_split(index)),
};
res
}
}
/// Returns the non-empty input slice up to the first occurrence of the pattern.
///
/// It doesn't consume the pattern.
///
/// # Streaming Specific
/// *Streaming version* will return an `Err::Incomplete(Needed::Unknown)` if the input doesn't
/// contain the pattern or if the input is smaller than the pattern.
/// # Example
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::bytes::streaming::take_until1;
///
/// fn until_eof(s: &str) -> IResult<&str, &str> {
/// take_until1("eof")(s)
/// }
///
/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
/// assert_eq!(until_eof("hello, world"), Err(Err::Incomplete(Needed::Unknown)));
/// assert_eq!(until_eof("hello, worldeo"), Err(Err::Incomplete(Needed::Unknown)));
/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1")));
/// assert_eq!(until_eof("eof"), Err(Err::Error(Error::new("eof", ErrorKind::TakeUntil))));
/// ```
pub fn take_until1<T, Input, Error: ParseError<Input>>(
tag: T,
) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + InputLength + FindSubstring<T>,
T: Clone,
{
move |i: Input| {
let t = tag.clone();
let res: IResult<_, _, Error> = match i.find_substring(t) {
None => Err(Err::Incomplete(Needed::Unknown)),
Some(0) => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))),
Some(index) => Ok(i.take_split(index)),
};
res
}
}
/// Matches a byte string with escaped characters.
///
/// * The first argument matches the normal characters (it must not accept the control character)
/// * The second argument is the control character (like `\` in most languages)
/// * The third argument matches the escaped characters
/// # Example
/// ```
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// # use nom::character::complete::digit1;
/// use nom::bytes::streaming::escaped;
/// use nom::character::streaming::one_of;
///
/// fn esc(s: &str) -> IResult<&str, &str> {
/// escaped(digit1, '\\', one_of("\"n\\"))(s)
/// }
///
/// assert_eq!(esc("123;"), Ok((";", "123")));
/// assert_eq!(esc("12\\\"34;"), Ok((";", "12\\\"34")));
/// ```
///
pub fn escaped<Input, Error, F, G, O1, O2>(
mut normal: F,
control_char: char,
mut escapable: G,
) -> impl FnMut(Input) -> IResult<Input, Input, Error>
where
Input: Clone
+ crate::traits::Offset
+ InputLength
+ InputTake
+ InputTakeAtPosition
+ Slice<RangeFrom<usize>>
+ InputIter,
<Input as InputIter>::Item: crate::traits::AsChar,
F: Parser<Input, O1, Error>,
G: Parser<Input, O2, Error>,
Error: ParseError<Input>,
{
use crate::traits::AsChar;
move |input: Input| {
let mut i = input.clone();
while i.input_len() > 0 {
let current_len = i.input_len();
match normal.parse(i.clone()) {
Ok((i2, _)) => {
if i2.input_len() == 0 {
return Err(Err::Incomplete(Needed::Unknown));
} else if i2.input_len() == current_len {
let index = input.offset(&i2);
return Ok(input.take_split(index));
} else {
i = i2;
}
}
Err(Err::Error(_)) => {
// unwrap() should be safe here since index < $i.input_len()
if i.iter_elements().next().unwrap().as_char() == control_char {
let next = control_char.len_utf8();
if next >= i.input_len() {
return Err(Err::Incomplete(Needed::new(1)));
} else {
match escapable.parse(i.slice(next..)) {
Ok((i2, _)) => {
if i2.input_len() == 0 {
return Err(Err::Incomplete(Needed::Unknown));
} else {
i = i2;
}
}
Err(e) => return Err(e),
}
}
} else {
let index = input.offset(&i);
return Ok(input.take_split(index));
}
}
Err(e) => {
return Err(e);
}
}
}
Err(Err::Incomplete(Needed::Unknown))
}
}
/// Matches a byte string with escaped characters.
///
/// * The first argument matches the normal characters (it must not match the control character)
/// * The second argument is the control character (like `\` in most languages)
/// * The third argument matches the escaped characters and transforms them
///
/// As an example, the string `abc\tdef` could be transformed into `abc def` (the transform also consumes the control character)
///
/// ```
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// # use std::str::from_utf8;
/// use nom::bytes::streaming::{escaped_transform, tag};
/// use nom::character::streaming::alpha1;
/// use nom::branch::alt;
/// use nom::combinator::value;
///
/// fn parser(input: &str) -> IResult<&str, String> {
/// escaped_transform(
/// alpha1,
/// '\\',
/// alt((
/// value("\\", tag("\\")),
/// value("\"", tag("\"")),
/// value("\n", tag("n")),
/// ))
/// )(input)
/// }
///
/// assert_eq!(parser("ab\\\"cd\""), Ok(("\"", String::from("ab\"cd"))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn escaped_transform<Input, Error, F, G, O1, O2, ExtendItem, Output>(
mut normal: F,
control_char: char,
mut transform: G,
) -> impl FnMut(Input) -> IResult<Input, Output, Error>
where
Input: Clone
+ crate::traits::Offset
+ InputLength
+ InputTake
+ InputTakeAtPosition
+ Slice<RangeFrom<usize>>
+ InputIter,
Input: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
O1: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
O2: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
<Input as InputIter>::Item: crate::traits::AsChar,
F: Parser<Input, O1, Error>,
G: Parser<Input, O2, Error>,
Error: ParseError<Input>,
{
use crate::traits::AsChar;
move |input: Input| {
let mut index = 0;
let mut res = input.new_builder();
let i = input.clone();
while index < i.input_len() {
let current_len = i.input_len();
let remainder = i.slice(index..);
match normal.parse(remainder.clone()) {
Ok((i2, o)) => {
o.extend_into(&mut res);
if i2.input_len() == 0 {
return Err(Err::Incomplete(Needed::Unknown));
} else if i2.input_len() == current_len {
return Ok((remainder, res));
} else {
index = input.offset(&i2);
}
}
Err(Err::Error(_)) => {
// unwrap() should be safe here since index < $i.input_len()
if remainder.iter_elements().next().unwrap().as_char() == control_char {
let next = index + control_char.len_utf8();
let input_len = input.input_len();
if next >= input_len {
return Err(Err::Incomplete(Needed::Unknown));
} else {
match transform.parse(i.slice(next..)) {
Ok((i2, o)) => {
o.extend_into(&mut res);
if i2.input_len() == 0 {
return Err(Err::Incomplete(Needed::Unknown));
} else {
index = input.offset(&i2);
}
}
Err(e) => return Err(e),
}
}
} else {
return Ok((remainder, res));
}
}
Err(e) => return Err(e),
}
}
Err(Err::Incomplete(Needed::Unknown))
}
}

View File

@@ -0,0 +1,636 @@
use crate::character::is_alphabetic;
use crate::character::streaming::{
alpha1 as alpha, alphanumeric1 as alphanumeric, digit1 as digit, hex_digit1 as hex_digit,
multispace1 as multispace, oct_digit1 as oct_digit, space1 as space,
};
use crate::error::ErrorKind;
use crate::internal::{Err, IResult, Needed};
#[cfg(feature = "alloc")]
use crate::{
branch::alt,
bytes::complete::{escaped, escaped_transform, tag},
combinator::{map, value},
lib::std::string::String,
lib::std::vec::Vec,
};
#[test]
fn is_a() {
use crate::bytes::streaming::is_a;
fn a_or_b(i: &[u8]) -> IResult<&[u8], &[u8]> {
is_a("ab")(i)
}
let a = &b"abcd"[..];
assert_eq!(a_or_b(a), Ok((&b"cd"[..], &b"ab"[..])));
let b = &b"bcde"[..];
assert_eq!(a_or_b(b), Ok((&b"cde"[..], &b"b"[..])));
let c = &b"cdef"[..];
assert_eq!(
a_or_b(c),
Err(Err::Error(error_position!(c, ErrorKind::IsA)))
);
let d = &b"bacdef"[..];
assert_eq!(a_or_b(d), Ok((&b"cdef"[..], &b"ba"[..])));
}
#[test]
fn is_not() {
use crate::bytes::streaming::is_not;
fn a_or_b(i: &[u8]) -> IResult<&[u8], &[u8]> {
is_not("ab")(i)
}
let a = &b"cdab"[..];
assert_eq!(a_or_b(a), Ok((&b"ab"[..], &b"cd"[..])));
let b = &b"cbde"[..];
assert_eq!(a_or_b(b), Ok((&b"bde"[..], &b"c"[..])));
let c = &b"abab"[..];
assert_eq!(
a_or_b(c),
Err(Err::Error(error_position!(c, ErrorKind::IsNot)))
);
let d = &b"cdefba"[..];
assert_eq!(a_or_b(d), Ok((&b"ba"[..], &b"cdef"[..])));
let e = &b"e"[..];
assert_eq!(a_or_b(e), Err(Err::Incomplete(Needed::new(1))));
}
#[cfg(feature = "alloc")]
#[allow(unused_variables)]
#[test]
fn escaping() {
use crate::character::streaming::one_of;
fn esc(i: &[u8]) -> IResult<&[u8], &[u8]> {
escaped(alpha, '\\', one_of("\"n\\"))(i)
}
assert_eq!(esc(&b"abcd;"[..]), Ok((&b";"[..], &b"abcd"[..])));
assert_eq!(esc(&b"ab\\\"cd;"[..]), Ok((&b";"[..], &b"ab\\\"cd"[..])));
assert_eq!(esc(&b"\\\"abcd;"[..]), Ok((&b";"[..], &b"\\\"abcd"[..])));
assert_eq!(esc(&b"\\n;"[..]), Ok((&b";"[..], &b"\\n"[..])));
assert_eq!(esc(&b"ab\\\"12"[..]), Ok((&b"12"[..], &b"ab\\\""[..])));
assert_eq!(
esc(&b"AB\\"[..]),
Err(Err::Error(error_position!(
&b"AB\\"[..],
ErrorKind::Escaped
)))
);
assert_eq!(
esc(&b"AB\\A"[..]),
Err(Err::Error(error_node_position!(
&b"AB\\A"[..],
ErrorKind::Escaped,
error_position!(&b"A"[..], ErrorKind::OneOf)
)))
);
fn esc2(i: &[u8]) -> IResult<&[u8], &[u8]> {
escaped(digit, '\\', one_of("\"n\\"))(i)
}
assert_eq!(esc2(&b"12\\nnn34"[..]), Ok((&b"nn34"[..], &b"12\\n"[..])));
}
#[cfg(feature = "alloc")]
#[test]
fn escaping_str() {
use crate::character::streaming::one_of;
fn esc(i: &str) -> IResult<&str, &str> {
escaped(alpha, '\\', one_of("\"n\\"))(i)
}
assert_eq!(esc("abcd;"), Ok((";", "abcd")));
assert_eq!(esc("ab\\\"cd;"), Ok((";", "ab\\\"cd")));
assert_eq!(esc("\\\"abcd;"), Ok((";", "\\\"abcd")));
assert_eq!(esc("\\n;"), Ok((";", "\\n")));
assert_eq!(esc("ab\\\"12"), Ok(("12", "ab\\\"")));
assert_eq!(
esc("AB\\"),
Err(Err::Error(error_position!("AB\\", ErrorKind::Escaped)))
);
assert_eq!(
esc("AB\\A"),
Err(Err::Error(error_node_position!(
"AB\\A",
ErrorKind::Escaped,
error_position!("A", ErrorKind::OneOf)
)))
);
fn esc2(i: &str) -> IResult<&str, &str> {
escaped(digit, '\\', one_of("\"n\\"))(i)
}
assert_eq!(esc2("12\\nnn34"), Ok(("nn34", "12\\n")));
fn esc3(i: &str) -> IResult<&str, &str> {
escaped(alpha, '\u{241b}', one_of("\"n"))(i)
}
assert_eq!(esc3("ab␛ncd;"), Ok((";", "ab␛ncd")));
}
#[cfg(feature = "alloc")]
fn to_s(i: Vec<u8>) -> String {
String::from_utf8_lossy(&i).into_owned()
}
#[cfg(feature = "alloc")]
#[test]
fn escape_transform() {
fn esc(i: &[u8]) -> IResult<&[u8], String> {
map(
escaped_transform(
alpha,
'\\',
alt((
value(&b"\\"[..], tag("\\")),
value(&b"\""[..], tag("\"")),
value(&b"\n"[..], tag("n")),
)),
),
to_s,
)(i)
}
assert_eq!(esc(&b"abcd;"[..]), Ok((&b";"[..], String::from("abcd"))));
assert_eq!(
esc(&b"ab\\\"cd;"[..]),
Ok((&b";"[..], String::from("ab\"cd")))
);
assert_eq!(
esc(&b"\\\"abcd;"[..]),
Ok((&b";"[..], String::from("\"abcd")))
);
assert_eq!(esc(&b"\\n;"[..]), Ok((&b";"[..], String::from("\n"))));
assert_eq!(
esc(&b"ab\\\"12"[..]),
Ok((&b"12"[..], String::from("ab\"")))
);
assert_eq!(
esc(&b"AB\\"[..]),
Err(Err::Error(error_position!(
&b"\\"[..],
ErrorKind::EscapedTransform
)))
);
assert_eq!(
esc(&b"AB\\A"[..]),
Err(Err::Error(error_node_position!(
&b"AB\\A"[..],
ErrorKind::EscapedTransform,
error_position!(&b"A"[..], ErrorKind::Tag)
)))
);
fn esc2(i: &[u8]) -> IResult<&[u8], String> {
map(
escaped_transform(
alpha,
'&',
alt((
value("è".as_bytes(), tag("egrave;")),
value("à".as_bytes(), tag("agrave;")),
)),
),
to_s,
)(i)
}
assert_eq!(
esc2(&b"ab&egrave;DEF;"[..]),
Ok((&b";"[..], String::from("abèDEF")))
);
assert_eq!(
esc2(&b"ab&egrave;D&agrave;EF;"[..]),
Ok((&b";"[..], String::from("abèDàEF")))
);
}
#[cfg(feature = "std")]
#[test]
fn escape_transform_str() {
fn esc(i: &str) -> IResult<&str, String> {
escaped_transform(
alpha,
'\\',
alt((
value("\\", tag("\\")),
value("\"", tag("\"")),
value("\n", tag("n")),
)),
)(i)
}
assert_eq!(esc("abcd;"), Ok((";", String::from("abcd"))));
assert_eq!(esc("ab\\\"cd;"), Ok((";", String::from("ab\"cd"))));
assert_eq!(esc("\\\"abcd;"), Ok((";", String::from("\"abcd"))));
assert_eq!(esc("\\n;"), Ok((";", String::from("\n"))));
assert_eq!(esc("ab\\\"12"), Ok(("12", String::from("ab\""))));
assert_eq!(
esc("AB\\"),
Err(Err::Error(error_position!(
"\\",
ErrorKind::EscapedTransform
)))
);
assert_eq!(
esc("AB\\A"),
Err(Err::Error(error_node_position!(
"AB\\A",
ErrorKind::EscapedTransform,
error_position!("A", ErrorKind::Tag)
)))
);
fn esc2(i: &str) -> IResult<&str, String> {
escaped_transform(
alpha,
'&',
alt((value("è", tag("egrave;")), value("à", tag("agrave;")))),
)(i)
}
assert_eq!(esc2("ab&egrave;DEF;"), Ok((";", String::from("abèDEF"))));
assert_eq!(
esc2("ab&egrave;D&agrave;EF;"),
Ok((";", String::from("abèDàEF")))
);
fn esc3(i: &str) -> IResult<&str, String> {
escaped_transform(
alpha,
'␛',
alt((value("\0", tag("0")), value("\n", tag("n")))),
)(i)
}
assert_eq!(esc3("a␛0bc␛n"), Ok(("", String::from("a\0bc\n"))));
}
#[test]
fn take_until_incomplete() {
use crate::bytes::streaming::take_until;
fn y(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_until("end")(i)
}
assert_eq!(y(&b"nd"[..]), Err(Err::Incomplete(Needed::Unknown)));
assert_eq!(y(&b"123"[..]), Err(Err::Incomplete(Needed::Unknown)));
assert_eq!(y(&b"123en"[..]), Err(Err::Incomplete(Needed::Unknown)));
}
#[test]
fn take_until_incomplete_s() {
use crate::bytes::streaming::take_until;
fn ys(i: &str) -> IResult<&str, &str> {
take_until("end")(i)
}
assert_eq!(ys("123en"), Err(Err::Incomplete(Needed::Unknown)));
}
#[test]
fn recognize() {
use crate::bytes::streaming::{tag, take};
use crate::combinator::recognize;
use crate::sequence::delimited;
fn x(i: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(delimited(tag("<!--"), take(5_usize), tag("-->")))(i)
}
let r = x(&b"<!-- abc --> aaa"[..]);
assert_eq!(r, Ok((&b" aaa"[..], &b"<!-- abc -->"[..])));
let semicolon = &b";"[..];
fn ya(i: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(alpha)(i)
}
let ra = ya(&b"abc;"[..]);
assert_eq!(ra, Ok((semicolon, &b"abc"[..])));
fn yd(i: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(digit)(i)
}
let rd = yd(&b"123;"[..]);
assert_eq!(rd, Ok((semicolon, &b"123"[..])));
fn yhd(i: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(hex_digit)(i)
}
let rhd = yhd(&b"123abcDEF;"[..]);
assert_eq!(rhd, Ok((semicolon, &b"123abcDEF"[..])));
fn yod(i: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(oct_digit)(i)
}
let rod = yod(&b"1234567;"[..]);
assert_eq!(rod, Ok((semicolon, &b"1234567"[..])));
fn yan(i: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(alphanumeric)(i)
}
let ran = yan(&b"123abc;"[..]);
assert_eq!(ran, Ok((semicolon, &b"123abc"[..])));
fn ys(i: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(space)(i)
}
let rs = ys(&b" \t;"[..]);
assert_eq!(rs, Ok((semicolon, &b" \t"[..])));
fn yms(i: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(multispace)(i)
}
let rms = yms(&b" \t\r\n;"[..]);
assert_eq!(rms, Ok((semicolon, &b" \t\r\n"[..])));
}
#[test]
fn take_while() {
use crate::bytes::streaming::take_while;
fn f(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_while(is_alphabetic)(i)
}
let a = b"";
let b = b"abcd";
let c = b"abcd123";
let d = b"123";
assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f(&c[..]), Ok((&d[..], &b[..])));
assert_eq!(f(&d[..]), Ok((&d[..], &a[..])));
}
#[test]
fn take_while1() {
use crate::bytes::streaming::take_while1;
fn f(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_while1(is_alphabetic)(i)
}
let a = b"";
let b = b"abcd";
let c = b"abcd123";
let d = b"123";
assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f(&c[..]), Ok((&b"123"[..], &b[..])));
assert_eq!(
f(&d[..]),
Err(Err::Error(error_position!(&d[..], ErrorKind::TakeWhile1)))
);
}
#[test]
fn take_while_m_n() {
use crate::bytes::streaming::take_while_m_n;
fn x(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_while_m_n(2, 4, is_alphabetic)(i)
}
let a = b"";
let b = b"a";
let c = b"abc";
let d = b"abc123";
let e = b"abcde";
let f = b"123";
assert_eq!(x(&a[..]), Err(Err::Incomplete(Needed::new(2))));
assert_eq!(x(&b[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(x(&c[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(x(&d[..]), Ok((&b"123"[..], &c[..])));
assert_eq!(x(&e[..]), Ok((&b"e"[..], &b"abcd"[..])));
assert_eq!(
x(&f[..]),
Err(Err::Error(error_position!(&f[..], ErrorKind::TakeWhileMN)))
);
}
#[test]
fn take_till() {
use crate::bytes::streaming::take_till;
fn f(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_till(is_alphabetic)(i)
}
let a = b"";
let b = b"abcd";
let c = b"123abcd";
let d = b"123";
assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f(&b[..]), Ok((&b"abcd"[..], &b""[..])));
assert_eq!(f(&c[..]), Ok((&b"abcd"[..], &b"123"[..])));
assert_eq!(f(&d[..]), Err(Err::Incomplete(Needed::new(1))));
}
#[test]
fn take_till1() {
use crate::bytes::streaming::take_till1;
fn f(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_till1(is_alphabetic)(i)
}
let a = b"";
let b = b"abcd";
let c = b"123abcd";
let d = b"123";
assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(
f(&b[..]),
Err(Err::Error(error_position!(&b[..], ErrorKind::TakeTill1)))
);
assert_eq!(f(&c[..]), Ok((&b"abcd"[..], &b"123"[..])));
assert_eq!(f(&d[..]), Err(Err::Incomplete(Needed::new(1))));
}
#[test]
fn take_while_utf8() {
use crate::bytes::streaming::take_while;
fn f(i: &str) -> IResult<&str, &str> {
take_while(|c| c != '點')(i)
}
assert_eq!(f(""), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f("abcd"), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f("abcd點"), Ok(("", "abcd")));
assert_eq!(f("abcd點a"), Ok(("點a", "abcd")));
fn g(i: &str) -> IResult<&str, &str> {
take_while(|c| c == '點')(i)
}
assert_eq!(g(""), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(g("點abcd"), Ok(("abcd", "")));
assert_eq!(g("點點點a"), Ok(("a", "點點點")));
}
#[test]
fn take_till_utf8() {
use crate::bytes::streaming::take_till;
fn f(i: &str) -> IResult<&str, &str> {
take_till(|c| c == '點')(i)
}
assert_eq!(f(""), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f("abcd"), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f("abcd點"), Ok(("", "abcd")));
assert_eq!(f("abcd點a"), Ok(("點a", "abcd")));
fn g(i: &str) -> IResult<&str, &str> {
take_till(|c| c != '點')(i)
}
assert_eq!(g(""), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(g("點abcd"), Ok(("abcd", "")));
assert_eq!(g("點點點a"), Ok(("a", "點點點")));
}
#[test]
fn take_utf8() {
use crate::bytes::streaming::{take, take_while};
fn f(i: &str) -> IResult<&str, &str> {
take(3_usize)(i)
}
assert_eq!(f(""), Err(Err::Incomplete(Needed::Unknown)));
assert_eq!(f("ab"), Err(Err::Incomplete(Needed::Unknown)));
assert_eq!(f(""), Err(Err::Incomplete(Needed::Unknown)));
assert_eq!(f("ab點cd"), Ok(("cd", "ab點")));
assert_eq!(f("a點bcd"), Ok(("cd", "a點b")));
assert_eq!(f("a點b"), Ok(("", "a點b")));
fn g(i: &str) -> IResult<&str, &str> {
take_while(|c| c == '點')(i)
}
assert_eq!(g(""), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(g("點abcd"), Ok(("abcd", "")));
assert_eq!(g("點點點a"), Ok(("a", "點點點")));
}
#[test]
fn take_while_m_n_utf8() {
use crate::bytes::streaming::take_while_m_n;
fn parser(i: &str) -> IResult<&str, &str> {
take_while_m_n(1, 1, |c| c == 'A' || c == '😃')(i)
}
assert_eq!(parser("A!"), Ok(("!", "A")));
assert_eq!(parser("😃!"), Ok(("!", "😃")));
}
#[test]
fn take_while_m_n_utf8_full_match() {
use crate::bytes::streaming::take_while_m_n;
fn parser(i: &str) -> IResult<&str, &str> {
take_while_m_n(1, 1, |c: char| c.is_alphabetic())(i)
}
assert_eq!(parser("øn"), Ok(("n", "ø")));
}
#[test]
#[cfg(feature = "std")]
fn recognize_take_while() {
use crate::bytes::streaming::take_while;
use crate::character::is_alphanumeric;
use crate::combinator::recognize;
fn x(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_while(is_alphanumeric)(i)
}
fn y(i: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(x)(i)
}
assert_eq!(x(&b"ab."[..]), Ok((&b"."[..], &b"ab"[..])));
println!("X: {:?}", x(&b"ab"[..]));
assert_eq!(y(&b"ab."[..]), Ok((&b"."[..], &b"ab"[..])));
}
#[test]
fn length_bytes() {
use crate::{bytes::streaming::tag, multi::length_data, number::streaming::le_u8};
fn x(i: &[u8]) -> IResult<&[u8], &[u8]> {
length_data(le_u8)(i)
}
assert_eq!(x(b"\x02..>>"), Ok((&b">>"[..], &b".."[..])));
assert_eq!(x(b"\x02.."), Ok((&[][..], &b".."[..])));
assert_eq!(x(b"\x02."), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(x(b"\x02"), Err(Err::Incomplete(Needed::new(2))));
fn y(i: &[u8]) -> IResult<&[u8], &[u8]> {
let (i, _) = tag("magic")(i)?;
length_data(le_u8)(i)
}
assert_eq!(y(b"magic\x02..>>"), Ok((&b">>"[..], &b".."[..])));
assert_eq!(y(b"magic\x02.."), Ok((&[][..], &b".."[..])));
assert_eq!(y(b"magic\x02."), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(y(b"magic\x02"), Err(Err::Incomplete(Needed::new(2))));
}
#[cfg(feature = "alloc")]
#[test]
fn case_insensitive() {
use crate::bytes::streaming::tag_no_case;
fn test(i: &[u8]) -> IResult<&[u8], &[u8]> {
tag_no_case("ABcd")(i)
}
assert_eq!(test(&b"aBCdefgh"[..]), Ok((&b"efgh"[..], &b"aBCd"[..])));
assert_eq!(test(&b"abcdefgh"[..]), Ok((&b"efgh"[..], &b"abcd"[..])));
assert_eq!(test(&b"ABCDefgh"[..]), Ok((&b"efgh"[..], &b"ABCD"[..])));
assert_eq!(test(&b"ab"[..]), Err(Err::Incomplete(Needed::new(2))));
assert_eq!(
test(&b"Hello"[..]),
Err(Err::Error(error_position!(&b"Hello"[..], ErrorKind::Tag)))
);
assert_eq!(
test(&b"Hel"[..]),
Err(Err::Error(error_position!(&b"Hel"[..], ErrorKind::Tag)))
);
fn test2(i: &str) -> IResult<&str, &str> {
tag_no_case("ABcd")(i)
}
assert_eq!(test2("aBCdefgh"), Ok(("efgh", "aBCd")));
assert_eq!(test2("abcdefgh"), Ok(("efgh", "abcd")));
assert_eq!(test2("ABCDefgh"), Ok(("efgh", "ABCD")));
assert_eq!(test2("ab"), Err(Err::Incomplete(Needed::new(2))));
assert_eq!(
test2("Hello"),
Err(Err::Error(error_position!(&"Hello"[..], ErrorKind::Tag)))
);
assert_eq!(
test2("Hel"),
Err(Err::Error(error_position!(&"Hel"[..], ErrorKind::Tag)))
);
}
#[test]
fn tag_fixed_size_array() {
use crate::bytes::streaming::tag;
fn test(i: &[u8]) -> IResult<&[u8], &[u8]> {
tag([0x42])(i)
}
fn test2(i: &[u8]) -> IResult<&[u8], &[u8]> {
tag(&[0x42])(i)
}
let input = [0x42, 0x00];
assert_eq!(test(&input), Ok((&b"\x00"[..], &b"\x42"[..])));
assert_eq!(test2(&input), Ok((&b"\x00"[..], &b"\x42"[..])));
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,116 @@
//! Character specific parsers and combinators
//!
//! Functions recognizing specific characters
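//!
//! A minimal usage sketch: these byte-level predicates can be combined with
//! combinators such as `take_while` from `nom::bytes::complete` (the `word`
//! parser below is only illustrative):
//!
//! ```
//! use nom::bytes::complete::take_while;
//! use nom::character::is_alphabetic;
//! use nom::IResult;
//!
//! fn word(i: &[u8]) -> IResult<&[u8], &[u8]> {
//! take_while(is_alphabetic)(i)
//! }
//!
//! assert_eq!(word(&b"abc;"[..]), Ok((&b";"[..], &b"abc"[..])));
//! ```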
#[cfg(test)]
mod tests;
pub mod complete;
pub mod streaming;
/// Tests if byte is ASCII alphabetic: A-Z, a-z
///
/// # Example
///
/// ```
/// # use nom::character::is_alphabetic;
/// assert_eq!(is_alphabetic(b'9'), false);
/// assert_eq!(is_alphabetic(b'a'), true);
/// ```
#[inline]
pub fn is_alphabetic(chr: u8) -> bool {
(chr >= 0x41 && chr <= 0x5A) || (chr >= 0x61 && chr <= 0x7A)
}
/// Tests if byte is ASCII digit: 0-9
///
/// # Example
///
/// ```
/// # use nom::character::is_digit;
/// assert_eq!(is_digit(b'a'), false);
/// assert_eq!(is_digit(b'9'), true);
/// ```
#[inline]
pub fn is_digit(chr: u8) -> bool {
chr >= 0x30 && chr <= 0x39
}
/// Tests if byte is ASCII hex digit: 0-9, A-F, a-f
///
/// # Example
///
/// ```
/// # use nom::character::is_hex_digit;
/// assert_eq!(is_hex_digit(b'a'), true);
/// assert_eq!(is_hex_digit(b'9'), true);
/// assert_eq!(is_hex_digit(b'A'), true);
/// assert_eq!(is_hex_digit(b'x'), false);
/// ```
#[inline]
pub fn is_hex_digit(chr: u8) -> bool {
(chr >= 0x30 && chr <= 0x39) || (chr >= 0x41 && chr <= 0x46) || (chr >= 0x61 && chr <= 0x66)
}
/// Tests if byte is ASCII octal digit: 0-7
///
/// # Example
///
/// ```
/// # use nom::character::is_oct_digit;
/// assert_eq!(is_oct_digit(b'a'), false);
/// assert_eq!(is_oct_digit(b'9'), false);
/// assert_eq!(is_oct_digit(b'6'), true);
/// ```
#[inline]
pub fn is_oct_digit(chr: u8) -> bool {
chr >= 0x30 && chr <= 0x37
}
/// Tests if byte is ASCII alphanumeric: A-Z, a-z, 0-9
///
/// # Example
///
/// ```
/// # use nom::character::is_alphanumeric;
/// assert_eq!(is_alphanumeric(b'-'), false);
/// assert_eq!(is_alphanumeric(b'a'), true);
/// assert_eq!(is_alphanumeric(b'9'), true);
/// assert_eq!(is_alphanumeric(b'A'), true);
/// ```
#[inline]
pub fn is_alphanumeric(chr: u8) -> bool {
is_alphabetic(chr) || is_digit(chr)
}
/// Tests if byte is ASCII space or tab
///
/// # Example
///
/// ```
/// # use nom::character::is_space;
/// assert_eq!(is_space(b'\n'), false);
/// assert_eq!(is_space(b'\r'), false);
/// assert_eq!(is_space(b' '), true);
/// assert_eq!(is_space(b'\t'), true);
/// ```
#[inline]
pub fn is_space(chr: u8) -> bool {
chr == b' ' || chr == b'\t'
}
/// Tests if byte is ASCII newline: \n
///
/// # Example
///
/// ```
/// # use nom::character::is_newline;
/// assert_eq!(is_newline(b'\n'), true);
/// assert_eq!(is_newline(b'\r'), false);
/// assert_eq!(is_newline(b' '), false);
/// assert_eq!(is_newline(b'\t'), false);
/// ```
#[inline]
pub fn is_newline(chr: u8) -> bool {
chr == b'\n'
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,62 @@
use super::streaming::*;
use crate::error::ErrorKind;
use crate::internal::{Err, IResult};
#[test]
fn one_of_test() {
fn f(i: &[u8]) -> IResult<&[u8], char> {
one_of("ab")(i)
}
let a = &b"abcd"[..];
assert_eq!(f(a), Ok((&b"bcd"[..], 'a')));
let b = &b"cde"[..];
assert_eq!(f(b), Err(Err::Error(error_position!(b, ErrorKind::OneOf))));
fn utf8(i: &str) -> IResult<&str, char> {
one_of("+\u{FF0B}")(i)
}
assert!(utf8("+").is_ok());
assert!(utf8("\u{FF0B}").is_ok());
}
#[test]
fn none_of_test() {
fn f(i: &[u8]) -> IResult<&[u8], char> {
none_of("ab")(i)
}
let a = &b"abcd"[..];
assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::NoneOf))));
let b = &b"cde"[..];
assert_eq!(f(b), Ok((&b"de"[..], 'c')));
}
#[test]
fn char_byteslice() {
fn f(i: &[u8]) -> IResult<&[u8], char> {
char('c')(i)
}
let a = &b"abcd"[..];
assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::Char))));
let b = &b"cde"[..];
assert_eq!(f(b), Ok((&b"de"[..], 'c')));
}
#[test]
fn char_str() {
fn f(i: &str) -> IResult<&str, char> {
char('c')(i)
}
let a = &"abcd"[..];
assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::Char))));
let b = &"cde"[..];
assert_eq!(f(b), Ok((&"de"[..], 'c')));
}

View File

@@ -0,0 +1,768 @@
//! General purpose combinators
#![allow(unused_imports)]
#[cfg(feature = "alloc")]
use crate::lib::std::boxed::Box;
use crate::error::{ErrorKind, FromExternalError, ParseError};
use crate::internal::*;
use crate::lib::std::borrow::Borrow;
use crate::lib::std::convert::Into;
#[cfg(feature = "std")]
use crate::lib::std::fmt::Debug;
use crate::lib::std::mem::transmute;
use crate::lib::std::ops::{Range, RangeFrom, RangeTo};
use crate::traits::{AsChar, InputIter, InputLength, InputTakeAtPosition, ParseTo};
use crate::traits::{Compare, CompareResult, Offset, Slice};
#[cfg(test)]
mod tests;
/// Return the remaining input.
///
/// ```rust
/// # use nom::error::ErrorKind;
/// use nom::combinator::rest;
/// assert_eq!(rest::<_,(_, ErrorKind)>("abc"), Ok(("", "abc")));
/// assert_eq!(rest::<_,(_, ErrorKind)>(""), Ok(("", "")));
/// ```
#[inline]
pub fn rest<T, E: ParseError<T>>(input: T) -> IResult<T, T, E>
where
T: Slice<RangeFrom<usize>>,
T: InputLength,
{
Ok((input.slice(input.input_len()..), input))
}
/// Return the length of the remaining input.
///
/// ```rust
/// # use nom::error::ErrorKind;
/// use nom::combinator::rest_len;
/// assert_eq!(rest_len::<_,(_, ErrorKind)>("abc"), Ok(("abc", 3)));
/// assert_eq!(rest_len::<_,(_, ErrorKind)>(""), Ok(("", 0)));
/// ```
#[inline]
pub fn rest_len<T, E: ParseError<T>>(input: T) -> IResult<T, usize, E>
where
T: InputLength,
{
let len = input.input_len();
Ok((input, len))
}
/// Maps a function on the result of a parser.
///
/// ```rust
/// use nom::{Err,error::ErrorKind, IResult,Parser};
/// use nom::character::complete::digit1;
/// use nom::combinator::map;
/// # fn main() {
///
/// let mut parser = map(digit1, |s: &str| s.len());
///
/// // the parser will count how many characters were returned by digit1
/// assert_eq!(parser.parse("123456"), Ok(("", 6)));
///
/// // this will fail if digit1 fails
/// assert_eq!(parser.parse("abc"), Err(Err::Error(("abc", ErrorKind::Digit))));
/// # }
/// ```
pub fn map<I, O1, O2, E, F, G>(mut parser: F, mut f: G) -> impl FnMut(I) -> IResult<I, O2, E>
where
F: Parser<I, O1, E>,
G: FnMut(O1) -> O2,
{
move |input: I| {
let (input, o1) = parser.parse(input)?;
Ok((input, f(o1)))
}
}
/// Applies a function returning a `Result` over the result of a parser.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::character::complete::digit1;
/// use nom::combinator::map_res;
/// # fn main() {
///
/// let mut parse = map_res(digit1, |s: &str| s.parse::<u8>());
///
/// // the parser will convert the result of digit1 to a number
/// assert_eq!(parse("123"), Ok(("", 123)));
///
/// // this will fail if digit1 fails
/// assert_eq!(parse("abc"), Err(Err::Error(("abc", ErrorKind::Digit))));
///
/// // this will fail if the mapped function fails (a `u8` is too small to hold `123456`)
/// assert_eq!(parse("123456"), Err(Err::Error(("123456", ErrorKind::MapRes))));
/// # }
/// ```
pub fn map_res<I: Clone, O1, O2, E: FromExternalError<I, E2>, E2, F, G>(
mut parser: F,
mut f: G,
) -> impl FnMut(I) -> IResult<I, O2, E>
where
F: Parser<I, O1, E>,
G: FnMut(O1) -> Result<O2, E2>,
{
move |input: I| {
let i = input.clone();
let (input, o1) = parser.parse(input)?;
match f(o1) {
Ok(o2) => Ok((input, o2)),
Err(e) => Err(Err::Error(E::from_external_error(i, ErrorKind::MapRes, e))),
}
}
}
/// Applies a function returning an `Option` over the result of a parser.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::character::complete::digit1;
/// use nom::combinator::map_opt;
/// # fn main() {
///
/// let mut parse = map_opt(digit1, |s: &str| s.parse::<u8>().ok());
///
/// // the parser will convert the result of digit1 to a number
/// assert_eq!(parse("123"), Ok(("", 123)));
///
/// // this will fail if digit1 fails
/// assert_eq!(parse("abc"), Err(Err::Error(("abc", ErrorKind::Digit))));
///
/// // this will fail if the mapped function fails (a `u8` is too small to hold `123456`)
/// assert_eq!(parse("123456"), Err(Err::Error(("123456", ErrorKind::MapOpt))));
/// # }
/// ```
pub fn map_opt<I: Clone, O1, O2, E: ParseError<I>, F, G>(
mut parser: F,
mut f: G,
) -> impl FnMut(I) -> IResult<I, O2, E>
where
F: Parser<I, O1, E>,
G: FnMut(O1) -> Option<O2>,
{
move |input: I| {
let i = input.clone();
let (input, o1) = parser.parse(input)?;
match f(o1) {
Some(o2) => Ok((input, o2)),
None => Err(Err::Error(E::from_error_kind(i, ErrorKind::MapOpt))),
}
}
}
/// Applies a parser over the result of another one.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::character::complete::digit1;
/// use nom::bytes::complete::take;
/// use nom::combinator::map_parser;
/// # fn main() {
///
/// let mut parse = map_parser(take(5u8), digit1);
///
/// assert_eq!(parse("12345"), Ok(("", "12345")));
/// assert_eq!(parse("123ab"), Ok(("", "123")));
/// assert_eq!(parse("123"), Err(Err::Error(("123", ErrorKind::Eof))));
/// # }
/// ```
pub fn map_parser<I, O1, O2, E: ParseError<I>, F, G>(
mut parser: F,
mut applied_parser: G,
) -> impl FnMut(I) -> IResult<I, O2, E>
where
F: Parser<I, O1, E>,
G: Parser<O1, O2, E>,
{
move |input: I| {
let (input, o1) = parser.parse(input)?;
let (_, o2) = applied_parser.parse(o1)?;
Ok((input, o2))
}
}
/// Creates a new parser from the output of the first parser, then applies that parser over the rest of the input.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::bytes::complete::take;
/// use nom::number::complete::u8;
/// use nom::combinator::flat_map;
/// # fn main() {
///
/// let mut parse = flat_map(u8, take);
///
/// assert_eq!(parse(&[2, 0, 1, 2][..]), Ok((&[2][..], &[0, 1][..])));
/// assert_eq!(parse(&[4, 0, 1, 2][..]), Err(Err::Error((&[0, 1, 2][..], ErrorKind::Eof))));
/// # }
/// ```
pub fn flat_map<I, O1, O2, E: ParseError<I>, F, G, H>(
mut parser: F,
mut applied_parser: G,
) -> impl FnMut(I) -> IResult<I, O2, E>
where
F: Parser<I, O1, E>,
G: FnMut(O1) -> H,
H: Parser<I, O2, E>,
{
move |input: I| {
let (input, o1) = parser.parse(input)?;
applied_parser(o1).parse(input)
}
}
/// Optional parser: Will return `None` if not successful.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::combinator::opt;
/// use nom::character::complete::alpha1;
/// # fn main() {
///
/// fn parser(i: &str) -> IResult<&str, Option<&str>> {
/// opt(alpha1)(i)
/// }
///
/// assert_eq!(parser("abcd;"), Ok((";", Some("abcd"))));
/// assert_eq!(parser("123;"), Ok(("123;", None)));
/// # }
/// ```
pub fn opt<I: Clone, O, E: ParseError<I>, F>(mut f: F) -> impl FnMut(I) -> IResult<I, Option<O>, E>
where
F: Parser<I, O, E>,
{
move |input: I| {
let i = input.clone();
match f.parse(input) {
Ok((i, o)) => Ok((i, Some(o))),
Err(Err::Error(_)) => Ok((i, None)),
Err(e) => Err(e),
}
}
}
/// Calls the parser if the condition is met.
///
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, IResult};
/// use nom::combinator::cond;
/// use nom::character::complete::alpha1;
/// # fn main() {
///
/// fn parser(b: bool, i: &str) -> IResult<&str, Option<&str>> {
/// cond(b, alpha1)(i)
/// }
///
/// assert_eq!(parser(true, "abcd;"), Ok((";", Some("abcd"))));
/// assert_eq!(parser(false, "abcd;"), Ok(("abcd;", None)));
/// assert_eq!(parser(true, "123;"), Err(Err::Error(Error::new("123;", ErrorKind::Alpha))));
/// assert_eq!(parser(false, "123;"), Ok(("123;", None)));
/// # }
/// ```
pub fn cond<I, O, E: ParseError<I>, F>(
b: bool,
mut f: F,
) -> impl FnMut(I) -> IResult<I, Option<O>, E>
where
F: Parser<I, O, E>,
{
move |input: I| {
if b {
match f.parse(input) {
Ok((i, o)) => Ok((i, Some(o))),
Err(e) => Err(e),
}
} else {
Ok((input, None))
}
}
}
/// Tries to apply its parser without consuming the input.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::combinator::peek;
/// use nom::character::complete::alpha1;
/// # fn main() {
///
/// let mut parser = peek(alpha1);
///
/// assert_eq!(parser("abcd;"), Ok(("abcd;", "abcd")));
/// assert_eq!(parser("123;"), Err(Err::Error(("123;", ErrorKind::Alpha))));
/// # }
/// ```
pub fn peek<I: Clone, O, E: ParseError<I>, F>(mut f: F) -> impl FnMut(I) -> IResult<I, O, E>
where
F: Parser<I, O, E>,
{
move |input: I| {
let i = input.clone();
match f.parse(input) {
Ok((_, o)) => Ok((i, o)),
Err(e) => Err(e),
}
}
}
/// Returns its input if it is at the end of input data
///
/// When we're at the end of the data, this combinator
/// will succeed
///
/// ```
/// # use std::str;
/// # use nom::{Err, error::ErrorKind, IResult};
/// # use nom::combinator::eof;
///
/// # fn main() {
/// let parser = eof;
/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Eof))));
/// assert_eq!(parser(""), Ok(("", "")));
/// # }
/// ```
pub fn eof<I: InputLength + Clone, E: ParseError<I>>(input: I) -> IResult<I, I, E> {
if input.input_len() == 0 {
let clone = input.clone();
Ok((input, clone))
} else {
Err(Err::Error(E::from_error_kind(input, ErrorKind::Eof)))
}
}
/// Transforms Incomplete into `Error`.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::bytes::streaming::take;
/// use nom::combinator::complete;
/// # fn main() {
///
/// let mut parser = complete(take(5u8));
///
/// assert_eq!(parser("abcdefg"), Ok(("fg", "abcde")));
/// assert_eq!(parser("abcd"), Err(Err::Error(("abcd", ErrorKind::Complete))));
/// # }
/// ```
pub fn complete<I: Clone, O, E: ParseError<I>, F>(mut f: F) -> impl FnMut(I) -> IResult<I, O, E>
where
F: Parser<I, O, E>,
{
move |input: I| {
let i = input.clone();
match f.parse(input) {
Err(Err::Incomplete(_)) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Complete))),
rest => rest,
}
}
}
/// Succeeds if all the input has been consumed by its child parser.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::combinator::all_consuming;
/// use nom::character::complete::alpha1;
/// # fn main() {
///
/// let mut parser = all_consuming(alpha1);
///
/// assert_eq!(parser("abcd"), Ok(("", "abcd")));
/// assert_eq!(parser("abcd;"),Err(Err::Error((";", ErrorKind::Eof))));
/// assert_eq!(parser("123abcd;"),Err(Err::Error(("123abcd;", ErrorKind::Alpha))));
/// # }
/// ```
pub fn all_consuming<I, O, E: ParseError<I>, F>(mut f: F) -> impl FnMut(I) -> IResult<I, O, E>
where
I: InputLength,
F: Parser<I, O, E>,
{
move |input: I| {
let (input, res) = f.parse(input)?;
if input.input_len() == 0 {
Ok((input, res))
} else {
Err(Err::Error(E::from_error_kind(input, ErrorKind::Eof)))
}
}
}
/// Returns the result of the child parser if it satisfies a verification function.
///
/// The verification function takes as argument a reference to the output of the
/// parser.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::combinator::verify;
/// use nom::character::complete::alpha1;
/// # fn main() {
///
/// let mut parser = verify(alpha1, |s: &str| s.len() == 4);
///
/// assert_eq!(parser("abcd"), Ok(("", "abcd")));
/// assert_eq!(parser("abcde"), Err(Err::Error(("abcde", ErrorKind::Verify))));
/// assert_eq!(parser("123abcd;"),Err(Err::Error(("123abcd;", ErrorKind::Alpha))));
/// # }
/// ```
pub fn verify<I: Clone, O1, O2, E: ParseError<I>, F, G>(
mut first: F,
second: G,
) -> impl FnMut(I) -> IResult<I, O1, E>
where
F: Parser<I, O1, E>,
G: Fn(&O2) -> bool,
O1: Borrow<O2>,
O2: ?Sized,
{
move |input: I| {
let i = input.clone();
let (input, o) = first.parse(input)?;
if second(o.borrow()) {
Ok((input, o))
} else {
Err(Err::Error(E::from_error_kind(i, ErrorKind::Verify)))
}
}
}
/// Returns the provided value if the child parser succeeds.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::combinator::value;
/// use nom::character::complete::alpha1;
/// # fn main() {
///
/// let mut parser = value(1234, alpha1);
///
/// assert_eq!(parser("abcd"), Ok(("", 1234)));
/// assert_eq!(parser("123abcd;"), Err(Err::Error(("123abcd;", ErrorKind::Alpha))));
/// # }
/// ```
pub fn value<I, O1: Clone, O2, E: ParseError<I>, F>(
val: O1,
mut parser: F,
) -> impl FnMut(I) -> IResult<I, O1, E>
where
F: Parser<I, O2, E>,
{
move |input: I| parser.parse(input).map(|(i, _)| (i, val.clone()))
}
/// Succeeds if the child parser returns an error.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::combinator::not;
/// use nom::character::complete::alpha1;
/// # fn main() {
///
/// let mut parser = not(alpha1);
///
/// assert_eq!(parser("123"), Ok(("123", ())));
/// assert_eq!(parser("abcd"), Err(Err::Error(("abcd", ErrorKind::Not))));
/// # }
/// ```
pub fn not<I: Clone, O, E: ParseError<I>, F>(mut parser: F) -> impl FnMut(I) -> IResult<I, (), E>
where
F: Parser<I, O, E>,
{
move |input: I| {
let i = input.clone();
match parser.parse(input) {
Ok(_) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Not))),
Err(Err::Error(_)) => Ok((i, ())),
Err(e) => Err(e),
}
}
}
/// If the child parser was successful, return the consumed input as produced value.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::combinator::recognize;
/// use nom::character::complete::{char, alpha1};
/// use nom::sequence::separated_pair;
/// # fn main() {
///
/// let mut parser = recognize(separated_pair(alpha1, char(','), alpha1));
///
/// assert_eq!(parser("abcd,efgh"), Ok(("", "abcd,efgh")));
/// assert_eq!(parser("abcd;"),Err(Err::Error((";", ErrorKind::Char))));
/// # }
/// ```
pub fn recognize<I: Clone + Offset + Slice<RangeTo<usize>>, O, E: ParseError<I>, F>(
mut parser: F,
) -> impl FnMut(I) -> IResult<I, I, E>
where
F: Parser<I, O, E>,
{
move |input: I| {
let i = input.clone();
match parser.parse(i) {
Ok((i, _)) => {
let index = input.offset(&i);
Ok((i, input.slice(..index)))
}
Err(e) => Err(e),
}
}
}
/// If the child parser was successful, return the consumed input with the output
/// as a tuple. Functions similarly to [recognize](fn.recognize.html) except it
/// returns the parser output as well.
///
/// This can be useful especially in cases where the output is not the same type
/// as the input, or the input is a user defined type.
///
/// Returned tuple is of the format `(consumed input, produced output)`.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::combinator::{consumed, value, recognize, map};
/// use nom::character::complete::{char, alpha1};
/// use nom::bytes::complete::tag;
/// use nom::sequence::separated_pair;
///
/// fn inner_parser(input: &str) -> IResult<&str, bool> {
/// value(true, tag("1234"))(input)
/// }
///
/// # fn main() {
///
/// let mut consumed_parser = consumed(value(true, separated_pair(alpha1, char(','), alpha1)));
///
/// assert_eq!(consumed_parser("abcd,efgh1"), Ok(("1", ("abcd,efgh", true))));
/// assert_eq!(consumed_parser("abcd;"),Err(Err::Error((";", ErrorKind::Char))));
///
///
/// // the first output (representing the consumed input)
/// // should be the same as that of the `recognize` parser.
/// let mut recognize_parser = recognize(inner_parser);
/// let mut consumed_parser = map(consumed(inner_parser), |(consumed, output)| consumed);
///
/// assert_eq!(recognize_parser("1234"), consumed_parser("1234"));
/// assert_eq!(recognize_parser("abcd"), consumed_parser("abcd"));
/// # }
/// ```
pub fn consumed<I, O, F, E>(mut parser: F) -> impl FnMut(I) -> IResult<I, (I, O), E>
where
I: Clone + Offset + Slice<RangeTo<usize>>,
E: ParseError<I>,
F: Parser<I, O, E>,
{
move |input: I| {
let i = input.clone();
match parser.parse(i) {
Ok((remaining, result)) => {
let index = input.offset(&remaining);
let consumed = input.slice(..index);
Ok((remaining, (consumed, result)))
}
Err(e) => Err(e),
}
}
}
/// Transforms a recoverable `Err::Error` into an unrecoverable `Err::Failure`
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::combinator::cut;
/// use nom::character::complete::alpha1;
/// # fn main() {
///
/// let mut parser = cut(alpha1);
///
/// assert_eq!(parser("abcd;"), Ok((";", "abcd")));
/// assert_eq!(parser("123;"), Err(Err::Failure(("123;", ErrorKind::Alpha))));
/// # }
/// ```
pub fn cut<I, O, E: ParseError<I>, F>(mut parser: F) -> impl FnMut(I) -> IResult<I, O, E>
where
F: Parser<I, O, E>,
{
move |input: I| match parser.parse(input) {
Err(Err::Error(e)) => Err(Err::Failure(e)),
rest => rest,
}
}
/// Automatically converts the child parser's result to another type.
///
/// It converts the output value and the error value
/// as long as the corresponding `Into` implementations are available.
///
/// ```rust
/// # use nom::IResult;
/// use nom::combinator::into;
/// use nom::character::complete::alpha1;
/// # fn main() {
///
/// fn parser1(i: &str) -> IResult<&str, &str> {
/// alpha1(i)
/// }
///
/// let mut parser2 = into(parser1);
///
/// // the parser converts the &str output of the child parser into a Vec<u8>
/// let bytes: IResult<&str, Vec<u8>> = parser2("abcd");
/// assert_eq!(bytes, Ok(("", vec![97, 98, 99, 100])));
/// # }
/// ```
pub fn into<I, O1, O2, E1, E2, F>(mut parser: F) -> impl FnMut(I) -> IResult<I, O2, E2>
where
O1: Into<O2>,
E1: Into<E2>,
E1: ParseError<I>,
E2: ParseError<I>,
F: Parser<I, O1, E1>,
{
//map(parser, Into::into)
move |input: I| match parser.parse(input) {
Ok((i, o)) => Ok((i, o.into())),
Err(Err::Error(e)) => Err(Err::Error(e.into())),
Err(Err::Failure(e)) => Err(Err::Failure(e.into())),
Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)),
}
}
/// Creates an iterator from input data and a parser.
///
/// Call the iterator's [ParserIterator::finish] method to get the remaining input if successful,
/// or the error value if we encountered an error.
///
/// ```rust
/// use nom::{combinator::iterator, IResult, bytes::complete::tag, character::complete::alpha1, sequence::terminated};
/// use std::collections::HashMap;
///
/// let data = "abc|defg|hijkl|mnopqr|123";
/// let mut it = iterator(data, terminated(alpha1, tag("|")));
///
/// let parsed = it.map(|v| (v, v.len())).collect::<HashMap<_,_>>();
/// let res: IResult<_,_> = it.finish();
///
/// assert_eq!(parsed, [("abc", 3usize), ("defg", 4), ("hijkl", 5), ("mnopqr", 6)].iter().cloned().collect());
/// assert_eq!(res, Ok(("123", ())));
/// ```
pub fn iterator<Input, Output, Error, F>(input: Input, f: F) -> ParserIterator<Input, Error, F>
where
F: Parser<Input, Output, Error>,
Error: ParseError<Input>,
{
ParserIterator {
iterator: f,
input,
state: Some(State::Running),
}
}
/// Main structure associated to the [iterator] function.
pub struct ParserIterator<I, E, F> {
iterator: F,
input: I,
state: Option<State<E>>,
}
impl<I: Clone, E, F> ParserIterator<I, E, F> {
/// Returns the remaining input if parsing was successful, or the error if we encountered an error.
pub fn finish(mut self) -> IResult<I, (), E> {
match self.state.take().unwrap() {
State::Running | State::Done => Ok((self.input, ())),
State::Failure(e) => Err(Err::Failure(e)),
State::Incomplete(i) => Err(Err::Incomplete(i)),
}
}
}
impl<'a, Input, Output, Error, F> core::iter::Iterator for &'a mut ParserIterator<Input, Error, F>
where
F: FnMut(Input) -> IResult<Input, Output, Error>,
Input: Clone,
{
type Item = Output;
fn next(&mut self) -> Option<Self::Item> {
if let State::Running = self.state.take().unwrap() {
let input = self.input.clone();
match (self.iterator)(input) {
Ok((i, o)) => {
self.input = i;
self.state = Some(State::Running);
Some(o)
}
Err(Err::Error(_)) => {
self.state = Some(State::Done);
None
}
Err(Err::Failure(e)) => {
self.state = Some(State::Failure(e));
None
}
Err(Err::Incomplete(i)) => {
self.state = Some(State::Incomplete(i));
None
}
}
} else {
None
}
}
}
enum State<E> {
Running,
Done,
Failure(E),
Incomplete(Needed),
}
/// A parser which always succeeds with the given value without consuming any input.
///
/// It can be used for example as the last alternative in `alt` to
/// specify the default case.
///
/// ```rust
/// # use nom::{Err,error::ErrorKind, IResult};
/// use nom::branch::alt;
/// use nom::combinator::{success, value};
/// use nom::character::complete::char;
/// # fn main() {
///
/// let mut parser = success::<_,_,(_,ErrorKind)>(10);
/// assert_eq!(parser("xyz"), Ok(("xyz", 10)));
///
/// let mut sign = alt((value(-1, char('-')), value(1, char('+')), success::<_,_,(_,ErrorKind)>(1)));
/// assert_eq!(sign("+10"), Ok(("10", 1)));
/// assert_eq!(sign("-10"), Ok(("10", -1)));
/// assert_eq!(sign("10"), Ok(("10", 1)));
/// # }
/// ```
pub fn success<I, O: Clone, E: ParseError<I>>(val: O) -> impl Fn(I) -> IResult<I, O, E> {
move |input: I| Ok((input, val.clone()))
}
/// A parser which always fails.
///
/// ```rust
/// # use nom::{Err, error::ErrorKind, IResult};
/// use nom::combinator::fail;
///
/// let s = "string";
/// assert_eq!(fail::<_, &str, _>(s), Err(Err::Error((s, ErrorKind::Fail))));
/// ```
pub fn fail<I, O, E: ParseError<I>>(i: I) -> IResult<I, O, E> {
Err(Err::Error(E::from_error_kind(i, ErrorKind::Fail)))
}

View File

@@ -0,0 +1,275 @@
use super::*;
use crate::bytes::complete::take;
use crate::bytes::streaming::tag;
use crate::error::ErrorKind;
use crate::error::ParseError;
use crate::internal::{Err, IResult, Needed};
#[cfg(feature = "alloc")]
use crate::lib::std::boxed::Box;
use crate::number::complete::u8;
macro_rules! assert_parse(
($left: expr, $right: expr) => {
let res: $crate::IResult<_, _, (_, ErrorKind)> = $left;
assert_eq!(res, $right);
};
);
/*#[test]
fn t1() {
let v1:Vec<u8> = vec![1,2,3];
let v2:Vec<u8> = vec![4,5,6];
let d = Ok((&v1[..], &v2[..]));
let res = d.flat_map(print);
assert_eq!(res, Ok((&v2[..], ())));
}*/
#[test]
fn eof_on_slices() {
let not_over: &[u8] = &b"Hello, world!"[..];
let is_over: &[u8] = &b""[..];
let res_not_over = eof(not_over);
assert_parse!(
res_not_over,
Err(Err::Error(error_position!(not_over, ErrorKind::Eof)))
);
let res_over = eof(is_over);
assert_parse!(res_over, Ok((is_over, is_over)));
}
#[test]
fn eof_on_strs() {
let not_over: &str = "Hello, world!";
let is_over: &str = "";
let res_not_over = eof(not_over);
assert_parse!(
res_not_over,
Err(Err::Error(error_position!(not_over, ErrorKind::Eof)))
);
let res_over = eof(is_over);
assert_parse!(res_over, Ok((is_over, is_over)));
}
/*
#[test]
fn end_of_input() {
let not_over = &b"Hello, world!"[..];
let is_over = &b""[..];
named!(eof_test, eof!());
let res_not_over = eof_test(not_over);
assert_eq!(res_not_over, Err(Err::Error(error_position!(not_over, ErrorKind::Eof))));
let res_over = eof_test(is_over);
assert_eq!(res_over, Ok((is_over, is_over)));
}
*/
#[test]
fn rest_on_slices() {
let input: &[u8] = &b"Hello, world!"[..];
let empty: &[u8] = &b""[..];
assert_parse!(rest(input), Ok((empty, input)));
}
#[test]
fn rest_on_strs() {
let input: &str = "Hello, world!";
let empty: &str = "";
assert_parse!(rest(input), Ok((empty, input)));
}
#[test]
fn rest_len_on_slices() {
let input: &[u8] = &b"Hello, world!"[..];
assert_parse!(rest_len(input), Ok((input, input.len())));
}
use crate::lib::std::convert::From;
impl From<u32> for CustomError {
fn from(_: u32) -> Self {
CustomError
}
}
impl<I> ParseError<I> for CustomError {
fn from_error_kind(_: I, _: ErrorKind) -> Self {
CustomError
}
fn append(_: I, _: ErrorKind, _: CustomError) -> Self {
CustomError
}
}
struct CustomError;
#[allow(dead_code)]
fn custom_error(input: &[u8]) -> IResult<&[u8], &[u8], CustomError> {
//fix_error!(input, CustomError, alphanumeric)
crate::character::streaming::alphanumeric1(input)
}
#[test]
fn test_flat_map() {
let input: &[u8] = &[3, 100, 101, 102, 103, 104][..];
assert_parse!(
flat_map(u8, take)(input),
Ok((&[103, 104][..], &[100, 101, 102][..]))
);
}
#[test]
fn test_map_opt() {
let input: &[u8] = &[50][..];
assert_parse!(
map_opt(u8, |u| if u < 20 { Some(u) } else { None })(input),
Err(Err::Error((&[50][..], ErrorKind::MapOpt)))
);
assert_parse!(
map_opt(u8, |u| if u > 20 { Some(u) } else { None })(input),
Ok((&[][..], 50))
);
}
#[test]
fn test_map_parser() {
let input: &[u8] = &[100, 101, 102, 103, 104][..];
assert_parse!(
map_parser(take(4usize), take(2usize))(input),
Ok((&[104][..], &[100, 101][..]))
);
}
#[test]
fn test_all_consuming() {
let input: &[u8] = &[100, 101, 102][..];
assert_parse!(
all_consuming(take(2usize))(input),
Err(Err::Error((&[102][..], ErrorKind::Eof)))
);
assert_parse!(
all_consuming(take(3usize))(input),
Ok((&[][..], &[100, 101, 102][..]))
);
}
#[test]
#[allow(unused)]
fn test_verify_ref() {
use crate::bytes::complete::take;
let mut parser1 = verify(take(3u8), |s: &[u8]| s == &b"abc"[..]);
assert_eq!(parser1(&b"abcd"[..]), Ok((&b"d"[..], &b"abc"[..])));
assert_eq!(
parser1(&b"defg"[..]),
Err(Err::Error((&b"defg"[..], ErrorKind::Verify)))
);
fn parser2(i: &[u8]) -> IResult<&[u8], u32> {
verify(crate::number::streaming::be_u32, |val: &u32| *val < 3)(i)
}
}
#[test]
#[cfg(feature = "alloc")]
fn test_verify_alloc() {
use crate::bytes::complete::take;
let mut parser1 = verify(map(take(3u8), |s: &[u8]| s.to_vec()), |s: &[u8]| {
s == &b"abc"[..]
});
assert_eq!(parser1(&b"abcd"[..]), Ok((&b"d"[..], (&b"abc").to_vec())));
assert_eq!(
parser1(&b"defg"[..]),
Err(Err::Error((&b"defg"[..], ErrorKind::Verify)))
);
}
#[test]
#[cfg(feature = "std")]
fn test_into() {
use crate::bytes::complete::take;
use crate::{
error::{Error, ParseError},
Err,
};
let mut parser = into(take::<_, _, Error<_>>(3u8));
let result: IResult<&[u8], Vec<u8>> = parser(&b"abcdefg"[..]);
assert_eq!(result, Ok((&b"defg"[..], vec![97, 98, 99])));
}
#[test]
fn opt_test() {
fn opt_abcd(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
opt(tag("abcd"))(i)
}
let a = &b"abcdef"[..];
let b = &b"bcdefg"[..];
let c = &b"ab"[..];
assert_eq!(opt_abcd(a), Ok((&b"ef"[..], Some(&b"abcd"[..]))));
assert_eq!(opt_abcd(b), Ok((&b"bcdefg"[..], None)));
assert_eq!(opt_abcd(c), Err(Err::Incomplete(Needed::new(2))));
}
#[test]
fn peek_test() {
fn peek_tag(i: &[u8]) -> IResult<&[u8], &[u8]> {
peek(tag("abcd"))(i)
}
assert_eq!(peek_tag(&b"abcdef"[..]), Ok((&b"abcdef"[..], &b"abcd"[..])));
assert_eq!(peek_tag(&b"ab"[..]), Err(Err::Incomplete(Needed::new(2))));
assert_eq!(
peek_tag(&b"xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
}
#[test]
fn not_test() {
fn not_aaa(i: &[u8]) -> IResult<&[u8], ()> {
not(tag("aaa"))(i)
}
assert_eq!(
not_aaa(&b"aaa"[..]),
Err(Err::Error(error_position!(&b"aaa"[..], ErrorKind::Not)))
);
assert_eq!(not_aaa(&b"aa"[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(not_aaa(&b"abcd"[..]), Ok((&b"abcd"[..], ())));
}
#[test]
fn verify_test() {
use crate::bytes::streaming::take;
fn test(i: &[u8]) -> IResult<&[u8], &[u8]> {
verify(take(5u8), |slice: &[u8]| slice[0] == b'a')(i)
}
assert_eq!(test(&b"bcd"[..]), Err(Err::Incomplete(Needed::new(2))));
assert_eq!(
test(&b"bcdefg"[..]),
Err(Err::Error(error_position!(
&b"bcdefg"[..],
ErrorKind::Verify
)))
);
assert_eq!(test(&b"abcdefg"[..]), Ok((&b"fg"[..], &b"abcde"[..])));
}
#[test]
fn fail_test() {
let a = "string";
let b = "another string";
assert_eq!(fail::<_, &str, _>(a), Err(Err::Error((a, ErrorKind::Fail))));
assert_eq!(fail::<_, &str, _>(b), Err(Err::Error((b, ErrorKind::Fail))));
}

View File

@@ -0,0 +1,831 @@
//! Error management
//!
//! Parsers are generic over their error type, requiring that it implements
//! the `error::ParseError<Input>` trait.
use crate::internal::Parser;
use crate::lib::std::fmt;
/// This trait must be implemented by the error type of a nom parser.
///
/// There are already implementations of it for `(Input, ErrorKind)`
/// and `VerboseError<Input>`.
///
/// It provides methods to create an error from some combinators,
/// and combine existing errors in combinators like `alt`.
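///
/// A minimal sketch of a custom error type implementing this trait (the
/// `MyError` name is only illustrative):
///
/// ```
/// use nom::error::{make_error, ErrorKind, ParseError};
///
/// #[derive(Debug, PartialEq)]
/// struct MyError;
///
/// impl<I> ParseError<I> for MyError {
/// fn from_error_kind(_input: I, _kind: ErrorKind) -> Self {
/// MyError
/// }
/// fn append(_input: I, _kind: ErrorKind, other: Self) -> Self {
/// other
/// }
/// }
///
/// let e: MyError = make_error("abc", ErrorKind::Tag);
/// assert_eq!(e, MyError);
/// ```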
pub trait ParseError<I>: Sized {
/// Creates an error from the input position and an [ErrorKind]
fn from_error_kind(input: I, kind: ErrorKind) -> Self;
/// Combines an existing error with a new one created from the input
/// position and an [ErrorKind]. This is useful when backtracking
/// through a parse tree, accumulating error context on the way
fn append(input: I, kind: ErrorKind, other: Self) -> Self;
/// Creates an error from an input position and an expected character
fn from_char(input: I, _: char) -> Self {
Self::from_error_kind(input, ErrorKind::Char)
}
/// Combines two existing errors. This function is used to compare errors
/// generated in various branches of `alt`.
fn or(self, other: Self) -> Self {
other
}
}
/// This trait is required by the `context` combinator to add a static string
/// to an existing error
pub trait ContextError<I>: Sized {
/// Creates a new error from an input position, a static string and an existing error.
/// This is used mainly in the [context] combinator, to add user friendly information
/// to errors when backtracking through a parse tree
fn add_context(_input: I, _ctx: &'static str, other: Self) -> Self {
other
}
}
/// This trait is required by the `map_res` combinator to integrate
/// error types from external functions, like [std::str::FromStr]
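///
/// A hedged usage sketch: with the default `Error` type, `map_res` relies on this
/// trait to absorb the `ParseIntError` returned by `str::parse` (the `byte` parser
/// is only illustrative):
///
/// ```
/// use nom::character::complete::digit1;
/// use nom::combinator::map_res;
/// use nom::IResult;
///
/// fn byte(i: &str) -> IResult<&str, u8> {
/// map_res(digit1, |s: &str| s.parse::<u8>())(i)
/// }
///
/// assert_eq!(byte("42;"), Ok((";", 42)));
/// ```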
pub trait FromExternalError<I, E> {
/// Creates a new error from an input position, an [ErrorKind] indicating the
/// wrapping parser, and an external error
fn from_external_error(input: I, kind: ErrorKind, e: E) -> Self;
}
/// Default error type, only contains the error's location and code
#[derive(Debug, PartialEq)]
pub struct Error<I> {
/// position of the error in the input data
pub input: I,
/// nom error code
pub code: ErrorKind,
}
impl<I> Error<I> {
/// creates a new basic error
pub fn new(input: I, code: ErrorKind) -> Error<I> {
Error { input, code }
}
}
impl<I> ParseError<I> for Error<I> {
fn from_error_kind(input: I, kind: ErrorKind) -> Self {
Error { input, code: kind }
}
fn append(_: I, _: ErrorKind, other: Self) -> Self {
other
}
}
impl<I> ContextError<I> for Error<I> {}
impl<I, E> FromExternalError<I, E> for Error<I> {
/// Create a new error from an input position and an external error
fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self {
Error { input, code: kind }
}
}
/// The Display implementation allows the std::error::Error implementation
impl<I: fmt::Display> fmt::Display for Error<I> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "error {:?} at: {}", self.code, self.input)
}
}
#[cfg(feature = "std")]
impl<I: fmt::Debug + fmt::Display> std::error::Error for Error<I> {}
// for backward compatibility, keep those trait implementations
// for the previously used error type
impl<I> ParseError<I> for (I, ErrorKind) {
fn from_error_kind(input: I, kind: ErrorKind) -> Self {
(input, kind)
}
fn append(_: I, _: ErrorKind, other: Self) -> Self {
other
}
}
impl<I> ContextError<I> for (I, ErrorKind) {}
impl<I, E> FromExternalError<I, E> for (I, ErrorKind) {
fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self {
(input, kind)
}
}
impl<I> ParseError<I> for () {
fn from_error_kind(_: I, _: ErrorKind) -> Self {}
fn append(_: I, _: ErrorKind, _: Self) -> Self {}
}
impl<I> ContextError<I> for () {}
impl<I, E> FromExternalError<I, E> for () {
fn from_external_error(_input: I, _kind: ErrorKind, _e: E) -> Self {}
}
/// Creates an error from the input position and an [ErrorKind]
pub fn make_error<I, E: ParseError<I>>(input: I, kind: ErrorKind) -> E {
E::from_error_kind(input, kind)
}
/// Combines an existing error with a new one created from the input
/// position and an [ErrorKind]. This is useful when backtracking
/// through a parse tree, accumulating error context on the way
pub fn append_error<I, E: ParseError<I>>(input: I, kind: ErrorKind, other: E) -> E {
E::append(input, kind, other)
}
/// This error type accumulates errors and their position when backtracking
/// through a parse tree. With some post processing (cf `examples/json.rs`),
/// it can be used to display user friendly error messages
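///
/// A small sketch of the accumulated structure (the `word` context and the input
/// are only illustrative): the innermost error is pushed first, and each
/// surrounding `context` call appends another entry.
///
/// ```
/// use nom::character::complete::alpha1;
/// use nom::error::{context, VerboseError, VerboseErrorKind};
/// use nom::{Err, IResult};
///
/// fn word(i: &str) -> IResult<&str, &str, VerboseError<&str>> {
/// context("word", alpha1)(i)
/// }
///
/// match word("1;") {
/// Err(Err::Error(e)) => {
/// assert_eq!(e.errors.len(), 2);
/// assert_eq!(e.errors[1].1, VerboseErrorKind::Context("word"));
/// }
/// _ => panic!("expected a recoverable error"),
/// }
/// ```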
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
#[derive(Clone, Debug, PartialEq)]
pub struct VerboseError<I> {
/// List of errors accumulated by `VerboseError`, containing the affected
/// part of input data, and some context
pub errors: crate::lib::std::vec::Vec<(I, VerboseErrorKind)>,
}
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
#[derive(Clone, Debug, PartialEq)]
/// Error context for `VerboseError`
pub enum VerboseErrorKind {
/// Static string added by the `context` function
Context(&'static str),
/// Indicates which character was expected by the `char` function
Char(char),
/// Error kind given by various nom parsers
Nom(ErrorKind),
}
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
impl<I> ParseError<I> for VerboseError<I> {
fn from_error_kind(input: I, kind: ErrorKind) -> Self {
VerboseError {
errors: vec![(input, VerboseErrorKind::Nom(kind))],
}
}
fn append(input: I, kind: ErrorKind, mut other: Self) -> Self {
other.errors.push((input, VerboseErrorKind::Nom(kind)));
other
}
fn from_char(input: I, c: char) -> Self {
VerboseError {
errors: vec![(input, VerboseErrorKind::Char(c))],
}
}
}
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
impl<I> ContextError<I> for VerboseError<I> {
fn add_context(input: I, ctx: &'static str, mut other: Self) -> Self {
other.errors.push((input, VerboseErrorKind::Context(ctx)));
other
}
}
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
impl<I, E> FromExternalError<I, E> for VerboseError<I> {
/// Create a new error from an input position and an external error
fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self {
Self::from_error_kind(input, kind)
}
}
#[cfg(feature = "alloc")]
impl<I: fmt::Display> fmt::Display for VerboseError<I> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "Parse error:")?;
for (input, error) in &self.errors {
match error {
VerboseErrorKind::Nom(e) => writeln!(f, "{:?} at: {}", e, input)?,
VerboseErrorKind::Char(c) => writeln!(f, "expected '{}' at: {}", c, input)?,
VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?,
}
}
Ok(())
}
}
#[cfg(feature = "std")]
impl<I: fmt::Debug + fmt::Display> std::error::Error for VerboseError<I> {}
use crate::internal::{Err, IResult};
/// Create a new error from an input position, a static string and an existing error.
/// This is used mainly in the [context] combinator, to add user friendly information
/// to errors when backtracking through a parse tree
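///
/// A hedged usage sketch (the `word` parser is only illustrative):
///
/// ```
/// use nom::character::complete::alpha1;
/// use nom::error::{context, VerboseError};
/// use nom::IResult;
///
/// fn word(i: &str) -> IResult<&str, &str, VerboseError<&str>> {
/// context("word", alpha1)(i)
/// }
///
/// assert!(word("abc;").is_ok());
/// assert!(word("123;").is_err());
/// ```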
pub fn context<I: Clone, E: ContextError<I>, F, O>(
context: &'static str,
mut f: F,
) -> impl FnMut(I) -> IResult<I, O, E>
where
F: Parser<I, O, E>,
{
move |i: I| match f.parse(i.clone()) {
Ok(o) => Ok(o),
Err(Err::Incomplete(i)) => Err(Err::Incomplete(i)),
Err(Err::Error(e)) => Err(Err::Error(E::add_context(i, context, e))),
Err(Err::Failure(e)) => Err(Err::Failure(E::add_context(i, context, e))),
}
}
/// Transforms a `VerboseError` into a trace with input position information
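///
/// A minimal sketch (the parser and input are only illustrative); the produced
/// `String` points at the offending position in the original input:
///
/// ```
/// use nom::character::complete::char;
/// use nom::error::{convert_error, VerboseError};
/// use nom::{Err, IResult};
///
/// let input = "abc";
/// let result: IResult<&str, char, VerboseError<&str>> = char('1')(input);
/// if let Err(Err::Error(e)) = result {
/// let trace = convert_error(input, e);
/// assert!(trace.contains("expected '1'"));
/// }
/// ```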
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn convert_error<I: core::ops::Deref<Target = str>>(
input: I,
e: VerboseError<I>,
) -> crate::lib::std::string::String {
use crate::lib::std::fmt::Write;
use crate::traits::Offset;
let mut result = crate::lib::std::string::String::new();
for (i, (substring, kind)) in e.errors.iter().enumerate() {
let offset = input.offset(substring);
if input.is_empty() {
match kind {
VerboseErrorKind::Char(c) => {
write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c)
}
VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s),
VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e),
}
} else {
let prefix = &input.as_bytes()[..offset];
// Count the number of newlines in the first `offset` bytes of input
let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1;
// Find the line that includes the subslice:
// Find the *last* newline before the substring starts
let line_begin = prefix
.iter()
.rev()
.position(|&b| b == b'\n')
.map(|pos| offset - pos)
.unwrap_or(0);
// Find the full line after that newline
let line = input[line_begin..]
.lines()
.next()
.unwrap_or(&input[line_begin..])
.trim_end();
// The (1-indexed) column number is the offset of our substring into that line
let column_number = line.offset(substring) + 1;
match kind {
VerboseErrorKind::Char(c) => {
if let Some(actual) = substring.chars().next() {
write!(
&mut result,
"{i}: at line {line_number}:\n\
{line}\n\
{caret:>column$}\n\
expected '{expected}', found {actual}\n\n",
i = i,
line_number = line_number,
line = line,
caret = '^',
column = column_number,
expected = c,
actual = actual,
)
} else {
write!(
&mut result,
"{i}: at line {line_number}:\n\
{line}\n\
{caret:>column$}\n\
expected '{expected}', got end of input\n\n",
i = i,
line_number = line_number,
line = line,
caret = '^',
column = column_number,
expected = c,
)
}
}
VerboseErrorKind::Context(s) => write!(
&mut result,
"{i}: at line {line_number}, in {context}:\n\
{line}\n\
{caret:>column$}\n\n",
i = i,
line_number = line_number,
context = s,
line = line,
caret = '^',
column = column_number,
),
VerboseErrorKind::Nom(e) => write!(
&mut result,
"{i}: at line {line_number}, in {nom_err:?}:\n\
{line}\n\
{caret:>column$}\n\n",
i = i,
line_number = line_number,
nom_err = e,
line = line,
caret = '^',
column = column_number,
),
}
}
// Because `write!` to a `String` is infallible, this `unwrap` is fine.
.unwrap();
}
result
}
/// Indicates which parser returned an error
#[rustfmt::skip]
#[derive(Debug,PartialEq,Eq,Hash,Clone,Copy)]
#[allow(deprecated,missing_docs)]
pub enum ErrorKind {
Tag,
MapRes,
MapOpt,
Alt,
IsNot,
IsA,
SeparatedList,
SeparatedNonEmptyList,
Many0,
Many1,
ManyTill,
Count,
TakeUntil,
LengthValue,
TagClosure,
Alpha,
Digit,
HexDigit,
OctDigit,
AlphaNumeric,
Space,
MultiSpace,
LengthValueFn,
Eof,
Switch,
TagBits,
OneOf,
NoneOf,
Char,
CrLf,
RegexpMatch,
RegexpMatches,
RegexpFind,
RegexpCapture,
RegexpCaptures,
TakeWhile1,
Complete,
Fix,
Escaped,
EscapedTransform,
NonEmpty,
ManyMN,
Not,
Permutation,
Verify,
TakeTill1,
TakeWhileMN,
TooLarge,
Many0Count,
Many1Count,
Float,
Satisfy,
Fail,
}
#[rustfmt::skip]
#[allow(deprecated)]
/// Converts an ErrorKind to a number
pub fn error_to_u32(e: &ErrorKind) -> u32 {
match *e {
ErrorKind::Tag => 1,
ErrorKind::MapRes => 2,
ErrorKind::MapOpt => 3,
ErrorKind::Alt => 4,
ErrorKind::IsNot => 5,
ErrorKind::IsA => 6,
ErrorKind::SeparatedList => 7,
ErrorKind::SeparatedNonEmptyList => 8,
ErrorKind::Many1 => 9,
ErrorKind::Count => 10,
ErrorKind::TakeUntil => 12,
ErrorKind::LengthValue => 15,
ErrorKind::TagClosure => 16,
ErrorKind::Alpha => 17,
ErrorKind::Digit => 18,
ErrorKind::AlphaNumeric => 19,
ErrorKind::Space => 20,
ErrorKind::MultiSpace => 21,
ErrorKind::LengthValueFn => 22,
ErrorKind::Eof => 23,
ErrorKind::Switch => 27,
ErrorKind::TagBits => 28,
ErrorKind::OneOf => 29,
ErrorKind::NoneOf => 30,
ErrorKind::Char => 40,
ErrorKind::CrLf => 41,
ErrorKind::RegexpMatch => 42,
ErrorKind::RegexpMatches => 43,
ErrorKind::RegexpFind => 44,
ErrorKind::RegexpCapture => 45,
ErrorKind::RegexpCaptures => 46,
ErrorKind::TakeWhile1 => 47,
ErrorKind::Complete => 48,
ErrorKind::Fix => 49,
ErrorKind::Escaped => 50,
ErrorKind::EscapedTransform => 51,
ErrorKind::NonEmpty => 56,
ErrorKind::ManyMN => 57,
ErrorKind::HexDigit => 59,
ErrorKind::OctDigit => 61,
ErrorKind::Many0 => 62,
ErrorKind::Not => 63,
ErrorKind::Permutation => 64,
ErrorKind::ManyTill => 65,
ErrorKind::Verify => 66,
ErrorKind::TakeTill1 => 67,
ErrorKind::TakeWhileMN => 69,
ErrorKind::TooLarge => 70,
ErrorKind::Many0Count => 71,
ErrorKind::Many1Count => 72,
ErrorKind::Float => 73,
ErrorKind::Satisfy => 74,
ErrorKind::Fail => 75,
}
}
impl ErrorKind {
#[rustfmt::skip]
#[allow(deprecated)]
/// Converts an ErrorKind to a text description
pub fn description(&self) -> &str {
match *self {
ErrorKind::Tag => "Tag",
ErrorKind::MapRes => "Map on Result",
ErrorKind::MapOpt => "Map on Option",
ErrorKind::Alt => "Alternative",
ErrorKind::IsNot => "IsNot",
ErrorKind::IsA => "IsA",
ErrorKind::SeparatedList => "Separated list",
ErrorKind::SeparatedNonEmptyList => "Separated non empty list",
ErrorKind::Many0 => "Many0",
ErrorKind::Many1 => "Many1",
ErrorKind::Count => "Count",
ErrorKind::TakeUntil => "Take until",
ErrorKind::LengthValue => "Length followed by value",
ErrorKind::TagClosure => "Tag closure",
ErrorKind::Alpha => "Alphabetic",
ErrorKind::Digit => "Digit",
ErrorKind::AlphaNumeric => "AlphaNumeric",
ErrorKind::Space => "Space",
ErrorKind::MultiSpace => "Multiple spaces",
ErrorKind::LengthValueFn => "LengthValueFn",
ErrorKind::Eof => "End of file",
ErrorKind::Switch => "Switch",
ErrorKind::TagBits => "Tag on bitstream",
ErrorKind::OneOf => "OneOf",
ErrorKind::NoneOf => "NoneOf",
ErrorKind::Char => "Char",
ErrorKind::CrLf => "CrLf",
ErrorKind::RegexpMatch => "RegexpMatch",
ErrorKind::RegexpMatches => "RegexpMatches",
ErrorKind::RegexpFind => "RegexpFind",
ErrorKind::RegexpCapture => "RegexpCapture",
ErrorKind::RegexpCaptures => "RegexpCaptures",
ErrorKind::TakeWhile1 => "TakeWhile1",
ErrorKind::Complete => "Complete",
ErrorKind::Fix => "Fix",
ErrorKind::Escaped => "Escaped",
ErrorKind::EscapedTransform => "EscapedTransform",
ErrorKind::NonEmpty => "NonEmpty",
ErrorKind::ManyMN => "Many(m, n)",
ErrorKind::HexDigit => "Hexadecimal Digit",
ErrorKind::OctDigit => "Octal digit",
ErrorKind::Not => "Negation",
ErrorKind::Permutation => "Permutation",
ErrorKind::ManyTill => "ManyTill",
ErrorKind::Verify => "predicate verification",
ErrorKind::TakeTill1 => "TakeTill1",
ErrorKind::TakeWhileMN => "TakeWhileMN",
ErrorKind::TooLarge => "Needed data size is too large",
ErrorKind::Many0Count => "Count occurrence of >=0 patterns",
ErrorKind::Many1Count => "Count occurrence of >=1 patterns",
ErrorKind::Float => "Float",
ErrorKind::Satisfy => "Satisfy",
ErrorKind::Fail => "Fail",
}
}
}
/// Creates a parse error from a `nom::ErrorKind`
/// and the position in the input
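///
/// A hedged usage sketch with the default `(input, ErrorKind)` error pair; this
/// mirrors how the test modules in this crate build expected errors:
///
/// ```
/// use nom::error::ErrorKind;
/// use nom::error_position;
///
/// let e: (&str, ErrorKind) = error_position!("abc", ErrorKind::Tag);
/// assert_eq!(e, ("abc", ErrorKind::Tag));
/// ```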
#[allow(unused_variables)]
#[macro_export(local_inner_macros)]
macro_rules! error_position(
($input:expr, $code:expr) => ({
$crate::error::make_error($input, $code)
});
);
/// Creates a parse error from a `nom::ErrorKind`,
/// the position in the input and the next error in
/// the parsing tree
#[allow(unused_variables)]
#[macro_export(local_inner_macros)]
macro_rules! error_node_position(
($input:expr, $code:expr, $next:expr) => ({
$crate::error::append_error($input, $code, $next)
});
);
/// Prints a message and the input if the parser fails.
///
/// The message prints the `Error` or `Incomplete`
/// and the parser's calling code.
///
/// It also displays the input in hexdump format
///
/// ```rust
/// use nom::{IResult, error::dbg_dmp, bytes::complete::tag};
///
/// fn f(i: &[u8]) -> IResult<&[u8], &[u8]> {
/// dbg_dmp(tag("abcd"), "tag")(i)
/// }
///
/// let a = &b"efghijkl"[..];
///
/// // Will print the following message:
/// // Error(Position(0, [101, 102, 103, 104, 105, 106, 107, 108])) at l.5 by ' tag ! ( "abcd" ) '
/// // 00000000 65 66 67 68 69 6a 6b 6c efghijkl
/// f(a);
/// ```
#[cfg(feature = "std")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "std")))]
pub fn dbg_dmp<'a, F, O, E: std::fmt::Debug>(
f: F,
context: &'static str,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], O, E>
where
F: Fn(&'a [u8]) -> IResult<&'a [u8], O, E>,
{
use crate::HexDisplay;
move |i: &'a [u8]| match f(i) {
Err(e) => {
println!("{}: Error({:?}) at:\n{}", context, e, i.to_hex(8));
Err(e)
}
a => a,
}
}
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
use super::*;
use crate::character::complete::char;
#[test]
fn convert_error_panic() {
let input = "";
let _result: IResult<_, _, VerboseError<&str>> = char('x')(input);
}
}
/*
#[cfg(feature = "alloc")]
use lib::std::{vec::Vec, collections::HashMap};
#[cfg(feature = "std")]
use lib::std::hash::Hash;
#[cfg(feature = "std")]
pub fn add_error_pattern<'a, I: Clone + Hash + Eq, O, E: Clone + Hash + Eq>(
h: &mut HashMap<VerboseError<I>, &'a str>,
e: VerboseError<I>,
message: &'a str,
) -> bool {
h.insert(e, message);
true
}
pub fn slice_to_offsets(input: &[u8], s: &[u8]) -> (usize, usize) {
let start = input.as_ptr();
let off1 = s.as_ptr() as usize - start as usize;
let off2 = off1 + s.len();
(off1, off2)
}
#[cfg(feature = "std")]
pub fn prepare_errors<O, E: Clone>(input: &[u8], e: VerboseError<&[u8]>) -> Option<Vec<(ErrorKind, usize, usize)>> {
let mut v: Vec<(ErrorKind, usize, usize)> = Vec::new();
for (p, kind) in e.errors.drain(..) {
let (o1, o2) = slice_to_offsets(input, p);
v.push((kind, o1, o2));
}
v.reverse();
Some(v)
}
#[cfg(feature = "std")]
pub fn print_error<O, E: Clone>(input: &[u8], res: VerboseError<&[u8]>) {
if let Some(v) = prepare_errors(input, res) {
let colors = generate_colors(&v);
println!("parser codes: {}", print_codes(&colors, &HashMap::new()));
println!("{}", print_offsets(input, 0, &v));
} else {
println!("not an error");
}
}
#[cfg(feature = "std")]
pub fn generate_colors<E>(v: &[(ErrorKind, usize, usize)]) -> HashMap<u32, u8> {
let mut h: HashMap<u32, u8> = HashMap::new();
let mut color = 0;
for &(ref c, _, _) in v.iter() {
h.insert(error_to_u32(c), color + 31);
color = (color + 1) % 7;
}
h
}
pub fn code_from_offset(v: &[(ErrorKind, usize, usize)], offset: usize) -> Option<u32> {
let mut acc: Option<(u32, usize, usize)> = None;
for &(ref ek, s, e) in v.iter() {
let c = error_to_u32(ek);
if s <= offset && offset <= e {
if let Some((_, start, end)) = acc {
if start <= s && e <= end {
acc = Some((c, s, e));
}
} else {
acc = Some((c, s, e));
}
}
}
if let Some((code, _, _)) = acc {
return Some(code);
} else {
return None;
}
}
#[cfg(feature = "alloc")]
pub fn reset_color(v: &mut Vec<u8>) {
v.push(0x1B);
v.push(b'[');
v.push(b'0');
v.push(b'm');
}
#[cfg(feature = "alloc")]
pub fn write_color(v: &mut Vec<u8>, color: u8) {
v.push(0x1B);
v.push(b'[');
v.push(b'1');
v.push(b';');
let s = color.to_string();
let bytes = s.as_bytes();
v.extend(bytes.iter().cloned());
v.push(b'm');
}
#[cfg(feature = "std")]
#[cfg_attr(feature = "cargo-clippy", allow(implicit_hasher))]
pub fn print_codes(colors: &HashMap<u32, u8>, names: &HashMap<u32, &str>) -> String {
let mut v = Vec::new();
for (code, &color) in colors {
if let Some(&s) = names.get(code) {
let bytes = s.as_bytes();
write_color(&mut v, color);
v.extend(bytes.iter().cloned());
} else {
let s = code.to_string();
let bytes = s.as_bytes();
write_color(&mut v, color);
v.extend(bytes.iter().cloned());
}
reset_color(&mut v);
v.push(b' ');
}
reset_color(&mut v);
String::from_utf8_lossy(&v[..]).into_owned()
}
#[cfg(feature = "std")]
pub fn print_offsets(input: &[u8], from: usize, offsets: &[(ErrorKind, usize, usize)]) -> String {
let mut v = Vec::with_capacity(input.len() * 3);
let mut i = from;
let chunk_size = 8;
let mut current_code: Option<u32> = None;
let mut current_code2: Option<u32> = None;
let colors = generate_colors(&offsets);
for chunk in input.chunks(chunk_size) {
let s = format!("{:08x}", i);
for &ch in s.as_bytes().iter() {
v.push(ch);
}
v.push(b'\t');
let mut k = i;
let mut l = i;
for &byte in chunk {
if let Some(code) = code_from_offset(&offsets, k) {
if let Some(current) = current_code {
if current != code {
reset_color(&mut v);
current_code = Some(code);
if let Some(&color) = colors.get(&code) {
write_color(&mut v, color);
}
}
} else {
current_code = Some(code);
if let Some(&color) = colors.get(&code) {
write_color(&mut v, color);
}
}
}
v.push(CHARS[(byte >> 4) as usize]);
v.push(CHARS[(byte & 0xf) as usize]);
v.push(b' ');
k = k + 1;
}
reset_color(&mut v);
if chunk_size > chunk.len() {
for _ in 0..(chunk_size - chunk.len()) {
v.push(b' ');
v.push(b' ');
v.push(b' ');
}
}
v.push(b'\t');
for &byte in chunk {
if let Some(code) = code_from_offset(&offsets, l) {
if let Some(current) = current_code2 {
if current != code {
reset_color(&mut v);
current_code2 = Some(code);
if let Some(&color) = colors.get(&code) {
write_color(&mut v, color);
}
}
} else {
current_code2 = Some(code);
if let Some(&color) = colors.get(&code) {
write_color(&mut v, color);
}
}
}
if (byte >= 32 && byte <= 126) || byte >= 128 {
v.push(byte);
} else {
v.push(b'.');
}
l = l + 1;
}
reset_color(&mut v);
v.push(b'\n');
i = i + chunk_size;
}
String::from_utf8_lossy(&v[..]).into_owned()
}
*/


@@ -0,0 +1,487 @@
//! Basic types to build the parsers
use self::Needed::*;
use crate::error::{self, ErrorKind};
use crate::lib::std::fmt;
use core::num::NonZeroUsize;
/// Holds the result of parsing functions
///
/// It depends on the input type `I`, the output type `O`, and the error type `E`
/// (by default `error::Error<I>`)
///
/// The `Ok` side is a pair containing the remainder of the input (the part of the data that
/// was not parsed) and the produced value. The `Err` side contains an instance of `nom::Err`.
///
/// Outside of the parsing code, you can use the [Finish::finish] method to convert
/// it to a more common result type
pub type IResult<I, O, E = error::Error<I>> = Result<(I, O), Err<E>>;
/// Helper trait to convert a parser's result to a more manageable type
pub trait Finish<I, O, E> {
/// converts the parser's result to a type that is more consumable by error
/// management libraries. It keeps the same `Ok` branch, and merges `Err::Error`
/// and `Err::Failure` into the `Err` side.
///
/// *warning*: if the result is `Err(Err::Incomplete(_))`, this method will panic.
/// - "complete" parsers: It will not be an issue, `Incomplete` is never used
/// - "streaming" parsers: `Incomplete` will be returned if there's not enough data
/// for the parser to decide, and you should gather more data before parsing again.
/// Once the parser returns either `Ok(_)`, `Err(Err::Error(_))` or `Err(Err::Failure(_))`,
/// you can get out of the parsing loop and call `finish()` on the parser's result
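///
/// A minimal sketch with a "complete" parser (so `Incomplete` can never be returned):
///
/// ```rust
/// use nom::{Finish, bytes::complete::tag, error::{Error, ErrorKind}};
///
/// let ok = tag::<_, _, Error<&str>>("abc")("abcdef").finish();
/// assert_eq!(ok, Ok(("def", "abc")));
///
/// let err = tag::<_, _, Error<&str>>("abc")("xyz").finish();
/// assert_eq!(err, Err(Error::new("xyz", ErrorKind::Tag)));
/// ```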
fn finish(self) -> Result<(I, O), E>;
}
impl<I, O, E> Finish<I, O, E> for IResult<I, O, E> {
fn finish(self) -> Result<(I, O), E> {
match self {
Ok(res) => Ok(res),
Err(Err::Error(e)) | Err(Err::Failure(e)) => Err(e),
Err(Err::Incomplete(_)) => {
panic!("Cannot call `finish()` on `Err(Err::Incomplete(_))`: this result means that the parser does not have enough data to decide, you should gather more data and try to reapply the parser instead")
}
}
}
}
/// Contains information on needed data if a parser returned `Incomplete`
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub enum Needed {
/// Needs more data, but we do not know how much
Unknown,
/// Contains the required data size in bytes
Size(NonZeroUsize),
}
impl Needed {
/// Creates `Needed` instance, returns `Needed::Unknown` if the argument is zero
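///
/// For example, a zero size maps to `Unknown`:
///
/// ```rust
/// use nom::Needed;
/// use core::num::NonZeroUsize;
///
/// assert_eq!(Needed::new(0), Needed::Unknown);
/// assert_eq!(Needed::new(4), Needed::Size(NonZeroUsize::new(4).unwrap()));
/// ```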
pub fn new(s: usize) -> Self {
match NonZeroUsize::new(s) {
Some(sz) => Needed::Size(sz),
None => Needed::Unknown,
}
}
/// Indicates if we know how many bytes we need
pub fn is_known(&self) -> bool {
*self != Unknown
}
/// Maps a `Needed` to `Needed` by applying a function to a contained `Size` value.
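///
/// For example, to account for bytes that were already buffered:
///
/// ```rust
/// use nom::Needed;
///
/// assert_eq!(Needed::new(4).map(|n| n.get() + 2), Needed::new(6));
/// assert_eq!(Needed::Unknown.map(|n| n.get() + 2), Needed::Unknown);
/// ```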
#[inline]
pub fn map<F: Fn(NonZeroUsize) -> usize>(self, f: F) -> Needed {
match self {
Unknown => Unknown,
Size(n) => Needed::new(f(n)),
}
}
}
/// The `Err` enum indicates the parser was not successful
///
/// It has three cases:
///
/// * `Incomplete` indicates that more data is needed to decide. The `Needed` enum
/// can contain how many additional bytes are necessary. If you are sure your parser
/// is working on full data, you can wrap your parser with the `complete` combinator
/// to transform that case into `Error`
/// * `Error` means some parser did not succeed, but another one might (as an example,
/// when testing different branches of an `alt` combinator)
/// * `Failure` indicates an unrecoverable error. As an example, if you recognize a prefix
/// to decide on the next parser to apply, and that parser fails, you know there's no need
/// to try other parsers, you were already in the right branch, so the data is invalid
///
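/// A minimal sketch of dispatching on the three cases:
///
/// ```rust
/// use nom::{Err, Needed, error::{Error, ErrorKind}};
///
/// fn describe(e: Err<Error<&str>>) -> &'static str {
///   match e {
///     Err::Incomplete(_) => "needs more data",
///     Err::Error(_) => "recoverable error",
///     Err::Failure(_) => "unrecoverable error",
///   }
/// }
///
/// assert_eq!(describe(Err::Incomplete(Needed::new(1))), "needs more data");
/// assert_eq!(describe(Err::Error(Error::new("x", ErrorKind::Tag))), "recoverable error");
/// ```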
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub enum Err<E> {
/// There was not enough data
Incomplete(Needed),
/// The parser had an error (recoverable)
Error(E),
/// The parser had an unrecoverable error: we got to the right
/// branch and we know other branches won't work, so backtrack
/// as fast as possible
Failure(E),
}
impl<E> Err<E> {
/// Tests if the result is Incomplete
pub fn is_incomplete(&self) -> bool {
if let Err::Incomplete(_) = self {
true
} else {
false
}
}
/// Applies the given function to the inner error
pub fn map<E2, F>(self, f: F) -> Err<E2>
where
F: FnOnce(E) -> E2,
{
match self {
Err::Incomplete(n) => Err::Incomplete(n),
Err::Failure(t) => Err::Failure(f(t)),
Err::Error(t) => Err::Error(f(t)),
}
}
/// Automatically converts between errors if the underlying type supports it
pub fn convert<F>(e: Err<F>) -> Self
where
E: From<F>,
{
e.map(crate::lib::std::convert::Into::into)
}
}
impl<T> Err<(T, ErrorKind)> {
/// Maps `Err<(T, ErrorKind)>` to `Err<(U, ErrorKind)>` with the given `F: T -> U`
pub fn map_input<U, F>(self, f: F) -> Err<(U, ErrorKind)>
where
F: FnOnce(T) -> U,
{
match self {
Err::Incomplete(n) => Err::Incomplete(n),
Err::Failure((input, k)) => Err::Failure((f(input), k)),
Err::Error((input, k)) => Err::Error((f(input), k)),
}
}
}
impl<T> Err<error::Error<T>> {
/// Maps `Err<error::Error<T>>` to `Err<error::Error<U>>` with the given `F: T -> U`
pub fn map_input<U, F>(self, f: F) -> Err<error::Error<U>>
where
F: FnOnce(T) -> U,
{
match self {
Err::Incomplete(n) => Err::Incomplete(n),
Err::Failure(error::Error { input, code }) => Err::Failure(error::Error {
input: f(input),
code,
}),
Err::Error(error::Error { input, code }) => Err::Error(error::Error {
input: f(input),
code,
}),
}
}
}
#[cfg(feature = "alloc")]
use crate::lib::std::{borrow::ToOwned, string::String, vec::Vec};
#[cfg(feature = "alloc")]
impl Err<(&[u8], ErrorKind)> {
/// Obtaining ownership
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn to_owned(self) -> Err<(Vec<u8>, ErrorKind)> {
self.map_input(ToOwned::to_owned)
}
}
#[cfg(feature = "alloc")]
impl Err<(&str, ErrorKind)> {
/// Obtaining ownership
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn to_owned(self) -> Err<(String, ErrorKind)> {
self.map_input(ToOwned::to_owned)
}
}
#[cfg(feature = "alloc")]
impl Err<error::Error<&[u8]>> {
/// Obtaining ownership
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn to_owned(self) -> Err<error::Error<Vec<u8>>> {
self.map_input(ToOwned::to_owned)
}
}
#[cfg(feature = "alloc")]
impl Err<error::Error<&str>> {
/// Obtaining ownership
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn to_owned(self) -> Err<error::Error<String>> {
self.map_input(ToOwned::to_owned)
}
}
impl<E: Eq> Eq for Err<E> {}
impl<E> fmt::Display for Err<E>
where
E: fmt::Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Err::Incomplete(Needed::Size(u)) => write!(f, "Parsing requires {} bytes/chars", u),
Err::Incomplete(Needed::Unknown) => write!(f, "Parsing requires more data"),
Err::Failure(c) => write!(f, "Parsing Failure: {:?}", c),
Err::Error(c) => write!(f, "Parsing Error: {:?}", c),
}
}
}
#[cfg(feature = "std")]
use std::error::Error;
#[cfg(feature = "std")]
impl<E> Error for Err<E>
where
E: fmt::Debug,
{
fn source(&self) -> Option<&(dyn Error + 'static)> {
None // no underlying error
}
}
/// All nom parsers implement this trait
pub trait Parser<I, O, E> {
/// A parser takes in an input type, and returns a `Result` containing
/// either the remaining input and the output value, or an error
fn parse(&mut self, input: I) -> IResult<I, O, E>;
/// Maps a function over the result of a parser
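///
/// A minimal sketch (with the default `Error` type):
///
/// ```rust
/// use nom::{Parser, Err, error::{Error, ErrorKind}, character::complete::digit1};
///
/// let mut parser = digit1.map(|s: &str| s.len());
///
/// // counts how many characters `digit1` recognized
/// assert_eq!(parser.parse("123456"), Ok(("", 6)));
/// assert_eq!(parser.parse("abc"), Err(Err::Error(Error::new("abc", ErrorKind::Digit))));
/// ```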
fn map<G, O2>(self, g: G) -> Map<Self, G, O>
where
G: Fn(O) -> O2,
Self: core::marker::Sized,
{
Map {
f: self,
g,
phantom: core::marker::PhantomData,
}
}
/// Creates a second parser from the output of the first one, then applies it over the rest of the input
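///
/// A minimal length-prefixed-data sketch (with the default `Error` type):
///
/// ```rust
/// use nom::{Parser, Err, error::{Error, ErrorKind}, bytes::complete::take, number::complete::u8};
///
/// // read one length byte, then take that many bytes
/// let mut length_data = u8.flat_map(take);
///
/// assert_eq!(length_data.parse(&[2, 0, 1, 2][..]), Ok((&[2][..], &[0, 1][..])));
/// assert_eq!(length_data.parse(&[4, 0, 1, 2][..]), Err(Err::Error(Error::new(&[0, 1, 2][..], ErrorKind::Eof))));
/// ```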
fn flat_map<G, H, O2>(self, g: G) -> FlatMap<Self, G, O>
where
G: FnMut(O) -> H,
H: Parser<I, O2, E>,
Self: core::marker::Sized,
{
FlatMap {
f: self,
g,
phantom: core::marker::PhantomData,
}
}
/// Applies a second parser over the output of the first one
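///
/// A minimal sketch: parse digits found inside the first five characters:
///
/// ```rust
/// use nom::{Parser, Err, error::{Error, ErrorKind}, bytes::complete::take, character::complete::digit1};
///
/// let mut parser = take(5u8).and_then(digit1);
///
/// assert_eq!(parser.parse("12345;"), Ok((";", "12345")));
/// assert_eq!(parser.parse("123a5;"), Ok((";", "123")));
/// assert_eq!(parser.parse("abcde;"), Err(Err::Error(Error::new("abcde", ErrorKind::Digit))));
/// ```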
fn and_then<G, O2>(self, g: G) -> AndThen<Self, G, O>
where
G: Parser<O, O2, E>,
Self: core::marker::Sized,
{
AndThen {
f: self,
g,
phantom: core::marker::PhantomData,
}
}
/// Applies a second parser after the first one, and returns their results as a tuple
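///
/// A minimal sketch pairing two parsers (with the default `Error` type):
///
/// ```rust
/// use nom::{Parser, Err, error::{Error, ErrorKind}, character::complete::{alpha1, digit1}};
///
/// let mut parser = alpha1.and(digit1);
///
/// assert_eq!(parser.parse("abc123"), Ok(("", ("abc", "123"))));
/// assert_eq!(parser.parse("123abc"), Err(Err::Error(Error::new("123abc", ErrorKind::Alpha))));
/// ```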
fn and<G, O2>(self, g: G) -> And<Self, G>
where
G: Parser<I, O2, E>,
Self: core::marker::Sized,
{
And { f: self, g }
}
/// Applies a second parser over the input if the first one failed
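///
/// A minimal sketch trying one parser, then the other (with the default `Error` type):
///
/// ```rust
/// use nom::{Parser, Err, error::{Error, ErrorKind}, character::complete::{alpha1, digit1}};
///
/// let mut parser = alpha1.or(digit1);
///
/// assert_eq!(parser.parse("abc;"), Ok((";", "abc")));
/// assert_eq!(parser.parse("123;"), Ok((";", "123")));
/// // when both branches fail, the default `Error` type keeps the second error
/// assert_eq!(parser.parse(";;;"), Err(Err::Error(Error::new(";;;", ErrorKind::Digit))));
/// ```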
fn or<G>(self, g: G) -> Or<Self, G>
where
G: Parser<I, O, E>,
Self: core::marker::Sized,
{
Or { f: self, g }
}
/// Automatically converts the parser's output and error values to another type, as long as they
/// implement the `From` trait
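///
/// A minimal sketch converting the output type from `&[u8]` to `Vec<u8>`:
///
/// ```rust
/// use nom::{IResult, Parser, bytes::complete::take};
///
/// fn first_three(i: &[u8]) -> IResult<&[u8], &[u8]> {
///   take(3u8)(i)
/// }
///
/// let mut parser = Parser::into(first_three);
///
/// let result: IResult<&[u8], Vec<u8>> = parser.parse(&b"abcdefg"[..]);
/// assert_eq!(result, Ok((&b"defg"[..], b"abc".to_vec())));
/// ```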
fn into<O2: From<O>, E2: From<E>>(self) -> Into<Self, O, O2, E, E2>
where
Self: core::marker::Sized,
{
Into {
f: self,
phantom_out1: core::marker::PhantomData,
phantom_err1: core::marker::PhantomData,
phantom_out2: core::marker::PhantomData,
phantom_err2: core::marker::PhantomData,
}
}
}
impl<'a, I, O, E, F> Parser<I, O, E> for F
where
F: FnMut(I) -> IResult<I, O, E> + 'a,
{
fn parse(&mut self, i: I) -> IResult<I, O, E> {
self(i)
}
}
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
#[cfg(feature = "alloc")]
impl<'a, I, O, E> Parser<I, O, E> for Box<dyn Parser<I, O, E> + 'a> {
fn parse(&mut self, input: I) -> IResult<I, O, E> {
(**self).parse(input)
}
}
/// Implementation of `Parser::map`
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub struct Map<F, G, O1> {
f: F,
g: G,
phantom: core::marker::PhantomData<O1>,
}
impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> O2> Parser<I, O2, E> for Map<F, G, O1> {
fn parse(&mut self, i: I) -> IResult<I, O2, E> {
match self.f.parse(i) {
Err(e) => Err(e),
Ok((i, o)) => Ok((i, (self.g)(o))),
}
}
}
/// Implementation of `Parser::flat_map`
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub struct FlatMap<F, G, O1> {
f: F,
g: G,
phantom: core::marker::PhantomData<O1>,
}
impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> H, H: Parser<I, O2, E>> Parser<I, O2, E>
for FlatMap<F, G, O1>
{
fn parse(&mut self, i: I) -> IResult<I, O2, E> {
let (i, o1) = self.f.parse(i)?;
(self.g)(o1).parse(i)
}
}
/// Implementation of `Parser::and_then`
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub struct AndThen<F, G, O1> {
f: F,
g: G,
phantom: core::marker::PhantomData<O1>,
}
impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<O1, O2, E>> Parser<I, O2, E>
for AndThen<F, G, O1>
{
fn parse(&mut self, i: I) -> IResult<I, O2, E> {
let (i, o1) = self.f.parse(i)?;
let (_, o2) = self.g.parse(o1)?;
Ok((i, o2))
}
}
/// Implementation of `Parser::and`
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub struct And<F, G> {
f: F,
g: G,
}
impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<I, O2, E>> Parser<I, (O1, O2), E>
for And<F, G>
{
fn parse(&mut self, i: I) -> IResult<I, (O1, O2), E> {
let (i, o1) = self.f.parse(i)?;
let (i, o2) = self.g.parse(i)?;
Ok((i, (o1, o2)))
}
}
/// Implementation of `Parser::or`
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub struct Or<F, G> {
f: F,
g: G,
}
impl<'a, I: Clone, O, E: crate::error::ParseError<I>, F: Parser<I, O, E>, G: Parser<I, O, E>>
Parser<I, O, E> for Or<F, G>
{
fn parse(&mut self, i: I) -> IResult<I, O, E> {
match self.f.parse(i.clone()) {
Err(Err::Error(e1)) => match self.g.parse(i) {
Err(Err::Error(e2)) => Err(Err::Error(e1.or(e2))),
res => res,
},
res => res,
}
}
}
/// Implementation of `Parser::into`
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub struct Into<F, O1, O2: From<O1>, E1, E2: From<E1>> {
f: F,
phantom_out1: core::marker::PhantomData<O1>,
phantom_err1: core::marker::PhantomData<E1>,
phantom_out2: core::marker::PhantomData<O2>,
phantom_err2: core::marker::PhantomData<E2>,
}
impl<
'a,
I: Clone,
O1,
O2: From<O1>,
E1,
E2: crate::error::ParseError<I> + From<E1>,
F: Parser<I, O1, E1>,
> Parser<I, O2, E2> for Into<F, O1, O2, E1, E2>
{
fn parse(&mut self, i: I) -> IResult<I, O2, E2> {
match self.f.parse(i) {
Ok((i, o)) => Ok((i, o.into())),
Err(Err::Error(e)) => Err(Err::Error(e.into())),
Err(Err::Failure(e)) => Err(Err::Failure(e.into())),
Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::error::ErrorKind;
#[doc(hidden)]
#[macro_export]
macro_rules! assert_size (
($t:ty, $sz:expr) => (
assert_eq!(crate::lib::std::mem::size_of::<$t>(), $sz);
);
);
#[test]
#[cfg(target_pointer_width = "64")]
fn size_test() {
assert_size!(IResult<&[u8], &[u8], (&[u8], u32)>, 40);
assert_size!(IResult<&str, &str, u32>, 40);
assert_size!(Needed, 8);
assert_size!(Err<u32>, 16);
assert_size!(ErrorKind, 1);
}
#[test]
fn err_map_test() {
let e = Err::Error(1);
assert_eq!(e.map(|v| v + 1), Err::Error(2));
}
}


@@ -0,0 +1,463 @@
//! # nom, eating data byte by byte
//!
//! nom is a parser combinator library with a focus on safe parsing,
//! streaming patterns, and zero-copy parsing wherever possible.
//!
//! ## Example
//!
//! ```rust
//! use nom::{
//! IResult,
//! bytes::complete::{tag, take_while_m_n},
//! combinator::map_res,
//! sequence::tuple};
//!
//! #[derive(Debug,PartialEq)]
//! pub struct Color {
//! pub red: u8,
//! pub green: u8,
//! pub blue: u8,
//! }
//!
//! fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
//! u8::from_str_radix(input, 16)
//! }
//!
//! fn is_hex_digit(c: char) -> bool {
//! c.is_digit(16)
//! }
//!
//! fn hex_primary(input: &str) -> IResult<&str, u8> {
//! map_res(
//! take_while_m_n(2, 2, is_hex_digit),
//! from_hex
//! )(input)
//! }
//!
//! fn hex_color(input: &str) -> IResult<&str, Color> {
//! let (input, _) = tag("#")(input)?;
//! let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?;
//!
//! Ok((input, Color { red, green, blue }))
//! }
//!
//! fn main() {
//! assert_eq!(hex_color("#2F14DF"), Ok(("", Color {
//! red: 47,
//! green: 20,
//! blue: 223,
//! })));
//! }
//! ```
//!
//! The code is available on [Github](https://github.com/Geal/nom)
//!
//! There are a few [guides](https://github.com/Geal/nom/tree/main/doc) with more details
//! about [how to write parsers](https://github.com/Geal/nom/blob/main/doc/making_a_new_parser_from_scratch.md),
//! or the [error management system](https://github.com/Geal/nom/blob/main/doc/error_management.md).
//! You can also check out the [recipes] module that contains examples of common patterns.
//!
//! **Looking for a specific combinator? Read the
//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md)**
//!
//! If you are upgrading to nom 5.0, please read the
//! [migration document](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_5.md).
//!
//! ## Parser combinators
//!
//! Parser combinators are an approach to parsers that is very different from
//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and
//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar
//! in a separate syntax and generating the corresponding code, you use very small
//! functions with very specific purposes, like "take 5 bytes", or "recognize the
//! word 'HTTP'", and assemble them in meaningful patterns like "recognize
//! 'HTTP', then a space, then a version".
//! The resulting code is small, and looks like the grammar you would have
//! written with other parser approaches.
//!
//! This gives us a few advantages:
//!
//! - The parsers are small and easy to write
//! - The parser components are easy to reuse (if they're general enough, please add them to nom!)
//! - The parser components are easy to test separately (unit tests and property-based tests)
//! - The parser combination code looks close to the grammar you would have written
//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest
//!
//! Here is an example of one such parser, to recognize text between parentheses:
//!
//! ```rust
//! use nom::{
//! IResult,
//! sequence::delimited,
//! // see the "streaming/complete" paragraph lower for an explanation of these submodules
//! character::complete::char,
//! bytes::complete::is_not
//! };
//!
//! fn parens(input: &str) -> IResult<&str, &str> {
//! delimited(char('('), is_not(")"), char(')'))(input)
//! }
//! ```
//!
//! It defines a function named `parens` which will recognize a sequence of the
//! character `(`, the longest byte array not containing `)`, then the character
//! `)`, and will return the byte array in the middle.
//!
//! Here is another parser, written without using nom's combinators this time:
//!
//! ```rust
//! use nom::{IResult, Err, Needed};
//!
//! # fn main() {
//! fn take4(i: &[u8]) -> IResult<&[u8], &[u8]>{
//! if i.len() < 4 {
//! Err(Err::Incomplete(Needed::new(4)))
//! } else {
//! Ok((&i[4..], &i[0..4]))
//! }
//! }
//! # }
//! ```
//!
//! This function takes a byte array as input, and tries to consume 4 bytes.
//! Writing all the parsers manually, like this, is dangerous, despite Rust's
//! safety features. There are still a lot of mistakes one can make. That's why
//! nom provides a list of functions to help in developing parsers.
//!
//! With functions, you would write it like this:
//!
//! ```rust
//! use nom::{IResult, bytes::streaming::take};
//! fn take4(input: &str) -> IResult<&str, &str> {
//! take(4u8)(input)
//! }
//! ```
//!
//! A parser in nom is a function which, for an input type `I`, an output type `O`
//! and an optional error type `E`, will have the following signature:
//!
//! ```rust,compile_fail
//! fn parser(input: I) -> IResult<I, O, E>;
//! ```
//!
//! Or like this, if you don't want to specify a custom error type (it will be `error::Error<I>` by default):
//!
//! ```rust,compile_fail
//! fn parser(input: I) -> IResult<I, O>;
//! ```
//!
//! `IResult` is an alias for the `Result` type:
//!
//! ```rust
//! use nom::{Needed, error::Error};
//!
//! type IResult<I, O, E = Error<I>> = Result<(I, O), Err<E>>;
//!
//! enum Err<E> {
//! Incomplete(Needed),
//! Error(E),
//! Failure(E),
//! }
//! ```
//!
//! It can have the following values:
//!
//! - A correct result `Ok((I,O))` with the first element being the remaining input (not parsed yet), and the second being the output value;
//! - An error `Err(Err::Error(c))` with `c` an error that can be built from the input position and a parser specific error
//! - An error `Err(Err::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed
//! - An error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: We cannot backtrack and test another parser
//!
//! Please refer to the ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) for an exhaustive list of parsers.
//! See also the rest of the documentation [here](https://github.com/Geal/nom/blob/main/doc).
//!
//! ## Making new parsers with function combinators
//!
//! nom is based on functions that generate parsers, with a signature like
//! this: `(arguments) -> impl Fn(Input) -> IResult<Input, Output, Error>`.
//! The arguments of a combinator can be direct values (like `take` which uses
//! a number of bytes or characters as argument) or even other parsers (like
//! `delimited` which takes as argument 3 parsers, and returns the result of
//! the second one if all are successful).
//!
//! Here are some examples:
//!
//! ```rust
//! use nom::IResult;
//! use nom::bytes::complete::{tag, take};
//! fn abcd_parser(i: &str) -> IResult<&str, &str> {
//! tag("abcd")(i) // will consume bytes if the input begins with "abcd"
//! }
//!
//! fn take_10(i: &[u8]) -> IResult<&[u8], &[u8]> {
//! take(10u8)(i) // will consume and return 10 bytes of input
//! }
//! ```
//!
//! ## Combining parsers
//!
//! There are higher level patterns, like the **`alt`** combinator, which
//! provides a choice between multiple parsers. If one branch fails, it tries
//! the next, and returns the result of the first parser that succeeds:
//!
//! ```rust
//! use nom::IResult;
//! use nom::branch::alt;
//! use nom::bytes::complete::tag;
//!
//! let mut alt_tags = alt((tag("abcd"), tag("efgh")));
//!
//! assert_eq!(alt_tags(&b"abcdxxx"[..]), Ok((&b"xxx"[..], &b"abcd"[..])));
//! assert_eq!(alt_tags(&b"efghxxx"[..]), Ok((&b"xxx"[..], &b"efgh"[..])));
//! assert_eq!(alt_tags(&b"ijklxxx"[..]), Err(nom::Err::Error((&b"ijklxxx"[..], nom::error::ErrorKind::Tag))));
//! ```
//!
//! The **`opt`** combinator makes a parser optional. If the child parser returns
//! an error, **`opt`** will still succeed and return None:
//!
//! ```rust
//! use nom::{IResult, combinator::opt, bytes::complete::tag};
//! fn abcd_opt(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
//! opt(tag("abcd"))(i)
//! }
//!
//! assert_eq!(abcd_opt(&b"abcdxxx"[..]), Ok((&b"xxx"[..], Some(&b"abcd"[..]))));
//! assert_eq!(abcd_opt(&b"efghxxx"[..]), Ok((&b"efghxxx"[..], None)));
//! ```
//!
//! **`many0`** applies a parser 0 or more times, and returns a vector of the aggregated results:
//!
//! ```rust
//! # #[cfg(feature = "alloc")]
//! # fn main() {
//! use nom::{IResult, multi::many0, bytes::complete::tag};
//! use std::str;
//!
//! fn multi(i: &str) -> IResult<&str, Vec<&str>> {
//! many0(tag("abcd"))(i)
//! }
//!
//! let a = "abcdef";
//! let b = "abcdabcdef";
//! let c = "azerty";
//! assert_eq!(multi(a), Ok(("ef", vec!["abcd"])));
//! assert_eq!(multi(b), Ok(("ef", vec!["abcd", "abcd"])));
//! assert_eq!(multi(c), Ok(("azerty", Vec::new())));
//! # }
//! # #[cfg(not(feature = "alloc"))]
//! # fn main() {}
//! ```
//!
//! Here are some basic combinators available:
//!
//! - **`opt`**: Will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`)
//! - **`many0`**: Will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`)
//! - **`many1`**: Will apply the parser 1 or more times
//!
//! There are more complex (and more useful) parsers like `tuple`, which is
//! used to apply a series of parsers then assemble their results.
//!
//! Example with `tuple`:
//!
//! ```rust
//! # fn main() {
//! use nom::{error::ErrorKind, Needed,
//! number::streaming::be_u16,
//! bytes::streaming::{tag, take},
//! sequence::tuple};
//!
//! let mut tpl = tuple((be_u16, take(3u8), tag("fg")));
//!
//! assert_eq!(
//! tpl(&b"abcdefgh"[..]),
//! Ok((
//! &b"h"[..],
//! (0x6162u16, &b"cde"[..], &b"fg"[..])
//! ))
//! );
//! assert_eq!(tpl(&b"abcde"[..]), Err(nom::Err::Incomplete(Needed::new(2))));
//! let input = &b"abcdejk"[..];
//! assert_eq!(tpl(input), Err(nom::Err::Error((&input[5..], ErrorKind::Tag))));
//! # }
//! ```
//!
//! But you can also use a sequence of combinators written in imperative style,
//! thanks to the `?` operator:
//!
//! ```rust
//! # fn main() {
//! use nom::{IResult, bytes::complete::tag};
//!
//! #[derive(Debug, PartialEq)]
//! struct A {
//! a: u8,
//! b: u8
//! }
//!
//! fn ret_int1(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,1)) }
//! fn ret_int2(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,2)) }
//!
//! fn f(i: &[u8]) -> IResult<&[u8], A> {
//! // if successful, the parser returns `Ok((remaining_input, output_value))` that we can destructure
//! let (i, _) = tag("abcd")(i)?;
//! let (i, a) = ret_int1(i)?;
//! let (i, _) = tag("efgh")(i)?;
//! let (i, b) = ret_int2(i)?;
//!
//! Ok((i, A { a, b }))
//! }
//!
//! let r = f(b"abcdefghX");
//! assert_eq!(r, Ok((&b"X"[..], A{a: 1, b: 2})));
//! # }
//! ```
//!
//! ## Streaming / Complete
//!
//! Some of nom's modules have `streaming` or `complete` submodules. They hold
//! different variants of the same combinators.
//!
//! A streaming parser assumes that we might not have all of the input data.
//! This can happen with network protocols or large file parsers, where the
//! input buffer can be full and needs to be resized or refilled.
//!
//! A complete parser assumes that we already have all of the input data.
//! This will be the common case with small files that can be read entirely into
//! memory.
//!
//! Here is how it works in practice:
//!
//! ```rust
//! use nom::{IResult, Err, Needed, error::{Error, ErrorKind}, bytes, character};
//!
//! fn take_streaming(i: &[u8]) -> IResult<&[u8], &[u8]> {
//! bytes::streaming::take(4u8)(i)
//! }
//!
//! fn take_complete(i: &[u8]) -> IResult<&[u8], &[u8]> {
//! bytes::complete::take(4u8)(i)
//! }
//!
//! // both parsers will take 4 bytes as expected
//! assert_eq!(take_streaming(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
//! assert_eq!(take_complete(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
//!
//! // if the input is smaller than 4 bytes, the streaming parser
//! // will return `Incomplete` to indicate that we need more data
//! assert_eq!(take_streaming(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1))));
//!
//! // but the complete parser will return an error
//! assert_eq!(take_complete(&b"abc"[..]), Err(Err::Error(Error::new(&b"abc"[..], ErrorKind::Eof))));
//!
//! // the alpha0 function recognizes 0 or more alphabetic characters
//! fn alpha0_streaming(i: &str) -> IResult<&str, &str> {
//! character::streaming::alpha0(i)
//! }
//!
//! fn alpha0_complete(i: &str) -> IResult<&str, &str> {
//! character::complete::alpha0(i)
//! }
//!
//! // if there's a clear limit to the recognized characters, both parsers work the same way
//! assert_eq!(alpha0_streaming("abcd;"), Ok((";", "abcd")));
//! assert_eq!(alpha0_complete("abcd;"), Ok((";", "abcd")));
//!
//! // but when there's no limit, the streaming version returns `Incomplete`, because it cannot
//! // know if more input data should be recognized. The whole input could be "abcd;", or
//! // "abcde;"
//! assert_eq!(alpha0_streaming("abcd"), Err(Err::Incomplete(Needed::new(1))));
//!
//! // while the complete version knows that all of the data is there
//! assert_eq!(alpha0_complete("abcd"), Ok(("", "abcd")));
//! ```
//! **Going further:** Read the [guides](https://github.com/Geal/nom/tree/main/doc),
//! check out the [recipes]!
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(feature = "cargo-clippy", allow(clippy::doc_markdown))]
#![cfg_attr(feature = "docsrs", feature(doc_cfg))]
#![cfg_attr(feature = "docsrs", feature(extended_key_value_attributes))]
#![deny(missing_docs)]
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
#[cfg(feature = "alloc")]
#[macro_use]
extern crate alloc;
#[cfg(doctest)]
extern crate doc_comment;
#[cfg(doctest)]
doc_comment::doctest!("../README.md");
/// Lib module to re-export everything needed from `std` or `core`/`alloc`. This is how `serde` does
/// it, albeit there it is not public.
#[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
pub mod lib {
/// `std` facade allowing `std`/`core` to be interchangeable. Reexports `alloc` crate optionally,
/// as well as `core` or `std`
#[cfg(not(feature = "std"))]
#[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
/// internal std exports for no_std compatibility
pub mod std {
#[doc(hidden)]
#[cfg(not(feature = "alloc"))]
pub use core::borrow;
#[cfg(feature = "alloc")]
#[doc(hidden)]
pub use alloc::{borrow, boxed, string, vec};
#[doc(hidden)]
pub use core::{cmp, convert, fmt, iter, mem, ops, option, result, slice, str};
/// internal reproduction of std prelude
#[doc(hidden)]
pub mod prelude {
pub use core::prelude as v1;
}
}
#[cfg(feature = "std")]
#[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
/// internal std exports for no_std compatibility
pub mod std {
#[doc(hidden)]
pub use std::{
alloc, borrow, boxed, cmp, collections, convert, fmt, hash, iter, mem, ops, option, result,
slice, str, string, vec,
};
/// internal reproduction of std prelude
#[doc(hidden)]
pub mod prelude {
pub use std::prelude as v1;
}
}
}
pub use self::bits::*;
pub use self::internal::*;
pub use self::traits::*;
pub use self::str::*;
#[macro_use]
pub mod error;
pub mod combinator;
mod internal;
mod traits;
#[macro_use]
pub mod branch;
pub mod multi;
pub mod sequence;
pub mod bits;
pub mod bytes;
pub mod character;
mod str;
pub mod number;
#[cfg(feature = "docsrs")]
#[cfg_attr(feature = "docsrs", cfg_attr(feature = "docsrs", doc = include_str!("../doc/nom_recipes.md")))]
pub mod recipes {}


@@ -0,0 +1,981 @@
//! Combinators applying their child parser multiple times
#[cfg(test)]
mod tests;
use crate::error::ErrorKind;
use crate::error::ParseError;
use crate::internal::{Err, IResult, Needed, Parser};
#[cfg(feature = "alloc")]
use crate::lib::std::vec::Vec;
use crate::traits::{InputLength, InputTake, ToUsize};
use core::num::NonZeroUsize;
/// Repeats the embedded parser until it fails
/// and returns the results in a `Vec`.
///
/// # Arguments
/// * `f` The parser to apply.
///
/// *Note*: if the parser passed to `many0` accepts empty inputs
/// (like `alpha0` or `digit0`), `many0` will return an error,
/// to prevent going into an infinite loop
///
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::multi::many0;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
/// many0(tag("abc"))(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"])));
/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"])));
/// assert_eq!(parser("123123"), Ok(("123123", vec![])));
/// assert_eq!(parser(""), Ok(("", vec![])));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn many0<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
E: ParseError<I>,
{
move |mut i: I| {
let mut acc = crate::lib::std::vec::Vec::with_capacity(4);
loop {
let len = i.input_len();
match f.parse(i.clone()) {
Err(Err::Error(_)) => return Ok((i, acc)),
Err(e) => return Err(e),
Ok((i1, o)) => {
// infinite loop check: the parser must always consume
if i1.input_len() == len {
return Err(Err::Error(E::from_error_kind(i, ErrorKind::Many0)));
}
i = i1;
acc.push(o);
}
}
}
}
}
/// Runs the embedded parser until it fails and
/// returns the results in a `Vec`. Fails if
/// the embedded parser does not produce at least
/// one result.
///
/// # Arguments
/// * `f` The parser to apply.
///
/// *Note*: If the parser passed to `many1` accepts empty inputs
/// (like `alpha0` or `digit0`), `many1` will return an error,
/// to prevent going into an infinite loop.
///
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::multi::many1;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
/// many1(tag("abc"))(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"])));
/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"])));
/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Tag))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn many1<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
E: ParseError<I>,
{
move |mut i: I| match f.parse(i.clone()) {
Err(Err::Error(err)) => Err(Err::Error(E::append(i, ErrorKind::Many1, err))),
Err(e) => Err(e),
Ok((i1, o)) => {
let mut acc = crate::lib::std::vec::Vec::with_capacity(4);
acc.push(o);
i = i1;
loop {
let len = i.input_len();
match f.parse(i.clone()) {
Err(Err::Error(_)) => return Ok((i, acc)),
Err(e) => return Err(e),
Ok((i1, o)) => {
// infinite loop check: the parser must always consume
if i1.input_len() == len {
return Err(Err::Error(E::from_error_kind(i, ErrorKind::Many1)));
}
i = i1;
acc.push(o);
}
}
}
}
}
}
/// Applies the parser `f` until the parser `g` produces
/// a result. Returns a pair consisting of the results of
/// `f` in a `Vec` and the result of `g`.
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::multi::many_till;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, (Vec<&str>, &str)> {
/// many_till(tag("abc"), tag("end"))(s)
/// };
///
/// assert_eq!(parser("abcabcend"), Ok(("", (vec!["abc", "abc"], "end"))));
/// assert_eq!(parser("abc123end"), Err(Err::Error(Error::new("123end", ErrorKind::Tag))));
/// assert_eq!(parser("123123end"), Err(Err::Error(Error::new("123123end", ErrorKind::Tag))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// assert_eq!(parser("abcendefg"), Ok(("efg", (vec!["abc"], "end"))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn many_till<I, O, P, E, F, G>(
mut f: F,
mut g: G,
) -> impl FnMut(I) -> IResult<I, (Vec<O>, P), E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
G: Parser<I, P, E>,
E: ParseError<I>,
{
move |mut i: I| {
let mut res = crate::lib::std::vec::Vec::new();
loop {
let len = i.input_len();
match g.parse(i.clone()) {
Ok((i1, o)) => return Ok((i1, (res, o))),
Err(Err::Error(_)) => {
match f.parse(i.clone()) {
Err(Err::Error(err)) => return Err(Err::Error(E::append(i, ErrorKind::ManyTill, err))),
Err(e) => return Err(e),
Ok((i1, o)) => {
// infinite loop check: the parser must always consume
if i1.input_len() == len {
return Err(Err::Error(E::from_error_kind(i1, ErrorKind::ManyTill)));
}
res.push(o);
i = i1;
}
}
}
Err(e) => return Err(e),
}
}
}
}
/// Alternates between two parsers to produce
/// a list of elements.
/// # Arguments
/// * `sep` Parses the separator between list elements.
/// * `f` Parses the elements of the list.
///
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::multi::separated_list0;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
/// separated_list0(tag("|"), tag("abc"))(s)
/// }
///
/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"])));
/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"])));
/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"])));
/// assert_eq!(parser(""), Ok(("", vec![])));
/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![])));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn separated_list0<I, O, O2, E, F, G>(
mut sep: G,
mut f: F,
) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
G: Parser<I, O2, E>,
E: ParseError<I>,
{
move |mut i: I| {
let mut res = Vec::new();
match f.parse(i.clone()) {
Err(Err::Error(_)) => return Ok((i, res)),
Err(e) => return Err(e),
Ok((i1, o)) => {
res.push(o);
i = i1;
}
}
loop {
let len = i.input_len();
match sep.parse(i.clone()) {
Err(Err::Error(_)) => return Ok((i, res)),
Err(e) => return Err(e),
Ok((i1, _)) => {
// infinite loop check: the parser must always consume
if i1.input_len() == len {
return Err(Err::Error(E::from_error_kind(i1, ErrorKind::SeparatedList)));
}
match f.parse(i1.clone()) {
Err(Err::Error(_)) => return Ok((i, res)),
Err(e) => return Err(e),
Ok((i2, o)) => {
res.push(o);
i = i2;
}
}
}
}
}
}
}
/// Alternates between two parsers to produce
/// a list of elements. Fails if the element
/// parser does not produce at least one element.
/// # Arguments
/// * `sep` Parses the separator between list elements.
/// * `f` Parses the elements of the list.
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::multi::separated_list1;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
/// separated_list1(tag("|"), tag("abc"))(s)
/// }
///
/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"])));
/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"])));
/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"])));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// assert_eq!(parser("def|abc"), Err(Err::Error(Error::new("def|abc", ErrorKind::Tag))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn separated_list1<I, O, O2, E, F, G>(
mut sep: G,
mut f: F,
) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
G: Parser<I, O2, E>,
E: ParseError<I>,
{
move |mut i: I| {
let mut res = Vec::new();
// Parse the first element
match f.parse(i.clone()) {
Err(e) => return Err(e),
Ok((i1, o)) => {
res.push(o);
i = i1;
}
}
loop {
let len = i.input_len();
match sep.parse(i.clone()) {
Err(Err::Error(_)) => return Ok((i, res)),
Err(e) => return Err(e),
Ok((i1, _)) => {
// infinite loop check: the parser must always consume
if i1.input_len() == len {
return Err(Err::Error(E::from_error_kind(i1, ErrorKind::SeparatedList)));
}
match f.parse(i1.clone()) {
Err(Err::Error(_)) => return Ok((i, res)),
Err(e) => return Err(e),
Ok((i2, o)) => {
res.push(o);
i = i2;
}
}
}
}
}
}
}
/// Repeats the embedded parser `n` times or until it fails
/// and returns the results in a `Vec`. Fails if the
/// embedded parser does not succeed at least `m` times.
/// # Arguments
/// * `m` The minimum number of iterations.
/// * `n` The maximum number of iterations.
/// * `f` The parser to apply.
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::multi::many_m_n;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
/// many_m_n(0, 2, tag("abc"))(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"])));
/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"])));
/// assert_eq!(parser("123123"), Ok(("123123", vec![])));
/// assert_eq!(parser(""), Ok(("", vec![])));
/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"])));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn many_m_n<I, O, E, F>(
min: usize,
max: usize,
mut parse: F,
) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
E: ParseError<I>,
{
move |mut input: I| {
if min > max {
return Err(Err::Failure(E::from_error_kind(input, ErrorKind::ManyMN)));
}
let mut res = crate::lib::std::vec::Vec::with_capacity(min);
for count in 0..max {
let len = input.input_len();
match parse.parse(input.clone()) {
Ok((tail, value)) => {
// infinite loop check: the parser must always consume
if tail.input_len() == len {
return Err(Err::Error(E::from_error_kind(input, ErrorKind::ManyMN)));
}
res.push(value);
input = tail;
}
Err(Err::Error(e)) => {
if count < min {
return Err(Err::Error(E::append(input, ErrorKind::ManyMN, e)));
} else {
return Ok((input, res));
}
}
Err(e) => {
return Err(e);
}
}
}
Ok((input, res))
}
}
/// Repeats the embedded parser until it fails
/// and returns the number of successful iterations.
/// # Arguments
/// * `f` The parser to apply.
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::multi::many0_count;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, usize> {
/// many0_count(tag("abc"))(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", 2)));
/// assert_eq!(parser("abc123"), Ok(("123", 1)));
/// assert_eq!(parser("123123"), Ok(("123123", 0)));
/// assert_eq!(parser(""), Ok(("", 0)));
/// ```
pub fn many0_count<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, usize, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
E: ParseError<I>,
{
move |i: I| {
let mut input = i;
let mut count = 0;
loop {
let input_ = input.clone();
let len = input.input_len();
match f.parse(input_) {
Ok((i, _)) => {
// infinite loop check: the parser must always consume
if i.input_len() == len {
return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0Count)));
}
input = i;
count += 1;
}
Err(Err::Error(_)) => return Ok((input, count)),
Err(e) => return Err(e),
}
}
}
}
/// Repeats the embedded parser until it fails
/// and returns the number of successful iterations.
/// Fails if the embedded parser does not succeed
/// at least once.
/// # Arguments
/// * `f` The parser to apply.
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::multi::many1_count;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, usize> {
/// many1_count(tag("abc"))(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", 2)));
/// assert_eq!(parser("abc123"), Ok(("123", 1)));
/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Many1Count))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Many1Count))));
/// ```
pub fn many1_count<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, usize, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
E: ParseError<I>,
{
move |i: I| {
let i_ = i.clone();
match f.parse(i_) {
Err(Err::Error(_)) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Many1Count))),
Err(i) => Err(i),
Ok((i1, _)) => {
let mut count = 1;
let mut input = i1;
loop {
let len = input.input_len();
let input_ = input.clone();
match f.parse(input_) {
Err(Err::Error(_)) => return Ok((input, count)),
Err(e) => return Err(e),
Ok((i, _)) => {
// infinite loop check: the parser must always consume
if i.input_len() == len {
return Err(Err::Error(E::from_error_kind(i, ErrorKind::Many1Count)));
}
count += 1;
input = i;
}
}
}
}
}
}
}
/// Runs the embedded parser a specified number
/// of times. Returns the results in a `Vec`.
/// # Arguments
/// * `f` The parser to apply.
/// * `count` How often to apply the parser.
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::multi::count;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
/// count(tag("abc"), 2)(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"])));
/// assert_eq!(parser("abc123"), Err(Err::Error(Error::new("123", ErrorKind::Tag))));
/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Tag))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"])));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn count<I, O, E, F>(mut f: F, count: usize) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
I: Clone + PartialEq,
F: Parser<I, O, E>,
E: ParseError<I>,
{
move |i: I| {
let mut input = i.clone();
let mut res = crate::lib::std::vec::Vec::with_capacity(count);
for _ in 0..count {
let input_ = input.clone();
match f.parse(input_) {
Ok((i, o)) => {
res.push(o);
input = i;
}
Err(Err::Error(e)) => {
return Err(Err::Error(E::append(i, ErrorKind::Count, e)));
}
Err(e) => {
return Err(e);
}
}
}
Ok((input, res))
}
}
/// Runs the embedded parser repeatedly, filling the given slice with results. This parser fails if
/// the input runs out before the given slice is full.
/// # Arguments
/// * `f` The parser to apply.
/// * `buf` The slice to fill
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::multi::fill;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, [&str; 2]> {
/// let mut buf = ["", ""];
/// let (rest, ()) = fill(tag("abc"), &mut buf)(s)?;
/// Ok((rest, buf))
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", ["abc", "abc"])));
/// assert_eq!(parser("abc123"), Err(Err::Error(Error::new("123", ErrorKind::Tag))));
/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Tag))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// assert_eq!(parser("abcabcabc"), Ok(("abc", ["abc", "abc"])));
/// ```
pub fn fill<'a, I, O, E, F>(f: F, buf: &'a mut [O]) -> impl FnMut(I) -> IResult<I, (), E> + 'a
where
I: Clone + PartialEq,
F: Fn(I) -> IResult<I, O, E> + 'a,
E: ParseError<I>,
{
move |i: I| {
let mut input = i.clone();
for elem in buf.iter_mut() {
let input_ = input.clone();
match f(input_) {
Ok((i, o)) => {
*elem = o;
input = i;
}
Err(Err::Error(e)) => {
return Err(Err::Error(E::append(i, ErrorKind::Count, e)));
}
Err(e) => {
return Err(e);
}
}
}
Ok((input, ()))
}
}
/// Applies a parser until it fails and accumulates
/// the results using a given function and initial value.
/// # Arguments
/// * `f` The parser to apply.
/// * `init` A function returning the initial value.
/// * `g` The function that combines a result of `f` with
/// the current accumulator.
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::multi::fold_many0;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
/// fold_many0(
/// tag("abc"),
/// Vec::new,
/// |mut acc: Vec<_>, item| {
/// acc.push(item);
/// acc
/// }
/// )(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"])));
/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"])));
/// assert_eq!(parser("123123"), Ok(("123123", vec![])));
/// assert_eq!(parser(""), Ok(("", vec![])));
/// ```
pub fn fold_many0<I, O, E, F, G, H, R>(
mut f: F,
mut init: H,
mut g: G,
) -> impl FnMut(I) -> IResult<I, R, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
G: FnMut(R, O) -> R,
H: FnMut() -> R,
E: ParseError<I>,
{
move |i: I| {
let mut res = init();
let mut input = i;
loop {
let i_ = input.clone();
let len = input.input_len();
match f.parse(i_) {
Ok((i, o)) => {
// infinite loop check: the parser must always consume
if i.input_len() == len {
return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0)));
}
res = g(res, o);
input = i;
}
Err(Err::Error(_)) => {
return Ok((input, res));
}
Err(e) => {
return Err(e);
}
}
}
}
}
/// Applies a parser until it fails and accumulates
/// the results using a given function and initial value.
/// Fails if the embedded parser does not succeed at least
/// once.
/// # Arguments
/// * `f` The parser to apply.
/// * `init` A function returning the initial value.
/// * `g` The function that combines a result of `f` with
/// the current accumulator.
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::multi::fold_many1;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
/// fold_many1(
/// tag("abc"),
/// Vec::new,
/// |mut acc: Vec<_>, item| {
/// acc.push(item);
/// acc
/// }
/// )(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"])));
/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"])));
/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Many1))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Many1))));
/// ```
pub fn fold_many1<I, O, E, F, G, H, R>(
mut f: F,
mut init: H,
mut g: G,
) -> impl FnMut(I) -> IResult<I, R, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
G: FnMut(R, O) -> R,
H: FnMut() -> R,
E: ParseError<I>,
{
move |i: I| {
let _i = i.clone();
let init = init();
match f.parse(_i) {
Err(Err::Error(_)) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Many1))),
Err(e) => Err(e),
Ok((i1, o1)) => {
let mut acc = g(init, o1);
let mut input = i1;
loop {
let _input = input.clone();
let len = input.input_len();
match f.parse(_input) {
Err(Err::Error(_)) => {
break;
}
Err(e) => return Err(e),
Ok((i, o)) => {
// infinite loop check: the parser must always consume
if i.input_len() == len {
return Err(Err::Failure(E::from_error_kind(i, ErrorKind::Many1)));
}
acc = g(acc, o);
input = i;
}
}
}
Ok((input, acc))
}
}
}
}
/// Applies a parser `n` times or until it fails and accumulates
/// the results using a given function and initial value.
/// Fails if the embedded parser does not succeed at least `m`
/// times.
/// # Arguments
/// * `m` The minimum number of iterations.
/// * `n` The maximum number of iterations.
/// * `f` The parser to apply.
/// * `init` A function returning the initial value.
/// * `g` The function that combines a result of `f` with
/// the current accumulator.
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::multi::fold_many_m_n;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
/// fold_many_m_n(
/// 0,
/// 2,
/// tag("abc"),
/// Vec::new,
/// |mut acc: Vec<_>, item| {
/// acc.push(item);
/// acc
/// }
/// )(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"])));
/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"])));
/// assert_eq!(parser("123123"), Ok(("123123", vec![])));
/// assert_eq!(parser(""), Ok(("", vec![])));
/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"])));
/// ```
pub fn fold_many_m_n<I, O, E, F, G, H, R>(
min: usize,
max: usize,
mut parse: F,
mut init: H,
mut fold: G,
) -> impl FnMut(I) -> IResult<I, R, E>
where
I: Clone + InputLength,
F: Parser<I, O, E>,
G: FnMut(R, O) -> R,
H: FnMut() -> R,
E: ParseError<I>,
{
move |mut input: I| {
if min > max {
return Err(Err::Failure(E::from_error_kind(input, ErrorKind::ManyMN)));
}
let mut acc = init();
for count in 0..max {
let len = input.input_len();
match parse.parse(input.clone()) {
Ok((tail, value)) => {
// infinite loop check: the parser must always consume
if tail.input_len() == len {
return Err(Err::Error(E::from_error_kind(tail, ErrorKind::ManyMN)));
}
acc = fold(acc, value);
input = tail;
}
//FIXME: handle failure properly
Err(Err::Error(err)) => {
if count < min {
return Err(Err::Error(E::append(input, ErrorKind::ManyMN, err)));
} else {
break;
}
}
Err(e) => return Err(e),
}
}
Ok((input, acc))
}
}
/// Gets a number from the parser and returns a
/// subslice of the input of that size.
/// If the parser returns `Incomplete`,
/// `length_data` will return an error.
/// # Arguments
/// * `f` The parser to apply.
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::number::complete::be_u16;
/// use nom::multi::length_data;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &[u8]) -> IResult<&[u8], &[u8]> {
/// length_data(be_u16)(s)
/// }
///
/// assert_eq!(parser(b"\x00\x03abcefg"), Ok((&b"efg"[..], &b"abc"[..])));
/// assert_eq!(parser(b"\x00\x03a"), Err(Err::Incomplete(Needed::new(2))));
/// ```
pub fn length_data<I, N, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, I, E>
where
I: InputLength + InputTake,
N: ToUsize,
F: Parser<I, N, E>,
E: ParseError<I>,
{
move |i: I| {
let (i, length) = f.parse(i)?;
let length: usize = length.to_usize();
if let Some(needed) = length
.checked_sub(i.input_len())
.and_then(NonZeroUsize::new)
{
Err(Err::Incomplete(Needed::Size(needed)))
} else {
Ok(i.take_split(length))
}
}
}
/// Gets a number from the first parser,
/// takes a subslice of the input of that size,
/// then applies the second parser on that subslice.
/// If the second parser returns `Incomplete`,
/// `length_value` will return an error.
/// # Arguments
/// * `f` The parser to apply.
/// * `g` The parser to apply on the subslice.
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::number::complete::be_u16;
/// use nom::multi::length_value;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &[u8]) -> IResult<&[u8], &[u8]> {
/// length_value(be_u16, tag("abc"))(s)
/// }
///
/// assert_eq!(parser(b"\x00\x03abcefg"), Ok((&b"efg"[..], &b"abc"[..])));
/// assert_eq!(parser(b"\x00\x03123123"), Err(Err::Error(Error::new(&b"123"[..], ErrorKind::Tag))));
/// assert_eq!(parser(b"\x00\x03a"), Err(Err::Incomplete(Needed::new(2))));
/// ```
pub fn length_value<I, O, N, E, F, G>(mut f: F, mut g: G) -> impl FnMut(I) -> IResult<I, O, E>
where
I: Clone + InputLength + InputTake,
N: ToUsize,
F: Parser<I, N, E>,
G: Parser<I, O, E>,
E: ParseError<I>,
{
move |i: I| {
let (i, length) = f.parse(i)?;
let length: usize = length.to_usize();
if let Some(needed) = length
.checked_sub(i.input_len())
.and_then(NonZeroUsize::new)
{
Err(Err::Incomplete(Needed::Size(needed)))
} else {
let (rest, i) = i.take_split(length);
match g.parse(i.clone()) {
Err(Err::Incomplete(_)) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Complete))),
Err(e) => Err(e),
Ok((_, o)) => Ok((rest, o)),
}
}
}
}
/// Gets a number from the first parser,
/// then applies the second parser that many times.
/// # Arguments
/// * `f` The parser to apply to obtain the count.
/// * `g` The parser to apply repeatedly.
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult};
/// use nom::number::complete::u8;
/// use nom::multi::length_count;
/// use nom::bytes::complete::tag;
/// use nom::combinator::map;
///
/// fn parser(s: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
/// length_count(map(u8, |i| {
/// println!("got number: {}", i);
/// i
/// }), tag("abc"))(s)
/// }
///
/// assert_eq!(parser(&b"\x02abcabcabc"[..]), Ok(((&b"abc"[..], vec![&b"abc"[..], &b"abc"[..]]))));
/// assert_eq!(parser(b"\x03123123123"), Err(Err::Error(Error::new(&b"123123123"[..], ErrorKind::Tag))));
/// ```
#[cfg(feature = "alloc")]
pub fn length_count<I, O, N, E, F, G>(mut f: F, mut g: G) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
I: Clone,
N: ToUsize,
F: Parser<I, N, E>,
G: Parser<I, O, E>,
E: ParseError<I>,
{
move |i: I| {
let (i, count) = f.parse(i)?;
let mut input = i.clone();
let mut res = Vec::new();
for _ in 0..count.to_usize() {
let input_ = input.clone();
match g.parse(input_) {
Ok((i, o)) => {
res.push(o);
input = i;
}
Err(Err::Error(e)) => {
return Err(Err::Error(E::append(i, ErrorKind::Count, e)));
}
Err(e) => {
return Err(e);
}
}
}
Ok((input, res))
}
}

View File

@@ -0,0 +1,534 @@
use super::{length_data, length_value, many0_count, many1_count};
use crate::{
bytes::streaming::tag,
character::streaming::digit1 as digit,
error::{ErrorKind, ParseError},
internal::{Err, IResult, Needed},
lib::std::str::{self, FromStr},
number::streaming::{be_u16, be_u8},
sequence::{pair, tuple},
};
#[cfg(feature = "alloc")]
use crate::{
lib::std::vec::Vec,
multi::{
count, fold_many0, fold_many1, fold_many_m_n, length_count, many0, many1, many_m_n, many_till,
separated_list0, separated_list1,
},
};
#[test]
#[cfg(feature = "alloc")]
fn separated_list0_test() {
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
separated_list0(tag(","), tag("abcd"))(i)
}
fn multi_empty(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
separated_list0(tag(","), tag(""))(i)
}
fn empty_sep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
separated_list0(tag(""), tag("abc"))(i)
}
fn multi_longsep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
separated_list0(tag(".."), tag("abcd"))(i)
}
let a = &b"abcdef"[..];
let b = &b"abcd,abcdef"[..];
let c = &b"azerty"[..];
let d = &b",,abc"[..];
let e = &b"abcd,abcd,ef"[..];
let f = &b"abc"[..];
let g = &b"abcd."[..];
let h = &b"abcd,abc"[..];
let i = &b"abcabc"[..];
let res1 = vec![&b"abcd"[..]];
assert_eq!(multi(a), Ok((&b"ef"[..], res1)));
let res2 = vec![&b"abcd"[..], &b"abcd"[..]];
assert_eq!(multi(b), Ok((&b"ef"[..], res2)));
assert_eq!(multi(c), Ok((&b"azerty"[..], Vec::new())));
let res3 = vec![&b""[..], &b""[..], &b""[..]];
assert_eq!(multi_empty(d), Ok((&b"abc"[..], res3)));
let i_err_pos = &i[3..];
assert_eq!(
empty_sep(i),
Err(Err::Error(error_position!(
i_err_pos,
ErrorKind::SeparatedList
)))
);
let res4 = vec![&b"abcd"[..], &b"abcd"[..]];
assert_eq!(multi(e), Ok((&b",ef"[..], res4)));
assert_eq!(multi(f), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(multi_longsep(g), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(multi(h), Err(Err::Incomplete(Needed::new(1))));
}
#[test]
#[cfg(feature = "alloc")]
fn separated_list1_test() {
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
separated_list1(tag(","), tag("abcd"))(i)
}
fn multi_longsep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
separated_list1(tag(".."), tag("abcd"))(i)
}
let a = &b"abcdef"[..];
let b = &b"abcd,abcdef"[..];
let c = &b"azerty"[..];
let d = &b"abcd,abcd,ef"[..];
let f = &b"abc"[..];
let g = &b"abcd."[..];
let h = &b"abcd,abc"[..];
let res1 = vec![&b"abcd"[..]];
assert_eq!(multi(a), Ok((&b"ef"[..], res1)));
let res2 = vec![&b"abcd"[..], &b"abcd"[..]];
assert_eq!(multi(b), Ok((&b"ef"[..], res2)));
assert_eq!(
multi(c),
Err(Err::Error(error_position!(c, ErrorKind::Tag)))
);
let res3 = vec![&b"abcd"[..], &b"abcd"[..]];
assert_eq!(multi(d), Ok((&b",ef"[..], res3)));
assert_eq!(multi(f), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(multi_longsep(g), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(multi(h), Err(Err::Incomplete(Needed::new(1))));
}
#[test]
#[cfg(feature = "alloc")]
fn many0_test() {
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many0(tag("abcd"))(i)
}
fn multi_empty(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many0(tag(""))(i)
}
assert_eq!(multi(&b"abcdef"[..]), Ok((&b"ef"[..], vec![&b"abcd"[..]])));
assert_eq!(
multi(&b"abcdabcdefgh"[..]),
Ok((&b"efgh"[..], vec![&b"abcd"[..], &b"abcd"[..]]))
);
assert_eq!(multi(&b"azerty"[..]), Ok((&b"azerty"[..], Vec::new())));
assert_eq!(multi(&b"abcdab"[..]), Err(Err::Incomplete(Needed::new(2))));
assert_eq!(multi(&b"abcd"[..]), Err(Err::Incomplete(Needed::new(4))));
assert_eq!(multi(&b""[..]), Err(Err::Incomplete(Needed::new(4))));
assert_eq!(
multi_empty(&b"abcdef"[..]),
Err(Err::Error(error_position!(
&b"abcdef"[..],
ErrorKind::Many0
)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn many1_test() {
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many1(tag("abcd"))(i)
}
let a = &b"abcdef"[..];
let b = &b"abcdabcdefgh"[..];
let c = &b"azerty"[..];
let d = &b"abcdab"[..];
let res1 = vec![&b"abcd"[..]];
assert_eq!(multi(a), Ok((&b"ef"[..], res1)));
let res2 = vec![&b"abcd"[..], &b"abcd"[..]];
assert_eq!(multi(b), Ok((&b"efgh"[..], res2)));
assert_eq!(
multi(c),
Err(Err::Error(error_position!(c, ErrorKind::Tag)))
);
assert_eq!(multi(d), Err(Err::Incomplete(Needed::new(2))));
}
#[test]
#[cfg(feature = "alloc")]
fn many_till_test() {
fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> {
many_till(tag("abcd"), tag("efgh"))(i)
}
let a = b"abcdabcdefghabcd";
let b = b"efghabcd";
let c = b"azerty";
let res_a = (vec![&b"abcd"[..], &b"abcd"[..]], &b"efgh"[..]);
let res_b: (Vec<&[u8]>, &[u8]) = (Vec::new(), &b"efgh"[..]);
assert_eq!(multi(&a[..]), Ok((&b"abcd"[..], res_a)));
assert_eq!(multi(&b[..]), Ok((&b"abcd"[..], res_b)));
assert_eq!(
multi(&c[..]),
Err(Err::Error(error_node_position!(
&c[..],
ErrorKind::ManyTill,
error_position!(&c[..], ErrorKind::Tag)
)))
);
}
#[test]
#[cfg(feature = "std")]
fn infinite_many() {
fn tst(input: &[u8]) -> IResult<&[u8], &[u8]> {
println!("input: {:?}", input);
Err(Err::Error(error_position!(input, ErrorKind::Tag)))
}
// should not go into an infinite loop
fn multi0(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many0(tst)(i)
}
let a = &b"abcdef"[..];
assert_eq!(multi0(a), Ok((a, Vec::new())));
fn multi1(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many1(tst)(i)
}
let a = &b"abcdef"[..];
assert_eq!(
multi1(a),
Err(Err::Error(error_position!(a, ErrorKind::Tag)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn many_m_n_test() {
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many_m_n(2, 4, tag("Abcd"))(i)
}
let a = &b"Abcdef"[..];
let b = &b"AbcdAbcdefgh"[..];
let c = &b"AbcdAbcdAbcdAbcdefgh"[..];
let d = &b"AbcdAbcdAbcdAbcdAbcdefgh"[..];
let e = &b"AbcdAb"[..];
assert_eq!(
multi(a),
Err(Err::Error(error_position!(&b"ef"[..], ErrorKind::Tag)))
);
let res1 = vec![&b"Abcd"[..], &b"Abcd"[..]];
assert_eq!(multi(b), Ok((&b"efgh"[..], res1)));
let res2 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]];
assert_eq!(multi(c), Ok((&b"efgh"[..], res2)));
let res3 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]];
assert_eq!(multi(d), Ok((&b"Abcdefgh"[..], res3)));
assert_eq!(multi(e), Err(Err::Incomplete(Needed::new(2))));
}
#[test]
#[cfg(feature = "alloc")]
fn count_test() {
const TIMES: usize = 2;
fn cnt_2(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
count(tag("abc"), TIMES)(i)
}
assert_eq!(
cnt_2(&b"abcabcabcdef"[..]),
Ok((&b"abcdef"[..], vec![&b"abc"[..], &b"abc"[..]]))
);
assert_eq!(cnt_2(&b"ab"[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(cnt_2(&b"abcab"[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(
cnt_2(&b"xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
assert_eq!(
cnt_2(&b"xxxabcabcdef"[..]),
Err(Err::Error(error_position!(
&b"xxxabcabcdef"[..],
ErrorKind::Tag
)))
);
assert_eq!(
cnt_2(&b"abcxxxabcdef"[..]),
Err(Err::Error(error_position!(
&b"xxxabcdef"[..],
ErrorKind::Tag
)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn count_zero() {
const TIMES: usize = 0;
fn counter_2(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
count(tag("abc"), TIMES)(i)
}
let done = &b"abcabcabcdef"[..];
let parsed_done = Vec::new();
let rest = done;
let incomplete_1 = &b"ab"[..];
let parsed_incompl_1 = Vec::new();
let incomplete_2 = &b"abcab"[..];
let parsed_incompl_2 = Vec::new();
let error = &b"xxx"[..];
let error_remain = &b"xxx"[..];
let parsed_err = Vec::new();
let error_1 = &b"xxxabcabcdef"[..];
let parsed_err_1 = Vec::new();
let error_1_remain = &b"xxxabcabcdef"[..];
let error_2 = &b"abcxxxabcdef"[..];
let parsed_err_2 = Vec::new();
let error_2_remain = &b"abcxxxabcdef"[..];
assert_eq!(counter_2(done), Ok((rest, parsed_done)));
assert_eq!(
counter_2(incomplete_1),
Ok((incomplete_1, parsed_incompl_1))
);
assert_eq!(
counter_2(incomplete_2),
Ok((incomplete_2, parsed_incompl_2))
);
assert_eq!(counter_2(error), Ok((error_remain, parsed_err)));
assert_eq!(counter_2(error_1), Ok((error_1_remain, parsed_err_1)));
assert_eq!(counter_2(error_2), Ok((error_2_remain, parsed_err_2)));
}
#[derive(Debug, Clone, PartialEq)]
pub struct NilError;
impl<I> From<(I, ErrorKind)> for NilError {
fn from(_: (I, ErrorKind)) -> Self {
NilError
}
}
impl<I> ParseError<I> for NilError {
fn from_error_kind(_: I, _: ErrorKind) -> NilError {
NilError
}
fn append(_: I, _: ErrorKind, _: NilError) -> NilError {
NilError
}
}
fn number(i: &[u8]) -> IResult<&[u8], u32> {
use crate::combinator::map_res;
map_res(map_res(digit, str::from_utf8), FromStr::from_str)(i)
}
#[test]
#[cfg(feature = "alloc")]
fn length_count_test() {
fn cnt(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
length_count(number, tag("abc"))(i)
}
assert_eq!(
cnt(&b"2abcabcabcdef"[..]),
Ok((&b"abcdef"[..], vec![&b"abc"[..], &b"abc"[..]]))
);
assert_eq!(cnt(&b"2ab"[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(cnt(&b"3abcab"[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(
cnt(&b"xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Digit)))
);
assert_eq!(
cnt(&b"2abcxxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
}
#[test]
fn length_data_test() {
fn take(i: &[u8]) -> IResult<&[u8], &[u8]> {
length_data(number)(i)
}
assert_eq!(
take(&b"6abcabcabcdef"[..]),
Ok((&b"abcdef"[..], &b"abcabc"[..]))
);
assert_eq!(take(&b"3ab"[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(
take(&b"xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Digit)))
);
assert_eq!(take(&b"2abcxxx"[..]), Ok((&b"cxxx"[..], &b"ab"[..])));
}
#[test]
fn length_value_test() {
fn length_value_1(i: &[u8]) -> IResult<&[u8], u16> {
length_value(be_u8, be_u16)(i)
}
fn length_value_2(i: &[u8]) -> IResult<&[u8], (u8, u8)> {
length_value(be_u8, tuple((be_u8, be_u8)))(i)
}
let i1 = [0, 5, 6];
assert_eq!(
length_value_1(&i1),
Err(Err::Error(error_position!(&b""[..], ErrorKind::Complete)))
);
assert_eq!(
length_value_2(&i1),
Err(Err::Error(error_position!(&b""[..], ErrorKind::Complete)))
);
let i2 = [1, 5, 6, 3];
assert_eq!(
length_value_1(&i2),
Err(Err::Error(error_position!(&i2[1..2], ErrorKind::Complete)))
);
assert_eq!(
length_value_2(&i2),
Err(Err::Error(error_position!(&i2[1..2], ErrorKind::Complete)))
);
let i3 = [2, 5, 6, 3, 4, 5, 7];
assert_eq!(length_value_1(&i3), Ok((&i3[3..], 1286)));
assert_eq!(length_value_2(&i3), Ok((&i3[3..], (5, 6))));
let i4 = [3, 5, 6, 3, 4, 5];
assert_eq!(length_value_1(&i4), Ok((&i4[4..], 1286)));
assert_eq!(length_value_2(&i4), Ok((&i4[4..], (5, 6))));
}
#[test]
#[cfg(feature = "alloc")]
fn fold_many0_test() {
fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> {
acc.push(item);
acc
}
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
fold_many0(tag("abcd"), Vec::new, fold_into_vec)(i)
}
fn multi_empty(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
fold_many0(tag(""), Vec::new, fold_into_vec)(i)
}
assert_eq!(multi(&b"abcdef"[..]), Ok((&b"ef"[..], vec![&b"abcd"[..]])));
assert_eq!(
multi(&b"abcdabcdefgh"[..]),
Ok((&b"efgh"[..], vec![&b"abcd"[..], &b"abcd"[..]]))
);
assert_eq!(multi(&b"azerty"[..]), Ok((&b"azerty"[..], Vec::new())));
assert_eq!(multi(&b"abcdab"[..]), Err(Err::Incomplete(Needed::new(2))));
assert_eq!(multi(&b"abcd"[..]), Err(Err::Incomplete(Needed::new(4))));
assert_eq!(multi(&b""[..]), Err(Err::Incomplete(Needed::new(4))));
assert_eq!(
multi_empty(&b"abcdef"[..]),
Err(Err::Error(error_position!(
&b"abcdef"[..],
ErrorKind::Many0
)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn fold_many1_test() {
fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> {
acc.push(item);
acc
}
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
fold_many1(tag("abcd"), Vec::new, fold_into_vec)(i)
}
let a = &b"abcdef"[..];
let b = &b"abcdabcdefgh"[..];
let c = &b"azerty"[..];
let d = &b"abcdab"[..];
let res1 = vec![&b"abcd"[..]];
assert_eq!(multi(a), Ok((&b"ef"[..], res1)));
let res2 = vec![&b"abcd"[..], &b"abcd"[..]];
assert_eq!(multi(b), Ok((&b"efgh"[..], res2)));
assert_eq!(
multi(c),
Err(Err::Error(error_position!(c, ErrorKind::Many1)))
);
assert_eq!(multi(d), Err(Err::Incomplete(Needed::new(2))));
}
#[test]
#[cfg(feature = "alloc")]
fn fold_many_m_n_test() {
fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> {
acc.push(item);
acc
}
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
fold_many_m_n(2, 4, tag("Abcd"), Vec::new, fold_into_vec)(i)
}
let a = &b"Abcdef"[..];
let b = &b"AbcdAbcdefgh"[..];
let c = &b"AbcdAbcdAbcdAbcdefgh"[..];
let d = &b"AbcdAbcdAbcdAbcdAbcdefgh"[..];
let e = &b"AbcdAb"[..];
assert_eq!(
multi(a),
Err(Err::Error(error_position!(&b"ef"[..], ErrorKind::Tag)))
);
let res1 = vec![&b"Abcd"[..], &b"Abcd"[..]];
assert_eq!(multi(b), Ok((&b"efgh"[..], res1)));
let res2 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]];
assert_eq!(multi(c), Ok((&b"efgh"[..], res2)));
let res3 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]];
assert_eq!(multi(d), Ok((&b"Abcdefgh"[..], res3)));
assert_eq!(multi(e), Err(Err::Incomplete(Needed::new(2))));
}
#[test]
fn many0_count_test() {
fn count0_nums(i: &[u8]) -> IResult<&[u8], usize> {
many0_count(pair(digit, tag(",")))(i)
}
assert_eq!(count0_nums(&b"123,junk"[..]), Ok((&b"junk"[..], 1)));
assert_eq!(count0_nums(&b"123,45,junk"[..]), Ok((&b"junk"[..], 2)));
assert_eq!(
count0_nums(&b"1,2,3,4,5,6,7,8,9,0,junk"[..]),
Ok((&b"junk"[..], 10))
);
assert_eq!(count0_nums(&b"hello"[..]), Ok((&b"hello"[..], 0)));
}
#[test]
fn many1_count_test() {
fn count1_nums(i: &[u8]) -> IResult<&[u8], usize> {
many1_count(pair(digit, tag(",")))(i)
}
assert_eq!(count1_nums(&b"123,45,junk"[..]), Ok((&b"junk"[..], 2)));
assert_eq!(
count1_nums(&b"1,2,3,4,5,6,7,8,9,0,junk"[..]),
Ok((&b"junk"[..], 10))
);
assert_eq!(
count1_nums(&b"hello"[..]),
Err(Err::Error(error_position!(
&b"hello"[..],
ErrorKind::Many1Count
)))
);
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,15 @@
//! Parsers recognizing numbers
pub mod complete;
pub mod streaming;
/// Configurable endianness
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Endianness {
/// Big endian
Big,
/// Little endian
Little,
/// Will match the host's endianness
Native,
}
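// Illustrative sketch, not part of upstream nom: how `Endianness` is typically consumed.
// It assumes the endianness-parameterised integer parsers of nom 7 (for example
// `nom::number::complete::u16`), which take an `Endianness` and return a parser.
#[cfg(test)]
mod endianness_sketch {
use super::Endianness;
use crate::number::complete::u16 as u16_with_endianness;
use crate::IResult;
#[test]
fn selects_byte_order() {
fn be(i: &[u8]) -> IResult<&[u8], u16> {
u16_with_endianness(Endianness::Big)(i)
}
fn le(i: &[u8]) -> IResult<&[u8], u16> {
u16_with_endianness(Endianness::Little)(i)
}
assert_eq!(be(&b"\x01\x02"[..]), Ok((&b""[..], 0x0102)));
assert_eq!(le(&b"\x01\x02"[..]), Ok((&b""[..], 0x0201)));
}
}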

File diff suppressed because it is too large

View File

@@ -0,0 +1,270 @@
//! Combinators applying parsers in sequence
#[cfg(test)]
mod tests;
use crate::error::ParseError;
use crate::internal::{IResult, Parser};
/// Gets an object from the first parser,
/// then gets another object from the second parser.
///
/// # Arguments
/// * `first` The first parser to apply.
/// * `second` The second parser to apply.
///
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed};
/// # use nom::Needed::Size;
/// use nom::sequence::pair;
/// use nom::bytes::complete::tag;
///
/// let mut parser = pair(tag("abc"), tag("efg"));
///
/// assert_eq!(parser("abcefg"), Ok(("", ("abc", "efg"))));
/// assert_eq!(parser("abcefghij"), Ok(("hij", ("abc", "efg"))));
/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag))));
/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag))));
/// ```
pub fn pair<I, O1, O2, E: ParseError<I>, F, G>(
mut first: F,
mut second: G,
) -> impl FnMut(I) -> IResult<I, (O1, O2), E>
where
F: Parser<I, O1, E>,
G: Parser<I, O2, E>,
{
move |input: I| {
let (input, o1) = first.parse(input)?;
second.parse(input).map(|(i, o2)| (i, (o1, o2)))
}
}
/// Matches an object from the first parser and discards it,
/// then gets an object from the second parser.
///
/// # Arguments
/// * `first` The opening parser.
/// * `second` The second parser to get object.
///
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed};
/// # use nom::Needed::Size;
/// use nom::sequence::preceded;
/// use nom::bytes::complete::tag;
///
/// let mut parser = preceded(tag("abc"), tag("efg"));
///
/// assert_eq!(parser("abcefg"), Ok(("", "efg")));
/// assert_eq!(parser("abcefghij"), Ok(("hij", "efg")));
/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag))));
/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag))));
/// ```
pub fn preceded<I, O1, O2, E: ParseError<I>, F, G>(
mut first: F,
mut second: G,
) -> impl FnMut(I) -> IResult<I, O2, E>
where
F: Parser<I, O1, E>,
G: Parser<I, O2, E>,
{
move |input: I| {
let (input, _) = first.parse(input)?;
second.parse(input)
}
}
/// Gets an object from the first parser,
/// then matches an object from the second parser and discards it.
///
/// # Arguments
/// * `first` The first parser to apply.
/// * `second` The second parser to match an object.
///
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed};
/// # use nom::Needed::Size;
/// use nom::sequence::terminated;
/// use nom::bytes::complete::tag;
///
/// let mut parser = terminated(tag("abc"), tag("efg"));
///
/// assert_eq!(parser("abcefg"), Ok(("", "abc")));
/// assert_eq!(parser("abcefghij"), Ok(("hij", "abc")));
/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag))));
/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag))));
/// ```
pub fn terminated<I, O1, O2, E: ParseError<I>, F, G>(
mut first: F,
mut second: G,
) -> impl FnMut(I) -> IResult<I, O1, E>
where
F: Parser<I, O1, E>,
G: Parser<I, O2, E>,
{
move |input: I| {
let (input, o1) = first.parse(input)?;
second.parse(input).map(|(i, _)| (i, o1))
}
}
/// Gets an object from the first parser,
/// then matches an object from the sep_parser and discards it,
/// then gets another object from the second parser.
///
/// # Arguments
/// * `first` The first parser to apply.
/// * `sep` The separator parser to apply.
/// * `second` The second parser to apply.
///
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed};
/// # use nom::Needed::Size;
/// use nom::sequence::separated_pair;
/// use nom::bytes::complete::tag;
///
/// let mut parser = separated_pair(tag("abc"), tag("|"), tag("efg"));
///
/// assert_eq!(parser("abc|efg"), Ok(("", ("abc", "efg"))));
/// assert_eq!(parser("abc|efghij"), Ok(("hij", ("abc", "efg"))));
/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag))));
/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag))));
/// ```
pub fn separated_pair<I, O1, O2, O3, E: ParseError<I>, F, G, H>(
mut first: F,
mut sep: G,
mut second: H,
) -> impl FnMut(I) -> IResult<I, (O1, O3), E>
where
F: Parser<I, O1, E>,
G: Parser<I, O2, E>,
H: Parser<I, O3, E>,
{
move |input: I| {
let (input, o1) = first.parse(input)?;
let (input, _) = sep.parse(input)?;
second.parse(input).map(|(i, o2)| (i, (o1, o2)))
}
}
/// Matches an object from the first parser and discards it,
/// then gets an object from the second parser,
/// and finally matches an object from the third parser and discards it.
///
/// # Arguments
/// * `first` The first parser to apply and discard.
/// * `second` The second parser to apply.
/// * `third` The third parser to apply and discard.
///
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed};
/// # use nom::Needed::Size;
/// use nom::sequence::delimited;
/// use nom::bytes::complete::tag;
///
/// let mut parser = delimited(tag("("), tag("abc"), tag(")"));
///
/// assert_eq!(parser("(abc)"), Ok(("", "abc")));
/// assert_eq!(parser("(abc)def"), Ok(("def", "abc")));
/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag))));
/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag))));
/// ```
pub fn delimited<I, O1, O2, O3, E: ParseError<I>, F, G, H>(
mut first: F,
mut second: G,
mut third: H,
) -> impl FnMut(I) -> IResult<I, O2, E>
where
F: Parser<I, O1, E>,
G: Parser<I, O2, E>,
H: Parser<I, O3, E>,
{
move |input: I| {
let (input, _) = first.parse(input)?;
let (input, o2) = second.parse(input)?;
third.parse(input).map(|(i, _)| (i, o2))
}
}
/// Helper trait for the tuple combinator.
///
/// This trait is implemented for tuples of parsers of up to 21 elements.
pub trait Tuple<I, O, E> {
/// Parses the input and returns a tuple of results of each parser.
fn parse(&mut self, input: I) -> IResult<I, O, E>;
}
impl<Input, Output, Error: ParseError<Input>, F: Parser<Input, Output, Error>>
Tuple<Input, (Output,), Error> for (F,)
{
fn parse(&mut self, input: Input) -> IResult<Input, (Output,), Error> {
self.0.parse(input).map(|(i, o)| (i, (o,)))
}
}
macro_rules! tuple_trait(
($name1:ident $ty1:ident, $name2: ident $ty2:ident, $($name:ident $ty:ident),*) => (
tuple_trait!(__impl $name1 $ty1, $name2 $ty2; $($name $ty),*);
);
(__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident, $($name2:ident $ty2:ident),*) => (
tuple_trait_impl!($($name $ty),+);
tuple_trait!(__impl $($name $ty),+ , $name1 $ty1; $($name2 $ty2),*);
);
(__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident) => (
tuple_trait_impl!($($name $ty),+);
tuple_trait_impl!($($name $ty),+, $name1 $ty1);
);
);
macro_rules! tuple_trait_impl(
($($name:ident $ty: ident),+) => (
impl<
Input: Clone, $($ty),+ , Error: ParseError<Input>,
$($name: Parser<Input, $ty, Error>),+
> Tuple<Input, ( $($ty),+ ), Error> for ( $($name),+ ) {
fn parse(&mut self, input: Input) -> IResult<Input, ( $($ty),+ ), Error> {
tuple_trait_inner!(0, self, input, (), $($name)+)
}
}
);
);
macro_rules! tuple_trait_inner(
($it:tt, $self:expr, $input:expr, (), $head:ident $($id:ident)+) => ({
let (i, o) = $self.$it.parse($input.clone())?;
succ!($it, tuple_trait_inner!($self, i, ( o ), $($id)+))
});
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $head:ident $($id:ident)+) => ({
let (i, o) = $self.$it.parse($input.clone())?;
succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $($id)+))
});
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $head:ident) => ({
let (i, o) = $self.$it.parse($input.clone())?;
Ok((i, ($($parsed)* , o)))
});
);
tuple_trait!(FnA A, FnB B, FnC C, FnD D, FnE E, FnF F, FnG G, FnH H, FnI I, FnJ J, FnK K, FnL L,
FnM M, FnN N, FnO O, FnP P, FnQ Q, FnR R, FnS S, FnT T, FnU U);
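// Roughly what the macros above generate for the two-parser case (illustrative only;
// the literal expansion relies on macro hygiene instead of distinct variable names):
//
// impl<Input: Clone, A, B, Error: ParseError<Input>,
//      FnA: Parser<Input, A, Error>, FnB: Parser<Input, B, Error>>
// Tuple<Input, (A, B), Error> for (FnA, FnB) {
//   fn parse(&mut self, input: Input) -> IResult<Input, (A, B), Error> {
//     let (input, a) = self.0.parse(input.clone())?;
//     let (input, b) = self.1.parse(input.clone())?;
//     Ok((input, (a, b)))
//   }
// }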
/// Applies a tuple of parsers one by one and returns their results as a tuple.
/// There is a maximum of 21 parsers.
/// ```rust
/// # use nom::{Err, error::ErrorKind};
/// use nom::sequence::tuple;
/// use nom::character::complete::{alpha1, digit1};
/// let mut parser = tuple((alpha1, digit1, alpha1));
///
/// assert_eq!(parser("abc123def"), Ok(("", ("abc", "123", "def"))));
/// assert_eq!(parser("123def"), Err(Err::Error(("123def", ErrorKind::Alpha))));
/// ```
pub fn tuple<I, O, E: ParseError<I>, List: Tuple<I, O, E>>(
mut l: List,
) -> impl FnMut(I) -> IResult<I, O, E> {
move |i: I| l.parse(i)
}

View File

@@ -0,0 +1,274 @@
use super::*;
use crate::bytes::streaming::{tag, take};
use crate::error::ErrorKind;
use crate::internal::{Err, IResult, Needed};
use crate::number::streaming::be_u16;
#[test]
fn single_element_tuples() {
use crate::character::complete::alpha1;
use crate::{error::ErrorKind, Err};
let mut parser = tuple((alpha1,));
assert_eq!(parser("abc123def"), Ok(("123def", ("abc",))));
assert_eq!(
parser("123def"),
Err(Err::Error(("123def", ErrorKind::Alpha)))
);
}
#[derive(PartialEq, Eq, Debug)]
struct B {
a: u8,
b: u8,
}
#[derive(PartialEq, Eq, Debug)]
struct C {
a: u8,
b: Option<u8>,
}
/*FIXME: convert code examples to new error management
use util::{add_error_pattern, error_to_list, print_error};
#[cfg(feature = "std")]
#[rustfmt::skip]
fn error_to_string<P: Clone + PartialEq>(e: &Context<P, u32>) -> &'static str {
let v: Vec<(P, ErrorKind<u32>)> = error_to_list(e);
// do it this way if you can use slice patterns
//match &v[..] {
// [ErrorKind::Custom(42), ErrorKind::Tag] => "missing `ijkl` tag",
// [ErrorKind::Custom(42), ErrorKind::Custom(128), ErrorKind::Tag] => "missing `mnop` tag after `ijkl`",
// _ => "unrecognized error"
//}
let collected: Vec<ErrorKind<u32>> = v.iter().map(|&(_, ref e)| e.clone()).collect();
if &collected[..] == [ErrorKind::Custom(42), ErrorKind::Tag] {
"missing `ijkl` tag"
} else if &collected[..] == [ErrorKind::Custom(42), ErrorKind::Custom(128), ErrorKind::Tag] {
"missing `mnop` tag after `ijkl`"
} else {
"unrecognized error"
}
}
// do it this way if you can use box patterns
//use $crate::lib::std::str;
//fn error_to_string(e:Err) -> String
// match e {
// NodePosition(ErrorKind::Custom(42), i1, box Position(ErrorKind::Tag, i2)) => {
// format!("missing `ijkl` tag, found '{}' instead", str::from_utf8(i2).unwrap())
// },
// NodePosition(ErrorKind::Custom(42), i1, box NodePosition(ErrorKind::Custom(128), i2, box Position(ErrorKind::Tag, i3))) => {
// format!("missing `mnop` tag after `ijkl`, found '{}' instead", str::from_utf8(i3).unwrap())
// },
// _ => "unrecognized error".to_string()
// }
//}
*/
#[test]
fn complete() {
use crate::bytes::complete::tag;
fn err_test(i: &[u8]) -> IResult<&[u8], &[u8]> {
let (i, _) = tag("ijkl")(i)?;
tag("mnop")(i)
}
let a = &b"ijklmn"[..];
let res_a = err_test(a);
assert_eq!(
res_a,
Err(Err::Error(error_position!(&b"mn"[..], ErrorKind::Tag)))
);
}
#[test]
fn pair_test() {
fn pair_abc_def(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
pair(tag("abc"), tag("def"))(i)
}
assert_eq!(
pair_abc_def(&b"abcdefghijkl"[..]),
Ok((&b"ghijkl"[..], (&b"abc"[..], &b"def"[..])))
);
assert_eq!(
pair_abc_def(&b"ab"[..]),
Err(Err::Incomplete(Needed::new(1)))
);
assert_eq!(
pair_abc_def(&b"abcd"[..]),
Err(Err::Incomplete(Needed::new(2)))
);
assert_eq!(
pair_abc_def(&b"xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
assert_eq!(
pair_abc_def(&b"xxxdef"[..]),
Err(Err::Error(error_position!(&b"xxxdef"[..], ErrorKind::Tag)))
);
assert_eq!(
pair_abc_def(&b"abcxxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
}
#[test]
fn separated_pair_test() {
fn sep_pair_abc_def(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
separated_pair(tag("abc"), tag(","), tag("def"))(i)
}
assert_eq!(
sep_pair_abc_def(&b"abc,defghijkl"[..]),
Ok((&b"ghijkl"[..], (&b"abc"[..], &b"def"[..])))
);
assert_eq!(
sep_pair_abc_def(&b"ab"[..]),
Err(Err::Incomplete(Needed::new(1)))
);
assert_eq!(
sep_pair_abc_def(&b"abc,d"[..]),
Err(Err::Incomplete(Needed::new(2)))
);
assert_eq!(
sep_pair_abc_def(&b"xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
assert_eq!(
sep_pair_abc_def(&b"xxx,def"[..]),
Err(Err::Error(error_position!(&b"xxx,def"[..], ErrorKind::Tag)))
);
assert_eq!(
sep_pair_abc_def(&b"abc,xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
}
#[test]
fn preceded_test() {
fn preceded_abcd_efgh(i: &[u8]) -> IResult<&[u8], &[u8]> {
preceded(tag("abcd"), tag("efgh"))(i)
}
assert_eq!(
preceded_abcd_efgh(&b"abcdefghijkl"[..]),
Ok((&b"ijkl"[..], &b"efgh"[..]))
);
assert_eq!(
preceded_abcd_efgh(&b"ab"[..]),
Err(Err::Incomplete(Needed::new(2)))
);
assert_eq!(
preceded_abcd_efgh(&b"abcde"[..]),
Err(Err::Incomplete(Needed::new(3)))
);
assert_eq!(
preceded_abcd_efgh(&b"xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
assert_eq!(
preceded_abcd_efgh(&b"xxxxdef"[..]),
Err(Err::Error(error_position!(&b"xxxxdef"[..], ErrorKind::Tag)))
);
assert_eq!(
preceded_abcd_efgh(&b"abcdxxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
}
#[test]
fn terminated_test() {
fn terminated_abcd_efgh(i: &[u8]) -> IResult<&[u8], &[u8]> {
terminated(tag("abcd"), tag("efgh"))(i)
}
assert_eq!(
terminated_abcd_efgh(&b"abcdefghijkl"[..]),
Ok((&b"ijkl"[..], &b"abcd"[..]))
);
assert_eq!(
terminated_abcd_efgh(&b"ab"[..]),
Err(Err::Incomplete(Needed::new(2)))
);
assert_eq!(
terminated_abcd_efgh(&b"abcde"[..]),
Err(Err::Incomplete(Needed::new(3)))
);
assert_eq!(
terminated_abcd_efgh(&b"xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
assert_eq!(
terminated_abcd_efgh(&b"xxxxdef"[..]),
Err(Err::Error(error_position!(&b"xxxxdef"[..], ErrorKind::Tag)))
);
assert_eq!(
terminated_abcd_efgh(&b"abcdxxxx"[..]),
Err(Err::Error(error_position!(&b"xxxx"[..], ErrorKind::Tag)))
);
}
#[test]
fn delimited_test() {
fn delimited_abc_def_ghi(i: &[u8]) -> IResult<&[u8], &[u8]> {
delimited(tag("abc"), tag("def"), tag("ghi"))(i)
}
assert_eq!(
delimited_abc_def_ghi(&b"abcdefghijkl"[..]),
Ok((&b"jkl"[..], &b"def"[..]))
);
assert_eq!(
delimited_abc_def_ghi(&b"ab"[..]),
Err(Err::Incomplete(Needed::new(1)))
);
assert_eq!(
delimited_abc_def_ghi(&b"abcde"[..]),
Err(Err::Incomplete(Needed::new(1)))
);
assert_eq!(
delimited_abc_def_ghi(&b"abcdefgh"[..]),
Err(Err::Incomplete(Needed::new(1)))
);
assert_eq!(
delimited_abc_def_ghi(&b"xxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
assert_eq!(
delimited_abc_def_ghi(&b"xxxdefghi"[..]),
Err(Err::Error(error_position!(
&b"xxxdefghi"[..],
ErrorKind::Tag
),))
);
assert_eq!(
delimited_abc_def_ghi(&b"abcxxxghi"[..]),
Err(Err::Error(error_position!(&b"xxxghi"[..], ErrorKind::Tag)))
);
assert_eq!(
delimited_abc_def_ghi(&b"abcdefxxx"[..]),
Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag)))
);
}
#[test]
fn tuple_test() {
fn tuple_3(i: &[u8]) -> IResult<&[u8], (u16, &[u8], &[u8])> {
tuple((be_u16, take(3u8), tag("fg")))(i)
}
assert_eq!(
tuple_3(&b"abcdefgh"[..]),
Ok((&b"h"[..], (0x6162u16, &b"cde"[..], &b"fg"[..])))
);
assert_eq!(tuple_3(&b"abcd"[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(tuple_3(&b"abcde"[..]), Err(Err::Incomplete(Needed::new(2))));
assert_eq!(
tuple_3(&b"abcdejk"[..]),
Err(Err::Error(error_position!(&b"jk"[..], ErrorKind::Tag)))
);
}

View File

@@ -0,0 +1,536 @@
#[cfg(test)]
mod test {
#[cfg(feature = "alloc")]
use crate::{branch::alt, bytes::complete::tag_no_case, combinator::recognize, multi::many1};
use crate::{
bytes::complete::{is_a, is_not, tag, take, take_till, take_until},
error::{self, ErrorKind},
Err, IResult,
};
#[test]
fn tagtr_succeed() {
const INPUT: &str = "Hello World!";
const TAG: &str = "Hello";
fn test(input: &str) -> IResult<&str, &str> {
tag(TAG)(input)
}
match test(INPUT) {
Ok((extra, output)) => {
assert!(extra == " World!", "Parser `tag` consumed leftover input.");
assert!(
output == TAG,
"Parser `tag` doesn't return the tag it matched on success. \
Expected `{}`, got `{}`.",
TAG,
output
);
}
other => panic!(
"Parser `tag` didn't succeed when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn tagtr_incomplete() {
use crate::bytes::streaming::tag;
const INPUT: &str = "Hello";
const TAG: &str = "Hello World!";
let res: IResult<_, _, error::Error<_>> = tag(TAG)(INPUT);
match res {
Err(Err::Incomplete(_)) => (),
other => {
panic!(
"Parser `tag` didn't require more input when it should have. \
Got `{:?}`.",
other
);
}
};
}
#[test]
fn tagtr_error() {
const INPUT: &str = "Hello World!";
const TAG: &str = "Random"; // TAG must not be a prefix of INPUT.
let res: IResult<_, _, error::Error<_>> = tag(TAG)(INPUT);
match res {
Err(Err::Error(_)) => (),
other => {
panic!(
"Parser `tag` didn't fail when it should have. Got `{:?}`.`",
other
);
}
};
}
#[test]
fn take_s_succeed() {
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const CONSUMED: &str = "βèƒôřèÂßÇ";
const LEFTOVER: &str = "áƒƭèř";
let res: IResult<_, _, error::Error<_>> = take(9_usize)(INPUT);
match res {
Ok((extra, output)) => {
assert!(
extra == LEFTOVER,
"Parser `take_s` consumed leftover input. Leftover `{}`.",
extra
);
assert!(
output == CONSUMED,
"Parser `take_s` doesn't return the string it consumed on success. Expected `{}`, got `{}`.",
CONSUMED,
output
);
}
other => panic!(
"Parser `take_s` didn't succeed when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn take_until_succeed() {
const INPUT: &str = "βèƒôřèÂßÇ∂áƒƭèř";
const FIND: &str = "ÂßÇ∂";
const CONSUMED: &str = "βèƒôřè";
const LEFTOVER: &str = "ÂßÇ∂áƒƭèř";
let res: IResult<_, _, (_, ErrorKind)> = take_until(FIND)(INPUT);
match res {
Ok((extra, output)) => {
assert!(
extra == LEFTOVER,
"Parser `take_until`\
consumed leftover input. Leftover `{}`.",
extra
);
assert!(
output == CONSUMED,
"Parser `take_until`\
doesn't return the string it consumed on success. Expected `{}`, got `{}`.",
CONSUMED,
output
);
}
other => panic!(
"Parser `take_until` didn't succeed when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn take_s_incomplete() {
use crate::bytes::streaming::take;
const INPUT: &str = "βèƒôřèÂßÇá";
let res: IResult<_, _, (_, ErrorKind)> = take(13_usize)(INPUT);
match res {
Err(Err::Incomplete(_)) => (),
other => panic!(
"Parser `take` didn't require more input when it should have. \
Got `{:?}`.",
other
),
}
}
use crate::internal::Needed;
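// ASCII-only check: the `c as u8` cast truncates wider code points, which is fine for
// these tests but would misclassify some non-ASCII characters.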
fn is_alphabetic(c: char) -> bool {
(c as u8 >= 0x41 && c as u8 <= 0x5A) || (c as u8 >= 0x61 && c as u8 <= 0x7A)
}
#[test]
fn take_while() {
use crate::bytes::streaming::take_while;
fn f(i: &str) -> IResult<&str, &str> {
take_while(is_alphabetic)(i)
}
let a = "";
let b = "abcd";
let c = "abcd123";
let d = "123";
assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f(&c[..]), Ok((&d[..], &b[..])));
assert_eq!(f(&d[..]), Ok((&d[..], &a[..])));
}
#[test]
fn take_while1() {
use crate::bytes::streaming::take_while1;
fn f(i: &str) -> IResult<&str, &str> {
take_while1(is_alphabetic)(i)
}
let a = "";
let b = "abcd";
let c = "abcd123";
let d = "123";
assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(f(&c[..]), Ok((&"123"[..], &b[..])));
assert_eq!(
f(&d[..]),
Err(Err::Error(error_position!(&d[..], ErrorKind::TakeWhile1)))
);
}
#[test]
fn take_till_s_succeed() {
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const CONSUMED: &str = "βèƒôřèÂßÇ";
const LEFTOVER: &str = "áƒƭèř";
fn till_s(c: char) -> bool {
c == 'á'
}
fn test(input: &str) -> IResult<&str, &str> {
take_till(till_s)(input)
}
match test(INPUT) {
Ok((extra, output)) => {
assert!(
extra == LEFTOVER,
"Parser `take_till` consumed leftover input."
);
assert!(
output == CONSUMED,
"Parser `take_till` doesn't return the string it consumed on success. \
Expected `{}`, got `{}`.",
CONSUMED,
output
);
}
other => panic!(
"Parser `take_till` didn't succeed when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn take_while_succeed_none() {
use crate::bytes::complete::take_while;
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const CONSUMED: &str = "";
const LEFTOVER: &str = "βèƒôřèÂßÇáƒƭèř";
fn while_s(c: char) -> bool {
c == '9'
}
fn test(input: &str) -> IResult<&str, &str> {
take_while(while_s)(input)
}
match test(INPUT) {
Ok((extra, output)) => {
assert!(
extra == LEFTOVER,
"Parser `take_while` consumed leftover input."
);
assert!(
output == CONSUMED,
"Parser `take_while` doesn't return the string it consumed on success. \
Expected `{}`, got `{}`.",
CONSUMED,
output
);
}
other => panic!(
"Parser `take_while` didn't succeed when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn is_not_succeed() {
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const AVOID: &str = "£úçƙ¥á";
const CONSUMED: &str = "βèƒôřèÂßÇ";
const LEFTOVER: &str = "áƒƭèř";
fn test(input: &str) -> IResult<&str, &str> {
is_not(AVOID)(input)
}
match test(INPUT) {
Ok((extra, output)) => {
assert!(
extra == LEFTOVER,
"Parser `is_not` consumed leftover input. Leftover `{}`.",
extra
);
assert!(
output == CONSUMED,
"Parser `is_not` doesn't return the string it consumed on success. Expected `{}`, got `{}`.",
CONSUMED,
output
);
}
other => panic!(
"Parser `is_not` didn't succeed when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn take_while_succeed_some() {
use crate::bytes::complete::take_while;
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const CONSUMED: &str = "βèƒôřèÂßÇ";
const LEFTOVER: &str = "áƒƭèř";
fn while_s(c: char) -> bool {
c == 'β'
|| c == 'è'
|| c == 'ƒ'
|| c == 'ô'
|| c == 'ř'
|| c == 'è'
|| c == 'Â'
|| c == 'ß'
|| c == 'Ç'
}
fn test(input: &str) -> IResult<&str, &str> {
take_while(while_s)(input)
}
match test(INPUT) {
Ok((extra, output)) => {
assert!(
extra == LEFTOVER,
"Parser `take_while` consumed leftover input."
);
assert!(
output == CONSUMED,
"Parser `take_while` doesn't return the string it consumed on success. \
Expected `{}`, got `{}`.",
CONSUMED,
output
);
}
other => panic!(
"Parser `take_while` didn't succeed when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn is_not_fail() {
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const AVOID: &str = "βúçƙ¥";
fn test(input: &str) -> IResult<&str, &str> {
is_not(AVOID)(input)
}
match test(INPUT) {
Err(Err::Error(_)) => (),
other => panic!(
"Parser `is_not` didn't fail when it should have. Got `{:?}`.",
other
),
};
}
#[test]
fn take_while1_succeed() {
use crate::bytes::complete::take_while1;
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const CONSUMED: &str = "βèƒôřèÂßÇ";
const LEFTOVER: &str = "áƒƭèř";
fn while1_s(c: char) -> bool {
c == 'β'
|| c == 'è'
|| c == 'ƒ'
|| c == 'ô'
|| c == 'ř'
|| c == 'è'
|| c == 'Â'
|| c == 'ß'
|| c == 'Ç'
}
fn test(input: &str) -> IResult<&str, &str> {
take_while1(while1_s)(input)
}
match test(INPUT) {
Ok((extra, output)) => {
assert!(
extra == LEFTOVER,
"Parser `take_while1` consumed leftover input."
);
assert!(
output == CONSUMED,
"Parser `take_while1` doesn't return the string it consumed on success. \
Expected `{}`, got `{}`.",
CONSUMED,
output
);
}
other => panic!(
"Parser `take_while1` didn't succeed when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn take_until_incomplete() {
use crate::bytes::streaming::take_until;
const INPUT: &str = "βèƒôřè";
const FIND: &str = "βèƒôřèÂßÇ";
let res: IResult<_, _, (_, ErrorKind)> = take_until(FIND)(INPUT);
match res {
Err(Err::Incomplete(_)) => (),
other => panic!(
"Parser `take_until` didn't require more input when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn is_a_succeed() {
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const MATCH: &str = "βèƒôřèÂßÇ";
const CONSUMED: &str = "βèƒôřèÂßÇ";
const LEFTOVER: &str = "áƒƭèř";
fn test(input: &str) -> IResult<&str, &str> {
is_a(MATCH)(input)
}
match test(INPUT) {
Ok((extra, output)) => {
assert!(
extra == LEFTOVER,
"Parser `is_a` consumed leftover input. Leftover `{}`.",
extra
);
assert!(
output == CONSUMED,
"Parser `is_a` doesn't return the string it consumed on success. Expected `{}`, got `{}`.",
CONSUMED,
output
);
}
other => panic!(
"Parser `is_a` didn't succeed when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn take_while1_fail() {
use crate::bytes::complete::take_while1;
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
fn while1_s(c: char) -> bool {
c == '9'
}
fn test(input: &str) -> IResult<&str, &str> {
take_while1(while1_s)(input)
}
match test(INPUT) {
Err(Err::Error(_)) => (),
other => panic!(
"Parser `take_while1` didn't fail when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
fn is_a_fail() {
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const MATCH: &str = "Ûñℓúçƙ¥";
fn test(input: &str) -> IResult<&str, &str> {
is_a(MATCH)(input)
}
match test(INPUT) {
Err(Err::Error(_)) => (),
other => panic!(
"Parser `is_a` didn't fail when it should have. Got `{:?}`.",
other
),
};
}
#[test]
fn take_until_error() {
use crate::bytes::streaming::take_until;
const INPUT: &str = "βèƒôřèÂßÇáƒƭèř";
const FIND: &str = "Ráñδô₥";
let res: IResult<_, _, (_, ErrorKind)> = take_until(FIND)(INPUT);
match res {
Err(Err::Incomplete(_)) => (),
other => panic!(
"Parser `take_until` didn't fail when it should have. \
Got `{:?}`.",
other
),
};
}
#[test]
#[cfg(feature = "alloc")]
fn recognize_is_a() {
let a = "aabbab";
let b = "ababcd";
fn f(i: &str) -> IResult<&str, &str> {
recognize(many1(alt((tag("a"), tag("b")))))(i)
}
assert_eq!(f(&a[..]), Ok((&a[6..], &a[..])));
assert_eq!(f(&b[..]), Ok((&b[4..], &b[..4])));
}
#[test]
fn utf8_indexing() {
fn dot(i: &str) -> IResult<&str, &str> {
tag(".")(i)
}
let _ = dot("");
}
#[cfg(feature = "alloc")]
#[test]
fn case_insensitive() {
fn test(i: &str) -> IResult<&str, &str> {
tag_no_case("ABcd")(i)
}
assert_eq!(test("aBCdefgh"), Ok(("efgh", "aBCd")));
assert_eq!(test("abcdefgh"), Ok(("efgh", "abcd")));
assert_eq!(test("ABCDefgh"), Ok(("efgh", "ABCD")));
}
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,94 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::char,
character::complete::{digit1 as digit, space0 as space},
combinator::map_res,
multi::fold_many0,
sequence::{delimited, pair},
IResult,
};
// Parser definition
use std::str::FromStr;
// We parse any expr surrounded by parens, ignoring all whitespace around them
fn parens(i: &str) -> IResult<&str, i64> {
delimited(space, delimited(tag("("), expr, tag(")")), space)(i)
}
// We transform an integer string into an i64, ignoring surrounding whitespace.
// We look for a run of digits and try to convert it.
// If FromStr::from_str fails, we fall back to the parens parser defined above
fn factor(i: &str) -> IResult<&str, i64> {
alt((
map_res(delimited(space, digit, space), FromStr::from_str),
parens,
))(i)
}
// We read an initial factor and, each time we find a * or / operator
// followed by another factor, we do the math by folding everything
// into the accumulator
fn term(i: &str) -> IResult<&str, i64> {
let (i, init) = factor(i)?;
fold_many0(
pair(alt((char('*'), char('/'))), factor),
move || init,
|acc, (op, val): (char, i64)| {
if op == '*' {
acc * val
} else {
acc / val
}
},
)(i)
}
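// Same idea one precedence level up: read an initial term, then fold each + or -
// followed by another term into the accumulator, so * and / bind tighter than + and -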
fn expr(i: &str) -> IResult<&str, i64> {
let (i, init) = term(i)?;
fold_many0(
pair(alt((char('+'), char('-'))), term),
move || init,
|acc, (op, val): (char, i64)| {
if op == '+' {
acc + val
} else {
acc - val
}
},
)(i)
}
#[test]
fn factor_test() {
assert_eq!(factor("3"), Ok(("", 3)));
assert_eq!(factor(" 12"), Ok(("", 12)));
assert_eq!(factor("537 "), Ok(("", 537)));
assert_eq!(factor(" 24 "), Ok(("", 24)));
}
#[test]
fn term_test() {
assert_eq!(term(" 12 *2 / 3"), Ok(("", 8)));
assert_eq!(term(" 2* 3 *2 *2 / 3"), Ok(("", 8)));
assert_eq!(term(" 48 / 3/2"), Ok(("", 8)));
}
#[test]
fn expr_test() {
assert_eq!(expr(" 1 + 2 "), Ok(("", 3)));
assert_eq!(expr(" 12 + 6 - 4+ 3"), Ok(("", 17)));
assert_eq!(expr(" 1 + 2*3 + 4"), Ok(("", 11)));
}
#[test]
fn parens_test() {
assert_eq!(expr(" ( 2 )"), Ok(("", 2)));
assert_eq!(expr(" 2* ( 3 + 4 ) "), Ok(("", 14)));
assert_eq!(expr(" 2*2 / ( 5 - 1) + 3"), Ok(("", 4)));
}

View File

@@ -0,0 +1,161 @@
use std::fmt;
use std::fmt::{Debug, Display, Formatter};
use std::str::FromStr;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1 as digit, multispace0 as multispace},
combinator::{map, map_res},
multi::many0,
sequence::{delimited, preceded},
IResult,
};
pub enum Expr {
Value(i64),
Add(Box<Expr>, Box<Expr>),
Sub(Box<Expr>, Box<Expr>),
Mul(Box<Expr>, Box<Expr>),
Div(Box<Expr>, Box<Expr>),
Paren(Box<Expr>),
}
#[derive(Debug)]
pub enum Oper {
Add,
Sub,
Mul,
Div,
}
impl Display for Expr {
fn fmt(&self, format: &mut Formatter<'_>) -> fmt::Result {
use self::Expr::*;
match *self {
Value(val) => write!(format, "{}", val),
Add(ref left, ref right) => write!(format, "{} + {}", left, right),
Sub(ref left, ref right) => write!(format, "{} - {}", left, right),
Mul(ref left, ref right) => write!(format, "{} * {}", left, right),
Div(ref left, ref right) => write!(format, "{} / {}", left, right),
Paren(ref expr) => write!(format, "({})", expr),
}
}
}
impl Debug for Expr {
fn fmt(&self, format: &mut Formatter<'_>) -> fmt::Result {
use self::Expr::*;
match *self {
Value(val) => write!(format, "{}", val),
Add(ref left, ref right) => write!(format, "({:?} + {:?})", left, right),
Sub(ref left, ref right) => write!(format, "({:?} - {:?})", left, right),
Mul(ref left, ref right) => write!(format, "({:?} * {:?})", left, right),
Div(ref left, ref right) => write!(format, "({:?} / {:?})", left, right),
Paren(ref expr) => write!(format, "[{:?}]", expr),
}
}
}
fn parens(i: &str) -> IResult<&str, Expr> {
delimited(
multispace,
delimited(tag("("), map(expr, |e| Expr::Paren(Box::new(e))), tag(")")),
multispace,
)(i)
}
fn factor(i: &str) -> IResult<&str, Expr> {
alt((
map(
map_res(delimited(multispace, digit, multispace), FromStr::from_str),
Expr::Value,
),
parens,
))(i)
}
fn fold_exprs(initial: Expr, remainder: Vec<(Oper, Expr)>) -> Expr {
remainder.into_iter().fold(initial, |acc, pair| {
let (oper, expr) = pair;
match oper {
Oper::Add => Expr::Add(Box::new(acc), Box::new(expr)),
Oper::Sub => Expr::Sub(Box::new(acc), Box::new(expr)),
Oper::Mul => Expr::Mul(Box::new(acc), Box::new(expr)),
Oper::Div => Expr::Div(Box::new(acc), Box::new(expr)),
}
})
}
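// `term` parses `factor (('*' | '/') factor)*` and folds the operator/operand pairs
// left to right, so `*` and `/` are left-associative and bind tighter than `+`/`-`
// (which `expr` below folds the same way)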
fn term(i: &str) -> IResult<&str, Expr> {
let (i, initial) = factor(i)?;
let (i, remainder) = many0(alt((
|i| {
let (i, mul) = preceded(tag("*"), factor)(i)?;
Ok((i, (Oper::Mul, mul)))
},
|i| {
let (i, div) = preceded(tag("/"), factor)(i)?;
Ok((i, (Oper::Div, div)))
},
)))(i)?;
Ok((i, fold_exprs(initial, remainder)))
}
fn expr(i: &str) -> IResult<&str, Expr> {
let (i, initial) = term(i)?;
let (i, remainder) = many0(alt((
|i| {
let (i, add) = preceded(tag("+"), term)(i)?;
Ok((i, (Oper::Add, add)))
},
|i| {
let (i, sub) = preceded(tag("-"), term)(i)?;
Ok((i, (Oper::Sub, sub)))
},
)))(i)?;
Ok((i, fold_exprs(initial, remainder)))
}
#[test]
fn factor_test() {
assert_eq!(
factor(" 3 ").map(|(i, x)| (i, format!("{:?}", x))),
Ok(("", String::from("3")))
);
}
#[test]
fn term_test() {
assert_eq!(
term(" 3 * 5 ").map(|(i, x)| (i, format!("{:?}", x))),
Ok(("", String::from("(3 * 5)")))
);
}
#[test]
fn expr_test() {
assert_eq!(
expr(" 1 + 2 * 3 ").map(|(i, x)| (i, format!("{:?}", x))),
Ok(("", String::from("(1 + (2 * 3))")))
);
assert_eq!(
expr(" 1 + 2 * 3 / 4 - 5 ").map(|(i, x)| (i, format!("{:?}", x))),
Ok(("", String::from("((1 + ((2 * 3) / 4)) - 5)")))
);
assert_eq!(
expr(" 72 / 2 / 3 ").map(|(i, x)| (i, format!("{:?}", x))),
Ok(("", String::from("((72 / 2) / 3)")))
);
}
#[test]
fn parens_test() {
assert_eq!(
expr(" ( 1 + 2 ) * 3 ").map(|(i, x)| (i, format!("{:?}", x))),
Ok(("", String::from("([(1 + 2)] * 3)")))
);
}

View File

@@ -0,0 +1,45 @@
use nom::bytes::complete::{tag, take_while_m_n};
use nom::combinator::map_res;
use nom::sequence::tuple;
use nom::IResult;
#[derive(Debug, PartialEq)]
pub struct Color {
pub red: u8,
pub green: u8,
pub blue: u8,
}
fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
u8::from_str_radix(input, 16)
}
fn is_hex_digit(c: char) -> bool {
c.is_digit(16)
}
fn hex_primary(input: &str) -> IResult<&str, u8> {
map_res(take_while_m_n(2, 2, is_hex_digit), from_hex)(input)
}
fn hex_color(input: &str) -> IResult<&str, Color> {
let (input, _) = tag("#")(input)?;
let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?;
Ok((input, Color { red, green, blue }))
}
#[test]
fn parse_color() {
assert_eq!(
hex_color("#2F14DF"),
Ok((
"",
Color {
red: 47,
green: 20,
blue: 223,
}
))
);
}
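// A minimal additional check, not in the original file: a truncated component leaves
// fewer than two hex digits for `take_while_m_n(2, 2, ..)`, so parsing fails cleanly.
#[test]
fn parse_color_rejects_truncated_input() {
assert!(hex_color("#2F14D").is_err());
}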

View File

@@ -0,0 +1,48 @@
#![allow(dead_code)]
use nom::bytes::streaming::tag;
use nom::character::streaming::digit1 as digit;
use nom::combinator::verify;
use nom::error::{ErrorKind, ParseError};
#[cfg(feature = "alloc")]
use nom::multi::count;
use nom::sequence::terminated;
use nom::IResult;
#[derive(Debug)]
pub struct CustomError(String);
impl<'a> From<(&'a str, ErrorKind)> for CustomError {
fn from(error: (&'a str, ErrorKind)) -> Self {
CustomError(format!("error code was: {:?}", error))
}
}
impl<'a> ParseError<&'a str> for CustomError {
fn from_error_kind(_: &'a str, kind: ErrorKind) -> Self {
CustomError(format!("error code was: {:?}", kind))
}
fn append(_: &'a str, kind: ErrorKind, other: CustomError) -> Self {
CustomError(format!("{:?}\nerror code was: {:?}", other, kind))
}
}
fn test1(input: &str) -> IResult<&str, &str, CustomError> {
//fix_error!(input, CustomError, tag!("abcd"))
tag("abcd")(input)
}
fn test2(input: &str) -> IResult<&str, &str, CustomError> {
//terminated!(input, test1, fix_error!(CustomError, digit))
terminated(test1, digit)(input)
}
fn test3(input: &str) -> IResult<&str, &str, CustomError> {
verify(test1, |s: &str| s.starts_with("abcd"))(input)
}
#[cfg(feature = "alloc")]
fn test4(input: &str) -> IResult<&str, Vec<&str>, CustomError> {
count(test1, 4)(input)
}
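// A minimal sketch, not in the original file: on mismatching input the streaming `tag`
// inside `test1` fails right away, surfacing the custom error built by `from_error_kind`.
#[test]
fn custom_error_is_produced() {
match test1("xyz") {
Err(nom::Err::Error(CustomError(msg))) => assert!(msg.contains("Tag")),
other => panic!("expected a CustomError, got {:?}", other),
}
}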

View File

@@ -0,0 +1,28 @@
use nom::bytes::complete::escaped;
use nom::character::complete::digit1;
use nom::character::complete::one_of;
use nom::{error::ErrorKind, Err, IResult};
fn esc(s: &str) -> IResult<&str, &str, (&str, ErrorKind)> {
escaped(digit1, '\\', one_of("\"n\\"))(s)
}
#[cfg(feature = "alloc")]
fn esc_trans(s: &str) -> IResult<&str, String, (&str, ErrorKind)> {
use nom::bytes::complete::{escaped_transform, tag};
escaped_transform(digit1, '\\', tag("n"))(s)
}
#[test]
fn test_escaped() {
assert_eq!(esc("abcd"), Err(Err::Error(("abcd", ErrorKind::Escaped))));
}
#[test]
#[cfg(feature = "alloc")]
fn test_escaped_transform() {
assert_eq!(
esc_trans("abcd"),
Err(Err::Error(("abcd", ErrorKind::EscapedTransform)))
);
}
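// A small success-path sketch, not in the original file: `esc` accepts runs of digits
// with backslash escapes drawn from `"`, `n` and `\`, mirroring the `escaped` examples
// in nom's documentation.
#[test]
fn test_escaped_success() {
assert_eq!(esc("123;"), Ok((";", "123")));
assert_eq!(esc("12\\\"34;"), Ok((";", "12\\\"34")));
}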

View File

@@ -0,0 +1,46 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::streaming::digit1 as digit;
use nom::combinator::{map, map_res, opt, recognize};
use nom::sequence::{delimited, pair};
use nom::IResult;
use std::str;
use std::str::FromStr;
fn unsigned_float(i: &[u8]) -> IResult<&[u8], f32> {
let float_bytes = recognize(alt((
delimited(digit, tag("."), opt(digit)),
delimited(opt(digit), tag("."), digit),
)));
let float_str = map_res(float_bytes, str::from_utf8);
map_res(float_str, FromStr::from_str)(i)
}
fn float(i: &[u8]) -> IResult<&[u8], f32> {
map(
pair(opt(alt((tag("+"), tag("-")))), unsigned_float),
|(sign, value)| {
sign
.and_then(|s| if s[0] == b'-' { Some(-1f32) } else { None })
.unwrap_or(1f32)
* value
},
)(i)
}
#[test]
fn unsigned_float_test() {
assert_eq!(unsigned_float(&b"123.456;"[..]), Ok((&b";"[..], 123.456)));
assert_eq!(unsigned_float(&b"0.123;"[..]), Ok((&b";"[..], 0.123)));
assert_eq!(unsigned_float(&b"123.0;"[..]), Ok((&b";"[..], 123.0)));
assert_eq!(unsigned_float(&b"123.;"[..]), Ok((&b";"[..], 123.0)));
assert_eq!(unsigned_float(&b".123;"[..]), Ok((&b";"[..], 0.123)));
}
#[test]
fn float_test() {
assert_eq!(float(&b"123.456;"[..]), Ok((&b";"[..], 123.456)));
assert_eq!(float(&b"+123.456;"[..]), Ok((&b";"[..], 123.456)));
assert_eq!(float(&b"-123.456;"[..]), Ok((&b";"[..], -123.456)));
}

View File

@@ -0,0 +1,39 @@
use nom::{
bytes::complete::tag,
multi::{many0, many0_count},
};
#[test]
fn parse() {
let mut counter = 0;
let res = {
let mut parser = many0::<_, _, (), _>(|i| {
counter += 1;
tag("abc")(i)
});
parser("abcabcabcabc").unwrap()
};
println!("res: {:?}", res);
assert_eq!(counter, 5);
}
#[test]
fn accumulate() {
let mut v = Vec::new();
let (_, count) = {
let mut parser = many0_count::<_, _, (), _>(|i| {
let (i, o) = tag("abc")(i)?;
v.push(o);
Ok((i, ()))
});
parser("abcabcabcabc").unwrap()
};
println!("v: {:?}", v);
assert_eq!(count, 4);
assert_eq!(v.len(), 4);
}

View File

@@ -0,0 +1,207 @@
use nom::{
bytes::complete::take_while,
character::complete::{
alphanumeric1 as alphanumeric, char, multispace0 as multispace, space0 as space,
},
combinator::{map, map_res, opt},
multi::many0,
sequence::{delimited, pair, separated_pair, terminated, tuple},
IResult,
};
use std::collections::HashMap;
use std::str;
fn category(i: &[u8]) -> IResult<&[u8], &str> {
map_res(
delimited(char('['), take_while(|c| c != b']'), char(']')),
str::from_utf8,
)(i)
}
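// A key/value line: an alphanumeric key, an `=` with optional spaces around it, and a
// value running up to the end of line or a `;`, with any trailing `;`-comment discarded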
fn key_value(i: &[u8]) -> IResult<&[u8], (&str, &str)> {
let (i, key) = map_res(alphanumeric, str::from_utf8)(i)?;
let (i, _) = tuple((opt(space), char('='), opt(space)))(i)?;
let (i, val) = map_res(take_while(|c| c != b'\n' && c != b';'), str::from_utf8)(i)?;
let (i, _) = opt(pair(char(';'), take_while(|c| c != b'\n')))(i)?;
Ok((i, (key, val)))
}
fn keys_and_values(i: &[u8]) -> IResult<&[u8], HashMap<&str, &str>> {
map(many0(terminated(key_value, opt(multispace))), |vec| {
vec.into_iter().collect()
})(i)
}
fn category_and_keys(i: &[u8]) -> IResult<&[u8], (&str, HashMap<&str, &str>)> {
let (i, category) = terminated(category, opt(multispace))(i)?;
let (i, keys) = keys_and_values(i)?;
Ok((i, (category, keys)))
}
fn categories(i: &[u8]) -> IResult<&[u8], HashMap<&str, HashMap<&str, &str>>> {
map(
many0(separated_pair(
category,
opt(multispace),
map(
many0(terminated(key_value, opt(multispace))),
|vec: Vec<_>| vec.into_iter().collect(),
),
)),
|vec: Vec<_>| vec.into_iter().collect(),
)(i)
}
#[test]
fn parse_category_test() {
let ini_file = &b"[category]
parameter=value
key = value2"[..];
let ini_without_category = &b"\n\nparameter=value
key = value2"[..];
let res = category(ini_file);
println!("{:?}", res);
match res {
Ok((i, o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o),
_ => println!("error"),
}
assert_eq!(res, Ok((ini_without_category, "category")));
}
#[test]
fn parse_key_value_test() {
let ini_file = &b"parameter=value
key = value2"[..];
let ini_without_key_value = &b"\nkey = value2"[..];
let res = key_value(ini_file);
println!("{:?}", res);
match res {
Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2),
_ => println!("error"),
}
assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value"))));
}
#[test]
fn parse_key_value_with_space_test() {
let ini_file = &b"parameter = value
key = value2"[..];
let ini_without_key_value = &b"\nkey = value2"[..];
let res = key_value(ini_file);
println!("{:?}", res);
match res {
Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2),
_ => println!("error"),
}
assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value"))));
}
#[test]
fn parse_key_value_with_comment_test() {
let ini_file = &b"parameter=value;abc
key = value2"[..];
let ini_without_key_value = &b"\nkey = value2"[..];
let res = key_value(ini_file);
println!("{:?}", res);
match res {
Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2),
_ => println!("error"),
}
assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value"))));
}
#[test]
fn parse_multiple_keys_and_values_test() {
let ini_file = &b"parameter=value;abc
key = value2
[category]"[..];
let ini_without_key_value = &b"[category]"[..];
let res = keys_and_values(ini_file);
println!("{:?}", res);
match res {
Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o),
_ => println!("error"),
}
let mut expected: HashMap<&str, &str> = HashMap::new();
expected.insert("parameter", "value");
expected.insert("key", "value2");
assert_eq!(res, Ok((ini_without_key_value, expected)));
}
#[test]
fn parse_category_then_multiple_keys_and_values_test() {
//FIXME: there can be an empty line or a comment line after a category
let ini_file = &b"[abcd]
parameter=value;abc
key = value2
[category]"[..];
let ini_after_parser = &b"[category]"[..];
let res = category_and_keys(ini_file);
println!("{:?}", res);
match res {
Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o),
_ => println!("error"),
}
let mut expected_h: HashMap<&str, &str> = HashMap::new();
expected_h.insert("parameter", "value");
expected_h.insert("key", "value2");
assert_eq!(res, Ok((ini_after_parser, ("abcd", expected_h))));
}
#[test]
fn parse_multiple_categories_test() {
let ini_file = &b"[abcd]
parameter=value;abc
key = value2
[category]
parameter3=value3
key4 = value4
"[..];
let ini_after_parser = &b""[..];
let res = categories(ini_file);
//println!("{:?}", res);
match res {
Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o),
_ => println!("error"),
}
let mut expected_1: HashMap<&str, &str> = HashMap::new();
expected_1.insert("parameter", "value");
expected_1.insert("key", "value2");
let mut expected_2: HashMap<&str, &str> = HashMap::new();
expected_2.insert("parameter3", "value3");
expected_2.insert("key4", "value4");
let mut expected_h: HashMap<&str, HashMap<&str, &str>> = HashMap::new();
expected_h.insert("abcd", expected_1);
expected_h.insert("category", expected_2);
assert_eq!(res, Ok((ini_after_parser, expected_h)));
}

View File

@@ -0,0 +1,217 @@
use nom::{
bytes::complete::{is_a, tag, take_till, take_while},
character::complete::{alphanumeric1 as alphanumeric, char, space0 as space},
combinator::opt,
multi::many0,
sequence::{delimited, pair, terminated, tuple},
IResult,
};
use std::collections::HashMap;
fn is_line_ending_or_comment(chr: char) -> bool {
chr == ';' || chr == '\n'
}
fn not_line_ending(i: &str) -> IResult<&str, &str> {
take_while(|c| c != '\r' && c != '\n')(i)
}
fn space_or_line_ending(i: &str) -> IResult<&str, &str> {
is_a(" \r\n")(i)
}
fn category(i: &str) -> IResult<&str, &str> {
terminated(
delimited(char('['), take_while(|c| c != ']'), char(']')),
opt(is_a(" \r\n")),
)(i)
}
fn key_value(i: &str) -> IResult<&str, (&str, &str)> {
let (i, key) = alphanumeric(i)?;
let (i, _) = tuple((opt(space), tag("="), opt(space)))(i)?;
let (i, val) = take_till(is_line_ending_or_comment)(i)?;
let (i, _) = opt(space)(i)?;
let (i, _) = opt(pair(tag(";"), not_line_ending))(i)?;
let (i, _) = opt(space_or_line_ending)(i)?;
Ok((i, (key, val)))
}
fn keys_and_values_aggregator(i: &str) -> IResult<&str, Vec<(&str, &str)>> {
many0(key_value)(i)
}
fn keys_and_values(input: &str) -> IResult<&str, HashMap<&str, &str>> {
match keys_and_values_aggregator(input) {
Ok((i, tuple_vec)) => Ok((i, tuple_vec.into_iter().collect())),
Err(e) => Err(e),
}
}
fn category_and_keys(i: &str) -> IResult<&str, (&str, HashMap<&str, &str>)> {
pair(category, keys_and_values)(i)
}
fn categories_aggregator(i: &str) -> IResult<&str, Vec<(&str, HashMap<&str, &str>)>> {
many0(category_and_keys)(i)
}
fn categories(input: &str) -> IResult<&str, HashMap<&str, HashMap<&str, &str>>> {
match categories_aggregator(input) {
Ok((i, tuple_vec)) => Ok((i, tuple_vec.into_iter().collect())),
Err(e) => Err(e),
}
}
#[test]
fn parse_category_test() {
let ini_file = "[category]
parameter=value
key = value2";
let ini_without_category = "parameter=value
key = value2";
let res = category(ini_file);
println!("{:?}", res);
match res {
Ok((i, o)) => println!("i: {} | o: {:?}", i, o),
_ => println!("error"),
}
assert_eq!(res, Ok((ini_without_category, "category")));
}
#[test]
fn parse_key_value_test() {
let ini_file = "parameter=value
key = value2";
let ini_without_key_value = "key = value2";
let res = key_value(ini_file);
println!("{:?}", res);
match res {
Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2),
_ => println!("error"),
}
assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value"))));
}
#[test]
fn parse_key_value_with_space_test() {
let ini_file = "parameter = value
key = value2";
let ini_without_key_value = "key = value2";
let res = key_value(ini_file);
println!("{:?}", res);
match res {
Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2),
_ => println!("error"),
}
assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value"))));
}
#[test]
fn parse_key_value_with_comment_test() {
let ini_file = "parameter=value;abc
key = value2";
let ini_without_key_value = "key = value2";
let res = key_value(ini_file);
println!("{:?}", res);
match res {
Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2),
_ => println!("error"),
}
assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value"))));
}
#[test]
fn parse_multiple_keys_and_values_test() {
let ini_file = "parameter=value;abc
key = value2
[category]";
let ini_without_key_value = "[category]";
let res = keys_and_values(ini_file);
println!("{:?}", res);
match res {
Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o),
_ => println!("error"),
}
let mut expected: HashMap<&str, &str> = HashMap::new();
expected.insert("parameter", "value");
expected.insert("key", "value2");
assert_eq!(res, Ok((ini_without_key_value, expected)));
}
#[test]
fn parse_category_then_multiple_keys_and_values_test() {
//FIXME: there can be an empty line or a comment line after a category
let ini_file = "[abcd]
parameter=value;abc
key = value2
[category]";
let ini_after_parser = "[category]";
let res = category_and_keys(ini_file);
println!("{:?}", res);
match res {
Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o),
_ => println!("error"),
}
let mut expected_h: HashMap<&str, &str> = HashMap::new();
expected_h.insert("parameter", "value");
expected_h.insert("key", "value2");
assert_eq!(res, Ok((ini_after_parser, ("abcd", expected_h))));
}
#[test]
fn parse_multiple_categories_test() {
let ini_file = "[abcd]
parameter=value;abc
key = value2
[category]
parameter3=value3
key4 = value4
";
let res = categories(ini_file);
//println!("{:?}", res);
match res {
Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o),
_ => println!("error"),
}
let mut expected_1: HashMap<&str, &str> = HashMap::new();
expected_1.insert("parameter", "value");
expected_1.insert("key", "value2");
let mut expected_2: HashMap<&str, &str> = HashMap::new();
expected_2.insert("parameter3", "value3");
expected_2.insert("key4", "value4");
let mut expected_h: HashMap<&str, HashMap<&str, &str>> = HashMap::new();
expected_h.insert("abcd", expected_1);
expected_h.insert("category", expected_2);
assert_eq!(res, Ok(("", expected_h)));
}

View File

@@ -0,0 +1,216 @@
//#![feature(trace_macros)]
#![allow(dead_code)]
#![cfg_attr(feature = "cargo-clippy", allow(redundant_closure))]
use nom::{error::ErrorKind, Err, IResult, Needed};
#[allow(dead_code)]
struct Range {
start: char,
end: char,
}
pub fn take_char(input: &[u8]) -> IResult<&[u8], char> {
if !input.is_empty() {
Ok((&input[1..], input[0] as char))
} else {
Err(Err::Incomplete(Needed::new(1)))
}
}
#[cfg(feature = "std")]
mod parse_int {
use nom::HexDisplay;
use nom::{
character::streaming::{digit1 as digit, space1 as space},
combinator::{complete, map, opt},
multi::many0,
IResult,
};
use std::str;
fn parse_ints(input: &[u8]) -> IResult<&[u8], Vec<i32>> {
many0(spaces_or_int)(input)
}
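// `space` and `digit` are streaming parsers that return Incomplete at the end of input;
// wrapping them in `complete` turns that into an error so `opt` and `many0` can stop cleanly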
fn spaces_or_int(input: &[u8]) -> IResult<&[u8], i32> {
println!("{}", input.to_hex(8));
let (i, _) = opt(complete(space))(input)?;
let (i, res) = map(complete(digit), |x| {
println!("x: {:?}", x);
let result = str::from_utf8(x).unwrap();
println!("Result: {}", result);
println!("int is empty?: {}", x.is_empty());
match result.parse() {
Ok(i) => i,
Err(e) => panic!("UH OH! NOT A DIGIT! {:?}", e),
}
})(i)?;
Ok((i, res))
}
#[test]
fn issue_142() {
let subject = parse_ints(&b"12 34 5689a"[..]);
let expected = Ok((&b"a"[..], vec![12, 34, 5689]));
assert_eq!(subject, expected);
let subject = parse_ints(&b"12 34 5689 "[..]);
let expected = Ok((&b" "[..], vec![12, 34, 5689]));
assert_eq!(subject, expected)
}
}
#[test]
fn usize_length_bytes_issue() {
use nom::multi::length_data;
use nom::number::streaming::be_u16;
let _: IResult<&[u8], &[u8], (&[u8], ErrorKind)> = length_data(be_u16)(b"012346");
}
#[test]
fn take_till_issue() {
use nom::bytes::streaming::take_till;
fn nothing(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_till(|_| true)(i)
}
assert_eq!(nothing(b""), Err(Err::Incomplete(Needed::new(1))));
assert_eq!(nothing(b"abc"), Ok((&b"abc"[..], &b""[..])));
}
#[test]
fn issue_655() {
use nom::character::streaming::{line_ending, not_line_ending};
fn twolines(i: &str) -> IResult<&str, (&str, &str)> {
let (i, l1) = not_line_ending(i)?;
let (i, _) = line_ending(i)?;
let (i, l2) = not_line_ending(i)?;
let (i, _) = line_ending(i)?;
Ok((i, (l1, l2)))
}
assert_eq!(twolines("foo\nbar\n"), Ok(("", ("foo", "bar"))));
assert_eq!(twolines("féo\nbar\n"), Ok(("", ("féo", "bar"))));
assert_eq!(twolines("foé\nbar\n"), Ok(("", ("foé", "bar"))));
assert_eq!(twolines("foé\r\nbar\n"), Ok(("", ("foé", "bar"))));
}
#[cfg(feature = "alloc")]
fn issue_717(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
use nom::bytes::complete::{is_not, tag};
use nom::multi::separated_list0;
separated_list0(tag([0x0]), is_not([0x0u8]))(i)
}
mod issue_647 {
use nom::bytes::streaming::tag;
use nom::combinator::complete;
use nom::multi::separated_list0;
use nom::{error::Error, number::streaming::be_f64, Err, IResult};
pub type Input<'a> = &'a [u8];
#[derive(PartialEq, Debug, Clone)]
struct Data {
c: f64,
v: Vec<f64>,
}
fn list<'a, 'b>(
input: Input<'a>,
_cs: &'b f64,
) -> Result<(Input<'a>, Vec<f64>), Err<Error<&'a [u8]>>> {
separated_list0(complete(tag(",")), complete(be_f64))(input)
}
fn data(input: Input<'_>) -> IResult<Input<'_>, Data> {
let (i, c) = be_f64(input)?;
let (i, _) = tag("\n")(i)?;
let (i, v) = list(i, &c)?;
Ok((i, Data { c, v }))
}
}
#[test]
fn issue_848_overflow_incomplete_bits_to_bytes() {
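// converting the requested byte count into bits (x8) would overflow usize,
// so the parser must fail with ErrorKind::TooLarge instead of panicking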
fn take(i: &[u8]) -> IResult<&[u8], &[u8]> {
use nom::bytes::streaming::take;
take(0x2000000000000000_usize)(i)
}
fn parser(i: &[u8]) -> IResult<&[u8], &[u8]> {
use nom::bits::{bits, bytes};
bits(bytes(take))(i)
}
assert_eq!(
parser(&b""[..]),
Err(Err::Failure(nom::error_position!(
&b""[..],
ErrorKind::TooLarge
)))
);
}
#[test]
fn issue_942() {
use nom::error::{ContextError, ParseError};
pub fn parser<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
i: &'a str,
) -> IResult<&'a str, usize, E> {
use nom::{character::complete::char, error::context, multi::many0_count};
many0_count(context("char_a", char('a')))(i)
}
assert_eq!(parser::<()>("aaa"), Ok(("", 3)));
}
#[test]
fn issue_many_m_n_with_zeros() {
use nom::character::complete::char;
use nom::multi::many_m_n;
let mut parser = many_m_n::<_, _, (), _>(0, 0, char('a'));
assert_eq!(parser("aaa"), Ok(("aaa", vec![])));
}
#[test]
fn issue_1027_convert_error_panic_nonempty() {
use nom::character::complete::char;
use nom::error::{convert_error, VerboseError};
use nom::sequence::pair;
let input = "a";
let result: IResult<_, _, VerboseError<&str>> = pair(char('a'), char('b'))(input);
let err = match result.unwrap_err() {
Err::Error(e) => e,
_ => unreachable!(),
};
let msg = convert_error(input, err);
assert_eq!(
msg,
"0: at line 1:\na\n ^\nexpected \'b\', got end of input\n\n"
);
}
#[test]
fn issue_1231_bits_expect_fn_closure() {
use nom::bits::{bits, complete::take};
use nom::error::Error;
use nom::sequence::tuple;
pub fn example(input: &[u8]) -> IResult<&[u8], (u8, u8)> {
bits::<_, _, Error<_>, _, _>(tuple((take(1usize), take(1usize))))(input)
}
assert_eq!(example(&[0xff]), Ok((&b""[..], (1, 1))));
}
#[test]
fn issue_1282_findtoken_char() {
use nom::character::complete::one_of;
use nom::error::Error;
let parser = one_of::<_, _, Error<_>>(&['a', 'b', 'c'][..]);
assert_eq!(parser("aaa"), Ok(("aa", 'a')));
}

View File

@@ -0,0 +1,236 @@
#![cfg(feature = "alloc")]
use nom::{
branch::alt,
bytes::complete::{tag, take},
character::complete::{anychar, char, multispace0, none_of},
combinator::{map, map_opt, map_res, value, verify},
error::ParseError,
multi::{fold_many0, separated_list0},
number::complete::double,
sequence::{delimited, preceded, separated_pair},
IResult, Parser,
};
use std::collections::HashMap;
#[derive(Debug, PartialEq, Clone)]
pub enum JsonValue {
Null,
Bool(bool),
Str(String),
Num(f64),
Array(Vec<JsonValue>),
Object(HashMap<String, JsonValue>),
}
fn boolean(input: &str) -> IResult<&str, bool> {
alt((value(false, tag("false")), value(true, tag("true"))))(input)
}
fn u16_hex(input: &str) -> IResult<&str, u16> {
map_res(take(4usize), |s| u16::from_str_radix(s, 16))(input)
}
fn unicode_escape(input: &str) -> IResult<&str, char> {
map_opt(
alt((
// Not a surrogate
map(verify(u16_hex, |cp| !(0xD800..0xE000).contains(cp)), |cp| {
cp as u32
}),
// See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
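// e.g. \uD83D \uDE10 -> ((0xD83D - 0xD800) << 10) + (0xDE10 - 0xDC00) + 0x10000 = 0x1F610 (😐)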
map(
verify(
separated_pair(u16_hex, tag("\\u"), u16_hex),
|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low),
),
|(high, low)| {
let high_ten = (high as u32) - 0xD800;
let low_ten = (low as u32) - 0xDC00;
(high_ten << 10) + low_ten + 0x10000
},
),
)),
// Could probably be replaced with .unwrap() or _unchecked, since the verify checks above rule out invalid code points
std::char::from_u32,
)(input)
}
fn character(input: &str) -> IResult<&str, char> {
let (input, c) = none_of("\"")(input)?;
if c == '\\' {
alt((
map_res(anychar, |c| {
Ok(match c {
'"' | '\\' | '/' => c,
'b' => '\x08',
'f' => '\x0C',
'n' => '\n',
'r' => '\r',
't' => '\t',
_ => return Err(()),
})
}),
preceded(char('u'), unicode_escape),
))(input)
} else {
Ok((input, c))
}
}
fn string(input: &str) -> IResult<&str, String> {
delimited(
char('"'),
fold_many0(character, String::new, |mut string, c| {
string.push(c);
string
}),
char('"'),
)(input)
}
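// run the inner parser with any surrounding whitespace discarded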
fn ws<'a, O, E: ParseError<&'a str>, F: Parser<&'a str, O, E>>(f: F) -> impl Parser<&'a str, O, E> {
delimited(multispace0, f, multispace0)
}
fn array(input: &str) -> IResult<&str, Vec<JsonValue>> {
delimited(
char('['),
ws(separated_list0(ws(char(',')), json_value)),
char(']'),
)(input)
}
fn object(input: &str) -> IResult<&str, HashMap<String, JsonValue>> {
map(
delimited(
char('{'),
ws(separated_list0(
ws(char(',')),
separated_pair(string, ws(char(':')), json_value),
)),
char('}'),
),
|key_values| key_values.into_iter().collect(),
)(input)
}
fn json_value(input: &str) -> IResult<&str, JsonValue> {
use JsonValue::*;
alt((
value(Null, tag("null")),
map(boolean, Bool),
map(string, Str),
map(double, Num),
map(array, Array),
map(object, Object),
))(input)
}
fn json(input: &str) -> IResult<&str, JsonValue> {
ws(json_value).parse(input)
}
#[test]
fn json_string() {
assert_eq!(string("\"\""), Ok(("", "".to_string())));
assert_eq!(string("\"abc\""), Ok(("", "abc".to_string())));
assert_eq!(
string("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""),
Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string())),
);
assert_eq!(string("\"\\uD83D\\uDE10\""), Ok(("", "😐".to_string())));
assert!(string("\"").is_err());
assert!(string("\"abc").is_err());
assert!(string("\"\\\"").is_err());
assert!(string("\"\\u123\"").is_err());
assert!(string("\"\\uD800\"").is_err());
assert!(string("\"\\uD800\\uD800\"").is_err());
assert!(string("\"\\uDC00\"").is_err());
}
#[test]
fn json_object() {
use JsonValue::*;
let input = r#"{"a":42,"b":"x"}"#;
let expected = Object(
vec![
("a".to_string(), Num(42.0)),
("b".to_string(), Str("x".to_string())),
]
.into_iter()
.collect(),
);
assert_eq!(json(input), Ok(("", expected)));
}
#[test]
fn json_array() {
use JsonValue::*;
let input = r#"[42,"x"]"#;
let expected = Array(vec![Num(42.0), Str("x".to_string())]);
assert_eq!(json(input), Ok(("", expected)));
}
#[test]
fn json_whitespace() {
use JsonValue::*;
let input = r#"
{
"null" : null,
"true" :true ,
"false": false ,
"number" : 123e4 ,
"string" : " abc 123 " ,
"array" : [ false , 1 , "two" ] ,
"object" : { "a" : 1.0 , "b" : "c" } ,
"empty_array" : [ ] ,
"empty_object" : { }
}
"#;
assert_eq!(
json(input),
Ok((
"",
Object(
vec![
("null".to_string(), Null),
("true".to_string(), Bool(true)),
("false".to_string(), Bool(false)),
("number".to_string(), Num(123e4)),
("string".to_string(), Str(" abc 123 ".to_string())),
(
"array".to_string(),
Array(vec![Bool(false), Num(1.0), Str("two".to_string())])
),
(
"object".to_string(),
Object(
vec![
("a".to_string(), Num(1.0)),
("b".to_string(), Str("c".to_string())),
]
.into_iter()
.collect()
)
),
("empty_array".to_string(), Array(vec![]),),
("empty_object".to_string(), Object(HashMap::new()),),
]
.into_iter()
.collect()
)
))
);
}

View File

@@ -0,0 +1,320 @@
#![allow(dead_code)]
use nom::{
branch::alt,
bytes::streaming::{tag, take},
combinator::{map, map_res},
error::ErrorKind,
multi::many0,
number::streaming::{be_f32, be_u16, be_u32, be_u64},
Err, IResult, Needed,
};
use std::str;
fn mp4_box(input: &[u8]) -> IResult<&[u8], &[u8]> {
match be_u32(input) {
Ok((i, offset)) => {
let sz: usize = offset as usize;
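// the 32-bit length field counts itself, so the box payload is sz - 4 bytes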
if i.len() >= sz - 4 {
Ok((&i[(sz - 4)..], &i[0..(sz - 4)]))
} else {
Err(Err::Incomplete(Needed::new(offset as usize + 4)))
}
}
Err(e) => Err(e),
}
}
#[cfg_attr(rustfmt, rustfmt_skip)]
#[derive(PartialEq,Eq,Debug)]
struct FileType<'a> {
major_brand: &'a str,
major_brand_version: &'a [u8],
compatible_brands: Vec<&'a str>
}
#[cfg_attr(rustfmt, rustfmt_skip)]
#[allow(non_snake_case)]
#[derive(Debug,Clone)]
pub struct Mvhd32 {
version_flags: u32, // actually:
// version: u8,
// flags: u24 // 3 bytes
created_date: u32,
modified_date: u32,
scale: u32,
duration: u32,
speed: f32,
volume: u16, // actually a 2 bytes decimal
/* 10 bytes reserved */
scaleA: f32,
rotateB: f32,
angleU: f32,
rotateC: f32,
scaleD: f32,
angleV: f32,
positionX: f32,
positionY: f32,
scaleW: f32,
preview: u64,
poster: u32,
selection: u64,
current_time: u32,
track_id: u32
}
#[cfg_attr(rustfmt, rustfmt_skip)]
#[allow(non_snake_case)]
#[derive(Debug,Clone)]
pub struct Mvhd64 {
version_flags: u32, // actually:
// version: u8,
// flags: u24 // 3 bytes
created_date: u64,
modified_date: u64,
scale: u32,
duration: u64,
speed: f32,
volume: u16, // actually a 2 bytes decimal
/* 10 bytes reserved */
scaleA: f32,
rotateB: f32,
angleU: f32,
rotateC: f32,
scaleD: f32,
angleV: f32,
positionX: f32,
positionY: f32,
scaleW: f32,
preview: u64,
poster: u32,
selection: u64,
current_time: u32,
track_id: u32
}
#[cfg_attr(rustfmt, rustfmt_skip)]
fn mvhd32(i: &[u8]) -> IResult<&[u8], MvhdBox> {
let (i, version_flags) = be_u32(i)?;
let (i, created_date) = be_u32(i)?;
let (i, modified_date) = be_u32(i)?;
let (i, scale) = be_u32(i)?;
let (i, duration) = be_u32(i)?;
let (i, speed) = be_f32(i)?;
let (i, volume) = be_u16(i)?; // actually a 2 bytes decimal
let (i, _) = take(10_usize)(i)?;
let (i, scale_a) = be_f32(i)?;
let (i, rotate_b) = be_f32(i)?;
let (i, angle_u) = be_f32(i)?;
let (i, rotate_c) = be_f32(i)?;
let (i, scale_d) = be_f32(i)?;
let (i, angle_v) = be_f32(i)?;
let (i, position_x) = be_f32(i)?;
let (i, position_y) = be_f32(i)?;
let (i, scale_w) = be_f32(i)?;
let (i, preview) = be_u64(i)?;
let (i, poster) = be_u32(i)?;
let (i, selection) = be_u64(i)?;
let (i, current_time) = be_u32(i)?;
let (i, track_id) = be_u32(i)?;
let mvhd_box = MvhdBox::M32(Mvhd32 {
version_flags,
created_date,
modified_date,
scale,
duration,
speed,
volume,
scaleA: scale_a,
rotateB: rotate_b,
angleU: angle_u,
rotateC: rotate_c,
scaleD: scale_d,
angleV: angle_v,
positionX: position_x,
positionY: position_y,
scaleW: scale_w,
preview,
poster,
selection,
current_time,
track_id,
});
Ok((i, mvhd_box))
}
#[cfg_attr(rustfmt, rustfmt_skip)]
fn mvhd64(i: &[u8]) -> IResult<&[u8], MvhdBox> {
let (i, version_flags) = be_u32(i)?;
let (i, created_date) = be_u64(i)?;
let (i, modified_date) = be_u64(i)?;
let (i, scale) = be_u32(i)?;
let (i, duration) = be_u64(i)?;
let (i, speed) = be_f32(i)?;
let (i, volume) = be_u16(i)?; // actually a 2 bytes decimal
let (i, _) = take(10_usize)(i)?;
let (i, scale_a) = be_f32(i)?;
let (i, rotate_b) = be_f32(i)?;
let (i, angle_u) = be_f32(i)?;
let (i, rotate_c) = be_f32(i)?;
let (i, scale_d) = be_f32(i)?;
let (i, angle_v) = be_f32(i)?;
let (i, position_x) = be_f32(i)?;
let (i, position_y) = be_f32(i)?;
let (i, scale_w) = be_f32(i)?;
let (i, preview) = be_u64(i)?;
let (i, poster) = be_u32(i)?;
let (i, selection) = be_u64(i)?;
let (i, current_time) = be_u32(i)?;
let (i, track_id) = be_u32(i)?;
let mvhd_box = MvhdBox::M64(Mvhd64 {
version_flags,
created_date,
modified_date,
scale,
duration,
speed,
volume,
scaleA: scale_a,
rotateB: rotate_b,
angleU: angle_u,
rotateC: rotate_c,
scaleD: scale_d,
angleV: angle_v,
positionX: position_x,
positionY: position_y,
scaleW: scale_w,
preview,
poster,
selection,
current_time,
track_id,
});
Ok((i, mvhd_box))
}
#[derive(Debug, Clone)]
pub enum MvhdBox {
M32(Mvhd32),
M64(Mvhd64),
}
#[derive(Debug, Clone)]
pub enum MoovBox {
Mdra,
Dref,
Cmov,
Rmra,
Iods,
Mvhd(MvhdBox),
Clip,
Trak,
Udta,
}
#[derive(Debug)]
enum MP4BoxType {
Ftyp,
Moov,
Mdat,
Free,
Skip,
Wide,
Mdra,
Dref,
Cmov,
Rmra,
Iods,
Mvhd,
Clip,
Trak,
Udta,
Unknown,
}
#[derive(Debug)]
struct MP4BoxHeader {
length: u32,
tag: MP4BoxType,
}
fn brand_name(input: &[u8]) -> IResult<&[u8], &str> {
map_res(take(4_usize), str::from_utf8)(input)
}
fn filetype_parser(input: &[u8]) -> IResult<&[u8], FileType<'_>> {
let (i, name) = brand_name(input)?;
let (i, version) = take(4_usize)(i)?;
let (i, brands) = many0(brand_name)(i)?;
let ft = FileType {
major_brand: name,
major_brand_version: version,
compatible_brands: brands,
};
Ok((i, ft))
}
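// a 32-bit mvhd payload is exactly 100 bytes; the 64-bit variant is 112 bytes
// (created_date, modified_date and duration grow from u32 to u64)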
fn mvhd_box(input: &[u8]) -> IResult<&[u8], MvhdBox> {
let res = if input.len() < 100 {
Err(Err::Incomplete(Needed::new(100)))
} else if input.len() == 100 {
mvhd32(input)
} else if input.len() == 112 {
mvhd64(input)
} else {
Err(Err::Error(nom::error_position!(input, ErrorKind::TooLarge)))
};
println!("res: {:?}", res);
res
}
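// fallback that always succeeds without consuming input, so box_type never fails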
fn unknown_box_type(input: &[u8]) -> IResult<&[u8], MP4BoxType> {
Ok((input, MP4BoxType::Unknown))
}
fn box_type(input: &[u8]) -> IResult<&[u8], MP4BoxType> {
alt((
map(tag("ftyp"), |_| MP4BoxType::Ftyp),
map(tag("moov"), |_| MP4BoxType::Moov),
map(tag("mdat"), |_| MP4BoxType::Mdat),
map(tag("free"), |_| MP4BoxType::Free),
map(tag("skip"), |_| MP4BoxType::Skip),
map(tag("wide"), |_| MP4BoxType::Wide),
unknown_box_type,
))(input)
}
// Warning: an alt combinator with 9 branches, each containing a tag combinator,
// can make compilation very slow. Use functions as sub-parsers,
// or split it into multiple alt parsers if it gets slow.
fn moov_type(input: &[u8]) -> IResult<&[u8], MP4BoxType> {
alt((
map(tag("mdra"), |_| MP4BoxType::Mdra),
map(tag("dref"), |_| MP4BoxType::Dref),
map(tag("cmov"), |_| MP4BoxType::Cmov),
map(tag("rmra"), |_| MP4BoxType::Rmra),
map(tag("iods"), |_| MP4BoxType::Iods),
map(tag("mvhd"), |_| MP4BoxType::Mvhd),
map(tag("clip"), |_| MP4BoxType::Clip),
map(tag("trak"), |_| MP4BoxType::Trak),
map(tag("udta"), |_| MP4BoxType::Udta),
))(input)
}
fn box_header(input: &[u8]) -> IResult<&[u8], MP4BoxHeader> {
let (i, length) = be_u32(input)?;
let (i, tag) = box_type(i)?;
Ok((i, MP4BoxHeader { length, tag }))
}
fn moov_header(input: &[u8]) -> IResult<&[u8], MP4BoxHeader> {
let (i, length) = be_u32(input)?;
let (i, tag) = moov_type(i)?;
Ok((i, MP4BoxHeader { length, tag }))
}

View File

@@ -0,0 +1,31 @@
use nom::{
character::complete::{alphanumeric1 as alphanumeric, line_ending as eol},
multi::many0,
sequence::terminated,
IResult,
};
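// treat the end of input as a valid line ending so the last line does not need a trailing newline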
pub fn end_of_line(input: &str) -> IResult<&str, &str> {
if input.is_empty() {
Ok((input, input))
} else {
eol(input)
}
}
pub fn read_line(input: &str) -> IResult<&str, &str> {
terminated(alphanumeric, end_of_line)(input)
}
pub fn read_lines(input: &str) -> IResult<&str, Vec<&str>> {
many0(read_line)(input)
}
#[cfg(feature = "alloc")]
#[test]
fn read_lines_test() {
let res = Ok(("", vec!["Duck", "Dog", "Cow"]));
assert_eq!(read_lines("Duck\nDog\nCow\n"), res);
assert_eq!(read_lines("Duck\nDog\nCow"), res);
}

View File

@@ -0,0 +1,145 @@
#![cfg_attr(feature = "cargo-clippy", allow(unreadable_literal))]
#![cfg(target_pointer_width = "64")]
use nom::bytes::streaming::take;
#[cfg(feature = "alloc")]
use nom::multi::{length_data, many0};
#[cfg(feature = "alloc")]
use nom::number::streaming::be_u64;
use nom::sequence::tuple;
use nom::{Err, IResult, Needed};
// Parser definition
// We request a length that would trigger an overflow when computing consumed + requested
fn parser02(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
tuple((take(1_usize), take(18446744073709551615_usize)))(i)
}
#[test]
fn overflow_incomplete_tuple() {
assert_eq!(
parser02(&b"3"[..]),
Err(Err::Incomplete(Needed::new(18446744073709551615)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn overflow_incomplete_length_bytes() {
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many0(length_data(be_u64))(i)
}
// Trigger an overflow in length_data
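// the first be_u64 length is 1 (satisfied by the 0xaa byte); the second is u64::MAX,
// far more than the remaining input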
assert_eq!(
multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xff"[..]),
Err(Err::Incomplete(Needed::new(18446744073709551615)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn overflow_incomplete_many0() {
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many0(length_data(be_u64))(i)
}
// Trigger an overflow in many0
assert_eq!(
multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]),
Err(Err::Incomplete(Needed::new(18446744073709551599)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn overflow_incomplete_many1() {
use nom::multi::many1;
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many1(length_data(be_u64))(i)
}
// Trigger an overflow in many1
assert_eq!(
multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]),
Err(Err::Incomplete(Needed::new(18446744073709551599)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn overflow_incomplete_many_till() {
use nom::{bytes::complete::tag, multi::many_till};
fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> {
many_till(length_data(be_u64), tag("abc"))(i)
}
// Trigger an overflow in many_till
assert_eq!(
multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]),
Err(Err::Incomplete(Needed::new(18446744073709551599)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn overflow_incomplete_many_m_n() {
use nom::multi::many_m_n;
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many_m_n(2, 4, length_data(be_u64))(i)
}
// Trigger an overflow in many_m_n
assert_eq!(
multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]),
Err(Err::Incomplete(Needed::new(18446744073709551599)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn overflow_incomplete_count() {
use nom::multi::count;
fn counter(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
count(length_data(be_u64), 2)(i)
}
assert_eq!(
counter(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]),
Err(Err::Incomplete(Needed::new(18446744073709551599)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn overflow_incomplete_length_count() {
use nom::multi::length_count;
use nom::number::streaming::be_u8;
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
length_count(be_u8, length_data(be_u64))(i)
}
assert_eq!(
multi(&b"\x04\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xee"[..]),
Err(Err::Incomplete(Needed::new(18446744073709551598)))
);
}
#[test]
#[cfg(feature = "alloc")]
fn overflow_incomplete_length_data() {
fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
many0(length_data(be_u64))(i)
}
assert_eq!(
multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xff"[..]),
Err(Err::Incomplete(Needed::new(18446744073709551615)))
);
}

View File

@@ -0,0 +1,31 @@
#![allow(dead_code)]
// #![allow(unused_variables)]
use std::str;
use nom::bytes::complete::is_not;
use nom::character::complete::char;
use nom::combinator::{map, map_res};
use nom::multi::fold_many0;
use nom::sequence::delimited;
use nom::IResult;
fn atom<'a>(_tomb: &'a mut ()) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], String> {
move |input| {
map(
map_res(is_not(" \t\r\n"), str::from_utf8),
ToString::to_string,
)(input)
}
}
// FIXME: should we support the use case of borrowing data mutably in a parser?
fn list<'a>(i: &'a [u8], tomb: &'a mut ()) -> IResult<&'a [u8], String> {
delimited(
char('('),
fold_many0(atom(tomb), String::new, |acc: String, next: String| {
acc + next.as_str()
}),
char(')'),
)(i)
}