195 lines
7.8 KiB
Rust
195 lines
7.8 KiB
Rust
//! Extensions to the parsing API with niche applicability.
|
|
|
|
use super::*;
|
|
|
|
/// Extensions to the `ParseStream` API to support speculative parsing.
pub trait Speculative {
    /// Advance this parse stream to the position of a forked parse stream.
    ///
    /// This is the opposite operation to [`ParseStream::fork`]. You can fork a
    /// parse stream, perform some speculative parsing, then join the original
    /// stream to the fork to "commit" the parsing from the fork to the main
    /// stream.
    ///
    /// If you can avoid doing this, you should, as it limits the ability to
    /// generate useful errors. That said, it is often the only way to parse
    /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
    /// is that when the fork fails to parse an `A`, it's impossible to tell
    /// whether that was because of a syntax error and the user meant to provide
    /// an `A`, or that the `A`s are finished and it's time to start parsing
    /// `B`s. Use with care.
    ///
    /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by
    /// parsing `B*` and removing the leading members of `A` from the
    /// repetition, bypassing the need to involve the downsides associated with
    /// speculative parsing.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// There has been chatter about the possibility of making the colons in the
    /// turbofish syntax like `path::to::<T>` no longer required by accepting
    /// `path::to<T>` in expression position. Specifically, according to [RFC
    /// 2544], [`PathSegment`] parsing should always try to consume a following
    /// `<` token as the start of generic arguments, and reset to the `<` if
    /// that fails (e.g. the token is acting as a less-than operator).
    ///
    /// This is the exact kind of parsing behavior which requires the "fork,
    /// try, commit" behavior that [`ParseStream::fork`] discourages. With
    /// `advance_to`, we can avoid having to parse the speculatively parsed
    /// content a second time.
    ///
    /// This change in behavior can be implemented in syn by replacing just the
    /// `Parse` implementation for `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// The main drawback of this style of speculative parsing is in error
    /// presentation. Even if the lookahead is the "correct" parse, the error
    /// that is shown is that of the "fallback" parse. To use the same example
    /// as the turbofish above, take the following unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// If this is parsed as generic arguments, we can provide the error message
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// but if parsed using the above speculative parsing, it falls back to
    /// assuming that the `<` is a less-than when it fails to parse the generic
    /// arguments, and tries to interpret the `&'a` as the start of a labelled
    /// loop, resulting in the much less helpful error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// This can be mitigated with various heuristics (two examples: show both
    /// forks' parse errors, or show the one that consumed more tokens), but
    /// when you control the grammar, sticking to something that can be parsed
    /// LL(3), without the LL(*) speculative parsing that this method makes
    /// possible, makes displaying reasonable errors much simpler.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// This method performs a cheap fixed amount of work that does not depend
    /// on how far apart the two streams are positioned.
    ///
    /// # Panics
    ///
    /// The forked stream in the argument of `advance_to` must have been
    /// obtained by forking `self`. Attempting to advance to any other stream
    /// will cause a panic.
    fn advance_to(&self, fork: &Self);
}
|
|
|
|
impl<'a> Speculative for ParseBuffer<'a> {
|
|
fn advance_to(&self, fork: &Self) {
|
|
if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
|
|
panic!("Fork was not derived from the advancing parse stream");
|
|
}
|
|
|
|
let (self_unexp, self_sp) = inner_unexpected(self);
|
|
let (fork_unexp, fork_sp) = inner_unexpected(fork);
|
|
if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
|
|
match (fork_sp, self_sp) {
|
|
// Unexpected set on the fork, but not on `self`, copy it over.
|
|
(Some(span), None) => {
|
|
self_unexp.set(Unexpected::Some(span));
|
|
}
|
|
// Unexpected unset. Use chain to propagate errors from fork.
|
|
(None, None) => {
|
|
fork_unexp.set(Unexpected::Chain(self_unexp));
|
|
|
|
// Ensure toplevel 'unexpected' tokens from the fork don't
|
|
// bubble up the chain by replacing the root `unexpected`
|
|
// pointer, only 'unexpected' tokens from existing group
|
|
// parsers should bubble.
|
|
fork.unexpected
|
|
.set(Some(Rc::new(Cell::new(Unexpected::None))));
|
|
}
|
|
// Unexpected has been set on `self`. No changes needed.
|
|
(_, Some(_)) => {}
|
|
}
|
|
}
|
|
|
|
// See comment on `cell` in the struct definition.
|
|
self.cell
|
|
.set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });
|
|
}
|
|
}
|