aboutsummaryrefslogtreecommitdiff
path: root/syn/src/discouraged.rs
diff options
context:
space:
mode:
Diffstat (limited to 'syn/src/discouraged.rs')
-rw-r--r--syn/src/discouraged.rs195
1 files changed, 195 insertions, 0 deletions
diff --git a/syn/src/discouraged.rs b/syn/src/discouraged.rs
new file mode 100644
index 0000000..29d1006
--- /dev/null
+++ b/syn/src/discouraged.rs
@@ -0,0 +1,195 @@
+//! Extensions to the parsing API with niche applicability.
+
+use super::*;
+
+/// Extensions to the `ParseStream` API to support speculative parsing.
+pub trait Speculative {
+ /// Advance this parse stream to the position of a forked parse stream.
+ ///
+ /// This is the opposite operation to [`ParseStream::fork`]. You can fork a
+ /// parse stream, perform some speculative parsing, then join the original
+ /// stream to the fork to "commit" the parsing from the fork to the main
+ /// stream.
+ ///
+ /// If you can avoid doing this, you should, as it limits the ability to
+ /// generate useful errors. That said, it is often the only way to parse
+ /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
+ /// is that when the fork fails to parse an `A`, it's impossible to tell
+ /// whether that was because of a syntax error and the user meant to provide
+ /// an `A`, or that the `A`s are finished and its time to start parsing
+ /// `B`s. Use with care.
+ ///
+ /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by
+ /// parsing `B*` and removing the leading members of `A` from the
+ /// repetition, bypassing the need to involve the downsides associated with
+ /// speculative parsing.
+ ///
+ /// [`ParseStream::fork`]: ParseBuffer::fork
+ ///
+ /// # Example
+ ///
+ /// There has been chatter about the possibility of making the colons in the
+ /// turbofish syntax like `path::to::<T>` no longer required by accepting
+ /// `path::to<T>` in expression position. Specifically, according to [RFC
+ /// 2544], [`PathSegment`] parsing should always try to consume a following
+ /// `<` token as the start of generic arguments, and reset to the `<` if
+ /// that fails (e.g. the token is acting as a less-than operator).
+ ///
+ /// This is the exact kind of parsing behavior which requires the "fork,
+ /// try, commit" behavior that [`ParseStream::fork`] discourages. With
+ /// `advance_to`, we can avoid having to parse the speculatively parsed
+ /// content a second time.
+ ///
+ /// This change in behavior can be implemented in syn by replacing just the
+ /// `Parse` implementation for `PathSegment`:
+ ///
+ /// ```
+ /// # use syn::ext::IdentExt;
+ /// use syn::parse::discouraged::Speculative;
+ /// # use syn::parse::{Parse, ParseStream};
+ /// # use syn::{Ident, PathArguments, Result, Token};
+ ///
+ /// pub struct PathSegment {
+ /// pub ident: Ident,
+ /// pub arguments: PathArguments,
+ /// }
+ /// #
+ /// # impl<T> From<T> for PathSegment
+ /// # where
+ /// # T: Into<Ident>,
+ /// # {
+ /// # fn from(ident: T) -> Self {
+ /// # PathSegment {
+ /// # ident: ident.into(),
+ /// # arguments: PathArguments::None,
+ /// # }
+ /// # }
+ /// # }
+ ///
+ /// impl Parse for PathSegment {
+ /// fn parse(input: ParseStream) -> Result<Self> {
+ /// if input.peek(Token![super])
+ /// || input.peek(Token![self])
+ /// || input.peek(Token![Self])
+ /// || input.peek(Token![crate])
+ /// || input.peek(Token![extern])
+ /// {
+ /// let ident = input.call(Ident::parse_any)?;
+ /// return Ok(PathSegment::from(ident));
+ /// }
+ ///
+ /// let ident = input.parse()?;
+ /// if input.peek(Token![::]) && input.peek3(Token![<]) {
+ /// return Ok(PathSegment {
+ /// ident,
+ /// arguments: PathArguments::AngleBracketed(input.parse()?),
+ /// });
+ /// }
+ /// if input.peek(Token![<]) && !input.peek(Token![<=]) {
+ /// let fork = input.fork();
+ /// if let Ok(arguments) = fork.parse() {
+ /// input.advance_to(&fork);
+ /// return Ok(PathSegment {
+ /// ident,
+ /// arguments: PathArguments::AngleBracketed(arguments),
+ /// });
+ /// }
+ /// }
+ /// Ok(PathSegment::from(ident))
+ /// }
+ /// }
+ ///
+ /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
+ /// ```
+ ///
+ /// # Drawbacks
+ ///
+ /// The main drawback of this style of speculative parsing is in error
+ /// presentation. Even if the lookahead is the "correct" parse, the error
+ /// that is shown is that of the "fallback" parse. To use the same example
+ /// as the turbofish above, take the following unfinished "turbofish":
+ ///
+ /// ```text
+ /// let _ = f<&'a fn(), for<'a> serde::>();
+ /// ```
+ ///
+ /// If this is parsed as generic arguments, we can provide the error message
+ ///
+ /// ```text
+ /// error: expected identifier
+ /// --> src.rs:L:C
+ /// |
+ /// L | let _ = f<&'a fn(), for<'a> serde::>();
+ /// | ^
+ /// ```
+ ///
+ /// but if parsed using the above speculative parsing, it falls back to
+ /// assuming that the `<` is a less-than when it fails to parse the generic
+ /// arguments, and tries to interpret the `&'a` as the start of a labelled
+ /// loop, resulting in the much less helpful error
+ ///
+ /// ```text
+ /// error: expected `:`
+ /// --> src.rs:L:C
+ /// |
+ /// L | let _ = f<&'a fn(), for<'a> serde::>();
+ /// | ^^
+ /// ```
+ ///
+ /// This can be mitigated with various heuristics (two examples: show both
+ /// forks' parse errors, or show the one that consumed more tokens), but
+ /// when you can control the grammar, sticking to something that can be
+ /// parsed LL(3) and without the LL(*) speculative parsing this makes
+ /// possible, displaying reasonable errors becomes much more simple.
+ ///
+ /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
+ /// [`PathSegment`]: crate::PathSegment
+ ///
+ /// # Performance
+ ///
+ /// This method performs a cheap fixed amount of work that does not depend
+ /// on how far apart the two streams are positioned.
+ ///
+ /// # Panics
+ ///
+ /// The forked stream in the argument of `advance_to` must have been
+ /// obtained by forking `self`. Attempting to advance to any other stream
+ /// will cause a panic.
+ fn advance_to(&self, fork: &Self);
+}
+
+impl<'a> Speculative for ParseBuffer<'a> {
+ fn advance_to(&self, fork: &Self) {
+ if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
+ panic!("Fork was not derived from the advancing parse stream");
+ }
+
+ let (self_unexp, self_sp) = inner_unexpected(self);
+ let (fork_unexp, fork_sp) = inner_unexpected(fork);
+ if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
+ match (fork_sp, self_sp) {
+ // Unexpected set on the fork, but not on `self`, copy it over.
+ (Some(span), None) => {
+ self_unexp.set(Unexpected::Some(span));
+ }
+ // Unexpected unset. Use chain to propagate errors from fork.
+ (None, None) => {
+ fork_unexp.set(Unexpected::Chain(self_unexp));
+
+ // Ensure toplevel 'unexpected' tokens from the fork don't
+ // bubble up the chain by replacing the root `unexpected`
+ // pointer, only 'unexpected' tokens from existing group
+ // parsers should bubble.
+ fork.unexpected
+ .set(Some(Rc::new(Cell::new(Unexpected::None))));
+ }
+ // Unexpected has been set on `self`. No changes needed.
+ (_, Some(_)) => {}
+ }
+ }
+
+ // See comment on `cell` in the struct definition.
+ self.cell
+ .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) })
+ }
+}