(index<- )        ./libsyntax/ext/tt/macro_parser.rs

    git branch:    * master           5200215 auto merge of #14035 : alexcrichton/rust/experimental, r=huonw
    modified:    Fri May  9 13:02:28 2014

   1  // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
   2  // file at the top-level directory of this distribution and at
   3  // http://rust-lang.org/COPYRIGHT.
   4  //
   5  // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6  // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7  // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8  // option. This file may not be copied, modified, or distributed
   9  // except according to those terms.
  10
  11  // Earley-like parser for macros.
  12
  13  use ast;
  14  use ast::{Matcher, MatchTok, MatchSeq, MatchNonterminal, Ident};
  15  use codemap::{BytePos, mk_sp};
  16  use codemap;
  17  use parse::lexer::*; //resolve bug?
  18  use parse::ParseSess;
  19  use parse::attr::ParserAttr;
  20  use parse::parser::{LifetimeAndTypesWithoutColons, Parser};
  21  use parse::token::{Token, EOF, Nonterminal};
  22  use parse::token;
  23
  24  use std::rc::Rc;
  25  use collections::HashMap;
  26
  27  /* This is an Earley-like parser, without support for in-grammar nonterminals,
  28  only by calling out to the main rust parser for named nonterminals (which it
  29  commits to fully when it hits one in a grammar). This means that there are no
  30  completer or predictor rules, and therefore no need to store one column per
  31  token: instead, there's a set of current Earley items and a set of next
  32  ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
  33  pathological cases, is worse than traditional Earley parsing, but it's an
  34  easier fit for Macro-by-Example-style rules, and I think the overhead is
  35  lower. (In order to prevent the pathological case, we'd need to lazily
  36  construct the resulting `NamedMatch`es at the very end. It'd be a pain,
  37  and require more memory to keep around old items, but it would also save
  38  overhead)*/
  39
  40  /* Quick intro to how the parser works:
  41
  42  A 'position' is a dot in the middle of a matcher, usually represented as a
  43  dot. For example `Â· a $( a )* a b` is a position, as is `a $( Â· a )* a b`.
  44
  45  The parser walks through the input a character at a time, maintaining a list
  46  of items consistent with the current position in the input string: `cur_eis`.
  47
  48  As it processes them, it fills up `eof_eis` with items that would be valid if
  49  the macro invocation is now over, `bb_eis` with items that are waiting on
  50  a Rust nonterminal like `$e:expr`, and `next_eis` with items that are waiting
  51  on the a particular token. Most of the logic concerns moving the Â· through the
  52  repetitions indicated by Kleene stars. It only advances or calls out to the
  53  real Rust parser when no `cur_eis` items remain
  54
  55  Example: Start parsing `a a a a b` against [Â· a $( a )* a b].
  56
  57  Remaining input: `a a a a b`
  58  next_eis: [Â· a $( a )* a b]
  59
  60  - - - Advance over an `a`. - - -
  61
  62  Remaining input: `a a a b`
  63  cur: [a Â· $( a )* a b]
  64  Descend/Skip (first item).
  65  next: [a $( Â· a )* a b]  [a $( a )* Â· a b].
  66
  67  - - - Advance over an `a`. - - -
  68
  69  Remaining input: `a a b`
  70  cur: [a $( a Â· )* a b]  next: [a $( a )* a Â· b]
  71  Finish/Repeat (first item)
  72  next: [a $( a )* Â· a b]  [a $( Â· a )* a b]  [a $( a )* a Â· b]
  73
  74  - - - Advance over an `a`. - - - (this looks exactly like the last step)
  75
  76  Remaining input: `a b`
  77  cur: [a $( a Â· )* a b]  next: [a $( a )* a Â· b]
  78  Finish/Repeat (first item)
  79  next: [a $( a )* Â· a b]  [a $( Â· a )* a b]  [a $( a )* a Â· b]
  80
  81  - - - Advance over an `a`. - - - (this looks exactly like the last step)
  82
  83  Remaining input: `b`
  84  cur: [a $( a Â· )* a b]  next: [a $( a )* a Â· b]
  85  Finish/Repeat (first item)
  86  next: [a $( a )* Â· a b]  [a $( Â· a )* a b]
  87
  88  - - - Advance over a `b`. - - -
  89
  90  Remaining input: ``
  91  eof: [a $( a )* a b Â·]
  92
  93   */
  94
  95
  96  /* to avoid costly uniqueness checks, we require that `MatchSeq` always has a
  97  nonempty body. */
  98
  99
100  #[deriving(Clone)]
101  pub struct MatcherPos {
102      elts: Vec<ast::Matcher> , // maybe should be <'>? Need to understand regions.
103      sep: Option<Token>,
104      idx: uint,
105      up: Option<Box<MatcherPos>>,
106      matches: Vec<Vec<Rc<NamedMatch>>>,
107      match_lo: uint, match_hi: uint,
108      sp_lo: BytePos,
109  }
110
111  pub fn count_names(ms: &[Matcher]) -> uint {
112      ms.iter().fold(0, |ct, m| {
113          ct + match m.node {
114              MatchTok(_) => 0u,
115              MatchSeq(ref more_ms, _, _, _, _) => {
116                  count_names(more_ms.as_slice())
117              }
118              MatchNonterminal(_, _, _) => 1u
119          }})
120  }
121
122  pub fn initial_matcher_pos(ms: Vec<Matcher> , sep: Option<Token>, lo: BytePos)
123                             -> Box<MatcherPos> {
124      let mut match_idx_hi = 0u;
125      for elt in ms.iter() {
126          match elt.node {
127              MatchTok(_) => (),
128              MatchSeq(_,_,_,_,hi) => {
129                  match_idx_hi = hi;       // it is monotonic...
130              }
131              MatchNonterminal(_,_,pos) => {
132                  match_idx_hi = pos+1u;  // ...so latest is highest
133              }
134          }
135      }
136      let matches = Vec::from_fn(count_names(ms.as_slice()), |_i| Vec::new());
137      box MatcherPos {
138          elts: ms,
139          sep: sep,
140          idx: 0u,
141          up: None,
142          matches: matches,
143          match_lo: 0u,
144          match_hi: match_idx_hi,
145          sp_lo: lo
146      }
147  }
148
149  // NamedMatch is a pattern-match result for a single ast::MatchNonterminal:
150  // so it is associated with a single ident in a parse, and all
151  // MatchedNonterminal's in the NamedMatch have the same nonterminal type
152  // (expr, item, etc). All the leaves in a single NamedMatch correspond to a
153  // single matcher_nonterminal in the ast::Matcher that produced it.
154  //
155  // It should probably be renamed, it has more or less exact correspondence to
156  // ast::match nodes, and the in-memory structure of a particular NamedMatch
157  // represents the match that occurred when a particular subset of an
158  // ast::match -- those ast::Matcher nodes leading to a single
159  // MatchNonterminal -- was applied to a particular token tree.
160  //
161  // The width of each MatchedSeq in the NamedMatch, and the identity of the
162  // MatchedNonterminal's, will depend on the token tree it was applied to: each
163  // MatchedSeq corresponds to a single MatchSeq in the originating
164  // ast::Matcher. The depth of the NamedMatch structure will therefore depend
165  // only on the nesting depth of ast::MatchSeq's in the originating
166  // ast::Matcher it was derived from.
167
168  pub enum NamedMatch {
169      MatchedSeq(Vec<Rc<NamedMatch>>, codemap::Span),
170      MatchedNonterminal(Nonterminal)
171  }
172
173  pub fn nameize(p_s: &ParseSess, ms: &[Matcher], res: &[Rc<NamedMatch>])
174              -> HashMap<Ident, Rc<NamedMatch>> {
175      fn n_rec(p_s: &ParseSess, m: &Matcher, res: &[Rc<NamedMatch>],
176               ret_val: &mut HashMap<Ident, Rc<NamedMatch>>) {
177          match *m {
178            codemap::Spanned {node: MatchTok(_), .. } => (),
179            codemap::Spanned {node: MatchSeq(ref more_ms, _, _, _, _), .. } => {
180              for next_m in more_ms.iter() {
181                  n_rec(p_s, next_m, res, ret_val)
182              };
183            }
184            codemap::Spanned {
185                  node: MatchNonterminal(bind_name, _, idx),
186                  span
187            } => {
188              if ret_val.contains_key(&bind_name) {
189                  let string = token::get_ident(bind_name);
190                  p_s.span_diagnostic
191                     .span_fatal(span, "duplicated bind name: " + string.get())
192              }
193              ret_val.insert(bind_name, res[idx].clone());
194            }
195          }
196      }
197      let mut ret_val = HashMap::new();
198      for m in ms.iter() { n_rec(p_s, m, res, &mut ret_val) }
199      ret_val
200  }
201
202  pub enum ParseResult {
203      Success(HashMap<Ident, Rc<NamedMatch>>),
204      Failure(codemap::Span, StrBuf),
205      Error(codemap::Span, StrBuf)
206  }
207
208  pub fn parse_or_else(sess: &ParseSess,
209                       cfg: ast::CrateConfig,
210                       rdr: TtReader,
211                       ms: Vec<Matcher> )
212                       -> HashMap<Ident, Rc<NamedMatch>> {
213      match parse(sess, cfg, rdr, ms.as_slice()) {
214          Success(m) => m,
215          Failure(sp, str) => {
216              sess.span_diagnostic.span_fatal(sp, str.as_slice())
217          }
218          Error(sp, str) => {
219              sess.span_diagnostic.span_fatal(sp, str.as_slice())
220          }
221      }
222  }
223
224  // perform a token equality check, ignoring syntax context (that is, an unhygienic comparison)
225  pub fn token_name_eq(t1 : &Token, t2 : &Token) -> bool {
226      match (t1,t2) {
227          (&token::IDENT(id1,_),&token::IDENT(id2,_))
228          | (&token::LIFETIME(id1),&token::LIFETIME(id2)) =>
229              id1.name == id2.name,
230          _ => *t1 == *t2
231      }
232  }
233
234  pub fn parse(sess: &ParseSess,
235               cfg: ast::CrateConfig,
236               mut rdr: TtReader,
237               ms: &[Matcher])
238               -> ParseResult {
239      let mut cur_eis = Vec::new();
240      cur_eis.push(initial_matcher_pos(ms.iter()
241                                         .map(|x| (*x).clone())
242                                         .collect(),
243                                       None,
244                                       rdr.peek().sp.lo));
245
246      loop {
247          let mut bb_eis = Vec::new(); // black-box parsed by parser.rs
248          let mut next_eis = Vec::new(); // or proceed normally
249          let mut eof_eis = Vec::new();
250
251          let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
252
253          /* we append new items to this while we go */
254          loop {
255              let ei = match cur_eis.pop() {
256                  None => break, /* for each Earley Item */
257                  Some(ei) => ei,
258              };
259
260              let idx = ei.idx;
261              let len = ei.elts.len();
262
263              /* at end of sequence */
264              if idx >= len {
265                  // can't move out of `match`es, so:
266                  if ei.up.is_some() {
267                      // hack: a matcher sequence is repeating iff it has a
268                      // parent (the top level is just a container)
269
270
271                      // disregard separator, try to go up
272                      // (remove this condition to make trailing seps ok)
273                      if idx == len {
274                          // pop from the matcher position
275
276                          let mut new_pos = ei.up.clone().unwrap();
277
278                          // update matches (the MBE "parse tree") by appending
279                          // each tree as a subtree.
280
281                          // I bet this is a perf problem: we're preemptively
282                          // doing a lot of array work that will get thrown away
283                          // most of the time.
284
285                          // Only touch the binders we have actually bound
286                          for idx in range(ei.match_lo, ei.match_hi) {
287                              let sub = (*ei.matches.get(idx)).clone();
288                              new_pos.matches
289                                     .get_mut(idx)
290                                     .push(Rc::new(MatchedSeq(sub, mk_sp(ei.sp_lo,
291                                                                         sp.hi))));
292                          }
293
294                          new_pos.idx += 1;
295                          cur_eis.push(new_pos);
296                      }
297
298                      // can we go around again?
299
300                      // the *_t vars are workarounds for the lack of unary move
301                      match ei.sep {
302                        Some(ref t) if idx == len => { // we need a separator
303                          // i'm conflicted about whether this should be hygienic....
304                          // though in this case, if the separators are never legal
305                          // idents, it shouldn't matter.
306                          if token_name_eq(&tok, t) { //pass the separator
307                              let mut ei_t = ei.clone();
308                              ei_t.idx += 1;
309                              next_eis.push(ei_t);
310                          }
311                        }
312                        _ => { // we don't need a separator
313                          let mut ei_t = ei;
314                          ei_t.idx = 0;
315                          cur_eis.push(ei_t);
316                        }
317                      }
318                  } else {
319                      eof_eis.push(ei);
320                  }
321              } else {
322                  match ei.elts.get(idx).node.clone() {
323                    /* need to descend into sequence */
324                    MatchSeq(ref matchers, ref sep, zero_ok,
325                             match_idx_lo, match_idx_hi) => {
326                      if zero_ok {
327                          let mut new_ei = ei.clone();
328                          new_ei.idx += 1u;
329                          //we specifically matched zero repeats.
330                          for idx in range(match_idx_lo, match_idx_hi) {
331                              new_ei.matches
332                                    .get_mut(idx)
333                                    .push(Rc::new(MatchedSeq(Vec::new(), sp)));
334                          }
335
336                          cur_eis.push(new_ei);
337                      }
338
339                      let matches = Vec::from_elem(ei.matches.len(), Vec::new());
340                      let ei_t = ei;
341                      cur_eis.push(box MatcherPos {
342                          elts: (*matchers).clone(),
343                          sep: (*sep).clone(),
344                          idx: 0u,
345                          up: Some(ei_t),
346                          matches: matches,
347                          match_lo: match_idx_lo, match_hi: match_idx_hi,
348                          sp_lo: sp.lo
349                      });
350                    }
351                    MatchNonterminal(_,_,_) => { bb_eis.push(ei) }
352                    MatchTok(ref t) => {
353                      let mut ei_t = ei.clone();
354                      //if (token_name_eq(t,&tok)) {
355                      if token::mtwt_token_eq(t,&tok) {
356                          ei_t.idx += 1;
357                          next_eis.push(ei_t);
358                      }
359                    }
360                  }
361              }
362          }
363
364          /* error messages here could be improved with links to orig. rules */
365          if token_name_eq(&tok, &EOF) {
366              if eof_eis.len() == 1u {
367                  let mut v = Vec::new();
368                  for dv in eof_eis.get_mut(0).matches.mut_iter() {
369                      v.push(dv.pop().unwrap());
370                  }
371                  return Success(nameize(sess, ms, v.as_slice()));
372              } else if eof_eis.len() > 1u {
373                  return Error(sp, "ambiguity: multiple successful parses".to_strbuf());
374              } else {
375                  return Failure(sp, "unexpected end of macro invocation".to_strbuf());
376              }
377          } else {
378              if (bb_eis.len() > 0u && next_eis.len() > 0u)
379                  || bb_eis.len() > 1u {
380                  let nts = bb_eis.iter().map(|ei| {
381                      match ei.elts.get(ei.idx).node {
382                        MatchNonterminal(bind, name, _) => {
383                          (format!("{} ('{}')",
384                                  token::get_ident(name),
385                                  token::get_ident(bind))).to_strbuf()
386                        }
387                        _ => fail!()
388                      } }).collect::<Vec<StrBuf>>().connect(" or ");
389                  return Error(sp, format!(
390                      "local ambiguity: multiple parsing options: \
391                       built-in NTs {} or {} other options.",
392                      nts, next_eis.len()).to_strbuf());
393              } else if bb_eis.len() == 0u && next_eis.len() == 0u {
394                  return Failure(sp, format!("no rules expected the token `{}`",
395                              token::to_str(&tok)).to_strbuf());
396              } else if next_eis.len() > 0u {
397                  /* Now process the next token */
398                  while next_eis.len() > 0u {
399                      cur_eis.push(next_eis.pop().unwrap());
400                  }
401                  rdr.next_token();
402              } else /* bb_eis.len() == 1 */ {
403                  let mut rust_parser = Parser(sess, cfg.clone(), box rdr.clone());
404
405                  let mut ei = bb_eis.pop().unwrap();
406                  match ei.elts.get(ei.idx).node {
407                    MatchNonterminal(_, name, idx) => {
408                      let name_string = token::get_ident(name);
409                      ei.matches.get_mut(idx).push(Rc::new(MatchedNonterminal(
410                          parse_nt(&mut rust_parser, name_string.get()))));
411                      ei.idx += 1u;
412                    }
413                    _ => fail!()
414                  }
415                  cur_eis.push(ei);
416
417                  for _ in range(0, rust_parser.tokens_consumed) {
418                      let _ = rdr.next_token();
419                  }
420              }
421          }
422
423          assert!(cur_eis.len() > 0u);
424      }
425  }
426
427  pub fn parse_nt(p: &mut Parser, name: &str) -> Nonterminal {
428      match name {
429        "item" => match p.parse_item(Vec::new()) {
430          Some(i) => token::NtItem(i),
431          None => p.fatal("expected an item keyword")
432        },
433        "block" => token::NtBlock(p.parse_block()),
434        "stmt" => token::NtStmt(p.parse_stmt(Vec::new())),
435        "pat" => token::NtPat(p.parse_pat()),
436        "expr" => token::NtExpr(p.parse_expr()),
437        "ty" => token::NtTy(p.parse_ty(false /* no need to disambiguate*/)),
438        // this could be handled like a token, since it is one
439        "ident" => match p.token {
440          token::IDENT(sn,b) => { p.bump(); token::NtIdent(box sn,b) }
441          _ => {
442              let token_str = token::to_str(&p.token);
443              p.fatal((format!("expected ident, found {}",
444                               token_str.as_slice())).as_slice())
445          }
446        },
447        "path" => {
448          token::NtPath(box p.parse_path(LifetimeAndTypesWithoutColons).path)
449        }
450        "meta" => token::NtMeta(p.parse_meta_item()),
451        "tt" => {
452          p.quote_depth += 1u; //but in theory, non-quoted tts might be useful
453          let res = token::NtTT(@p.parse_token_tree());
454          p.quote_depth -= 1u;
455          res
456        }
457        "matchers" => token::NtMatchers(p.parse_matchers()),
458        _ => p.fatal("unsupported builtin nonterminal parser: ".to_owned() + name)
459      }
460  }

libsyntax/ext/tt/macro_parser.rs:224:95-224:95 -fn- definition:
// perform a token equality check, ignoring syntax context (that is, an unhygienic comparison)
pub fn token_name_eq(t1 : &Token, t2 : &Token) -> bool {
    match (t1,t2) {
references:- 2
364:         /* error messages here could be improved with links to orig. rules */
365:         if token_name_eq(&tok, &EOF) {
366:             if eof_eis.len() == 1u {

libsyntax/ext/tt/macro_parser.rs:100:19-100:19 -struct- definition:
pub struct MatcherPos {
    elts: Vec<ast::Matcher> , // maybe should be <'>? Need to understand regions.
    sep: Option<Token>,
references:- 8
340:                     let ei_t = ei;
341:                     cur_eis.push(box MatcherPos {
342:                         elts: (*matchers).clone(),

libsyntax/ext/tt/macro_parser.rs:110:1-110:1 -fn- definition:
pub fn count_names(ms: &[Matcher]) -> uint {
    ms.iter().fold(0, |ct, m| {
        ct + match m.node {
references:- 2
115:             MatchSeq(ref more_ms, _, _, _, _) => {
116:                 count_names(more_ms.as_slice())
117:             }
--
135:     }
136:     let matches = Vec::from_fn(count_names(ms.as_slice()), |_i| Vec::new());
137:     box MatcherPos {

libsyntax/ext/tt/macro_parser.rs:167:1-167:1 -enum- definition:
pub enum NamedMatch {
    MatchedSeq(Vec<Rc<NamedMatch>>, codemap::Span),
    MatchedNonterminal(Nonterminal)
references:- 17
175:     fn n_rec(p_s: &ParseSess, m: &Matcher, res: &[Rc<NamedMatch>],
176:              ret_val: &mut HashMap<Ident, Rc<NamedMatch>>) {
177:         match *m {
--
211:                      ms: Vec<Matcher> )
212:                      -> HashMap<Ident, Rc<NamedMatch>> {
213:     match parse(sess, cfg, rdr, ms.as_slice()) {
libsyntax/ext/tt/macro_rules.rs:
122:                      lhses: &[Rc<NamedMatch>],
123:                      rhses: &[Rc<NamedMatch>])
124:                      -> Box<MacResult> {
libsyntax/ext/tt/transcribe.rs:
75: fn lookup_cur_matched_by_matched(r: &TtReader, start: Rc<NamedMatch>) -> Rc<NamedMatch> {
76:     r.repeat_idx.iter().fold(start, |ad, idx| {
--
87: fn lookup_cur_matched(r: &TtReader, name: Ident) -> Rc<NamedMatch> {
88:     let matched_opt = r.interpolations.find_copy(&name);
libsyntax/ext/tt/macro_rules.rs:
88:     name: Ident,
89:     lhses: Vec<Rc<NamedMatch>>,
90:     rhses: Vec<Rc<NamedMatch>>,

libsyntax/ext/tt/macro_parser.rs:175:4-175:4 -fn- definition:
    fn n_rec(p_s: &ParseSess, m: &Matcher, res: &[Rc<NamedMatch>],
             ret_val: &mut HashMap<Ident, Rc<NamedMatch>>) {
        match *m {
references:- 2
180:             for next_m in more_ms.iter() {
181:                 n_rec(p_s, next_m, res, ret_val)
182:             };
--
197:     let mut ret_val = HashMap::new();
198:     for m in ms.iter() { n_rec(p_s, m, res, &mut ret_val) }
199:     ret_val

libsyntax/ext/tt/macro_parser.rs:233:1-233:1 -fn- definition:
pub fn parse(sess: &ParseSess,
             cfg: ast::CrateConfig,
             mut rdr: TtReader,
references:- 2
212:                      -> HashMap<Ident, Rc<NamedMatch>> {
213:     match parse(sess, cfg, rdr, ms.as_slice()) {
214:         Success(m) => m,
libsyntax/ext/tt/macro_rules.rs:
145:                                            .collect());
146:             match parse(cx.parse_sess(), cx.cfg(), arg_rdr, mtcs.as_slice()) {
147:               Success(named_matches) => {