(index<- )        ./libsyntax/parse/comments.rs

    git branch:    * master           5200215 auto merge of #14035 : alexcrichton/rust/experimental, r=huonw
    modified:    Fri May  9 13:02:28 2014

   1  // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2  // file at the top-level directory of this distribution and at
   3  // http://rust-lang.org/COPYRIGHT.
   4  //
   5  // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6  // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7  // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8  // option. This file may not be copied, modified, or distributed
   9  // except according to those terms.
  10
  11  use ast;
  12  use codemap::{BytePos, CharPos, CodeMap, Pos};
  13  use diagnostic;
  14  use parse::lexer::{is_whitespace, with_str_from, Reader};
  15  use parse::lexer::{StringReader, bump, is_eof, nextch_is, TokenAndSpan};
  16  use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
  17  use parse::lexer;
  18  use parse::token;
  19
  20  use std::io;
  21  use std::str;
  22  use std::strbuf::StrBuf;
  23  use std::uint;
  24
  25  #[deriving(Clone, Eq)]
  26  pub enum CommentStyle {
          // How a collected comment sits relative to the surrounding code; this is
          // the type of the `style` field of `Comment`.
  27      Isolated, // No code on either side of each line of the comment
  28      Trailing, // Code exists to the left of the comment
  29      Mixed, // Code before /* foo */ and after the comment
  30      BlankLine, // Just a manual blank line "\n\n", for layout
  31  }
  32
  33  #[deriving(Clone)]
  34  pub struct Comment {
          // One entry gathered by this module: either a non-doc comment or a
          // blank-line marker.

          // Placement of the comment relative to code.
  35      pub style: CommentStyle,
          // Text of the comment, one element per source line. Blank-line markers
          // are pushed with an empty vector.
  36      pub lines: Vec<StrBuf>,
          // Value of the reader's `last_pos` taken when the comment was reached.
  37      pub pos: BytePos,
  38  }
  39
  40  pub fn is_doc_comment(s: &str) -> bool {
          // True when `s` opens like a doc comment: always for "//!" and "/*!";
          // for "///" and "/**" only when the lexer's matching
          // `is_line_non_doc_comment` / `is_block_non_doc_comment` predicate does
          // not classify it as an ordinary comment.
  41      (s.starts_with("///") && !is_line_non_doc_comment(s)) ||
  42      s.starts_with("//!") ||
  43      (s.starts_with("/**") && !is_block_non_doc_comment(s)) ||
  44      s.starts_with("/*!")
  45  }
  46
  47  pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
          // Maps a doc comment to the attribute style it stands for: the "//!" and
          // "/*!" forms give `AttrInner`, every other doc comment gives
          // `AttrOuter`. The assertion fails if `comment` is not a doc comment
          // according to `is_doc_comment`.
  48      assert!(is_doc_comment(comment));
  49      if comment.starts_with("//!") || comment.starts_with("/*!") {
  50          ast::AttrInner
  51      } else {
  52          ast::AttrOuter
  53      }
  54  }
  55
  56  pub fn strip_doc_comment_decoration(comment: &str) -> StrBuf {
          // Removes the comment markers from `comment` and returns the remaining
          // text.
          //
          // * A line comment loses the first prefix in ONLINERS that matches; the
          //   rest of the string is returned unchanged.
          // * A block comment loses its first three and last two bytes (the
          //   opening marker and the closing "*/"). The remaining lines are then
          //   trimmed vertically (border and blank lines) and horizontally (a
          //   shared "[ \t]*\*" left margin) and re-joined with "\n".
          //
          // Fails if `comment` starts with neither "//" nor "/*".

  57      /// remove whitespace-only lines from the start/end of the comment,
          /// together with an all-stars border line at either end
  58      fn vertical_trim(lines: Vec<StrBuf> ) -> Vec<StrBuf> {
              // `i..j` is the half-open range of lines that survive.
  59          let mut i = 0u;
  60          let mut j = lines.len();
  61          // first line of all-stars should be omitted
              // (an empty first line also satisfies `all` and is dropped here)
  62          if lines.len() > 0 &&
  63                  lines.get(0).as_slice().chars().all(|c| c == '*') {
  64              i += 1;
  65          }
  66          while i < j && lines.get(i).as_slice().trim().is_empty() {
  67              i += 1;
  68          }
  69          // like the first, a last line of all stars should be omitted
              // (its first character is skipped, so " ****" counts as all stars)
  70          if j > i && lines.get(j - 1)
  71                           .as_slice()
  72                           .chars()
  73                           .skip(1)
  74                           .all(|c| c == '*') {
  75              j -= 1;
  76          }
  77          while j > i && lines.get(j - 1).as_slice().trim().is_empty() {
  78              j -= 1;
  79          }
  80          return lines.slice(i, j).iter().map(|x| (*x).clone()).collect();
  81      }
  82
  83      /// remove a "[ \t]*\*" block from each line, if possible
          ///
          /// The margin is only removed when every line has its first '*' in the
          /// same column, preceded by nothing but spaces, tabs or nothing at all;
          /// otherwise the lines are returned untouched.
  84      fn horizontal_trim(lines: Vec<StrBuf> ) -> Vec<StrBuf> {
              // `i` is the column of the margin star, fixed by the first line that
              // contains one; uint::MAX means "not found yet".
  85          let mut i = uint::MAX;
  86          let mut can_trim = true;
  87          let mut first = true;
  88          for line in lines.iter() {
  89              for (j, c) in line.as_slice().chars().enumerate() {
                      // Past the star column, or a character that cannot be part
                      // of the margin: this line has no trimmable margin.
  90                  if j > i || !"* \t".contains_char(c) {
  91                      can_trim = false;
  92                      break;
  93                  }
  94                  if c == '*' {
  95                      if first {
  96                          i = j;
  97                          first = false;
  98                      } else if i != j {
  99                          can_trim = false;
100                      }
101                      break;
102                  }
103              }
                 // `>=`, not `>`: the slice below starts at `i + 1`, so a line
                 // whose length is exactly `i` (for instance a whitespace-only
                 // line that ends at the star column) would otherwise be sliced
                 // past its end and fail. Such a line has no margin star, so the
                 // margin is not shared and nothing is trimmed.
104              if i >= line.len() {
105                  can_trim = false;
106              }
107              if !can_trim {
108                  break;
109              }
110          }
111
112          if can_trim {
                 // Every character up to and including the star is ASCII, so the
                 // character index `i` is also a valid byte index.
113              lines.iter().map(|line| {
114                  line.as_slice().slice(i + 1, line.len()).to_strbuf()
115              }).collect()
116          } else {
117              lines
118          }
119      }
120
121      // one-line comments lose their prefix
         // (longer prefixes are listed first so "///!" wins over "///")
122      static ONLINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
123      for prefix in ONLINERS.iter() {
124          if comment.starts_with(*prefix) {
125              return comment.slice_from(prefix.len()).to_strbuf();
126          }
127      }
128
129      if comment.starts_with("/*") {
             // Drop the three-byte opening marker and the two-byte "*/".
130          let lines = comment.slice(3u, comment.len() - 2u)
131              .lines_any()
132              .map(|s| s.to_strbuf())
133              .collect::<Vec<StrBuf> >();
134
135          let lines = vertical_trim(lines);
136          let lines = horizontal_trim(lines);
137
138          return lines.connect("\n").to_strbuf();
139      }
140
141      fail!("not a doc-comment: {}", comment);
142  }
143
144  fn read_to_eol(rdr: &mut StringReader) -> StrBuf {
         // Collects characters from the reader up to, but not including, the next
         // '\n' or the end of input, then steps over the '\n' if one is there.
         // Returns the collected text.
145      let mut val = StrBuf::new();
146      while !rdr.curr_is('\n') && !is_eof(rdr) {
147          val.push_char(rdr.curr.unwrap());
148          bump(rdr);
149      }
150      if rdr.curr_is('\n') { bump(rdr); }
151      return val
152  }
153
154  fn read_one_line_comment(rdr: &mut StringReader) -> StrBuf {
         // Reads the rest of the current line and checks that it begins with "//"
         // or "#!". The check indexes the first two bytes directly; both call
         // sites in this file only get here after seeing those two characters.
155      let val = read_to_eol(rdr);
156      assert!((val.as_slice()[0] == '/' as u8 &&
157                  val.as_slice()[1] == '/' as u8) ||
158                  (val.as_slice()[0] == '#' as u8 &&
159                   val.as_slice()[1] == '!' as u8));
160      return val;
161  }
162
163  fn consume_non_eol_whitespace(rdr: &mut StringReader) {
         // Skips whitespace on the current line only: stops at the first '\n',
         // the first non-whitespace character, or the end of input.
164      while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
165          bump(rdr);
166      }
167  }
168
169  fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
         // Appends a `BlankLine` marker (no text lines) positioned at the
         // reader's `last_pos`.
170      debug!(">>> blank-line comment");
171      comments.push(Comment {
172          style: BlankLine,
173          lines: Vec::new(),
174          pos: rdr.last_pos,
175      });
176  }
177
178  fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
179                                             comments: &mut Vec<Comment>) {
         // Skips all whitespace, newlines included. Each '\n' met while `rdr.col`
         // is `CharPos(0u)` is recorded as a `BlankLine` marker in `comments`.
         // NOTE(review): presumably `col` is the column of the current character,
         // so this condition means "the line is empty" -- confirm in the lexer.
180      while is_whitespace(rdr.curr) && !is_eof(rdr) {
181          if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
182              push_blank_line_comment(rdr, &mut *comments);
183          }
184          bump(rdr);
185      }
186  }
187
188
189  fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
190                          comments: &mut Vec<Comment>) {
         // Records a "#!" line as a one-line comment: `Trailing` when
         // `code_to_the_left` is set, `Isolated` otherwise. The position is taken
         // before the line is read.
191      debug!(">>> shebang comment");
192      let p = rdr.last_pos;
         // NOTE(review): the closing trace is logged before the line is actually
         // read (the read happens while building the `Comment` below).
193      debug!("<<< shebang comment");
194      comments.push(Comment {
195          style: if code_to_the_left { Trailing } else { Isolated },
196          lines: vec!(read_one_line_comment(rdr)),
197          pos: p
198      });
199  }
200
201  fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
202                        comments: &mut Vec<Comment>) {
         // Gathers a run of consecutive "//" lines into a single `Comment`.
         // The run ends at the first line that is a doc comment; that line has
         // already been read from the reader by then and is not stored.
         // Nothing is pushed when no ordinary comment line was collected.
203      debug!(">>> line comments");
204      let p = rdr.last_pos;
205      let mut lines: Vec<StrBuf> = Vec::new();
206      while rdr.curr_is('/') && nextch_is(rdr, '/') {
207          let line = read_one_line_comment(rdr);
208          debug!("{}", line);
209          // Doc comments are not put in comments.
210          if is_doc_comment(line.as_slice()) {
211              break;
212          }
213          lines.push(line);
             // Skip the indentation of the following line so that the loop
             // condition sees its first non-blank character.
214          consume_non_eol_whitespace(rdr);
215      }
216      debug!("<<< line comments");
217      if !lines.is_empty() {
218          comments.push(Comment {
219              style: if code_to_the_left { Trailing } else { Isolated },
220              lines: lines,
221              pos: p
222          });
223      }
224  }
225
226  // Returns None if the first col chars of s contain a non-whitespace char.
227  // Otherwise returns Some(k) where k is first char offset after that leading
228  // whitespace.  Note k may be outside bounds of s.
229  fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
         // `col` counts characters still to be checked, while `cursor` is a byte
         // offset advanced one whole character at a time via `char_range_at`.
         // The loop also stops when `s` runs out before `col` characters were
         // seen, in which case the returned offset is the end of `s`.
230      let len = s.len();
231      let mut col = col.to_uint();
232      let mut cursor: uint = 0;
233      while col > 0 && cursor < len {
234          let r: str::CharRange = s.char_range_at(cursor);
235          if !r.ch.is_whitespace() {
236              return None;
237          }
238          cursor = r.next;
239          col -= 1;
240      }
241      return Some(cursor);
242  }
243
244  fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<StrBuf> ,
245                                          s: StrBuf, col: CharPos) {
         // Pushes `s` onto `lines` after removing up to `col` characters of
         // leading whitespace. If a non-whitespace character occurs within the
         // first `col` characters, `s` is pushed unchanged.
246      let len = s.len();
247      let s1 = match all_whitespace(s.as_slice(), col) {
             // This `col` shadows the parameter: it is the byte offset returned by
             // `all_whitespace`, not a column count.
248          Some(col) => {
249              if col < len {
250                  s.as_slice().slice(col, len).to_strbuf()
251              } else {
                     // The whole line was whitespace within the prefix.
252                  "".to_strbuf()
253              }
254          }
255          None => s,
256      };
257      debug!("pushing line: {}", s1);
258      lines.push(s1);
259  }
260
261  fn read_block_comment(rdr: &mut StringReader,
262                        code_to_the_left: bool,
263                        comments: &mut Vec<Comment> ) {
         // Reads one "/* ... */" comment and, unless it turns out to be a doc
         // comment, appends it to `comments`.
         //
         // The caller in this file only gets here with the reader on "/*". The
         // column of that opening is remembered so that the same amount of
         // indentation can be stripped from the continuation lines.
264      debug!(">>> block comment");
265      let p = rdr.last_pos;
266      let mut lines: Vec<StrBuf> = Vec::new();
267      let col = rdr.col;
         // Step over the "/*"; it is re-added to `curr_line` just below.
268      bump(rdr);
269      bump(rdr);
270
271      let mut curr_line = StrBuf::from_str("/*");
272
273      // doc-comments are not really comments, they are attributes
         // This branch handles text that opens like a doc comment ("/**" not
         // followed by another '*', or "/*!"). It is read up to the first "*/"
         // with no nesting, and dropped if it really is a doc comment.
274      if (rdr.curr_is('*') && !nextch_is(rdr, '*')) || rdr.curr_is('!') {
275          while !(rdr.curr_is('*') && nextch_is(rdr, '/')) && !is_eof(rdr) {
276              curr_line.push_char(rdr.curr.unwrap());
277              bump(rdr);
278          }
279          if !is_eof(rdr) {
280              curr_line.push_str("*/");
281              bump(rdr);
282              bump(rdr);
283          }
284          if !is_block_non_doc_comment(curr_line.as_slice()) {
                 // A genuine doc comment: consumed, but not recorded.
285              return
286          }
             // What remains is kept as a single line, hence the assertion.
287          assert!(!curr_line.as_slice().contains_char('\n'));
288          lines.push(curr_line);
289      } else {
             // Ordinary block comment. `level` is the nesting depth: it goes up on
             // every inner "/*" and down on every "*/"; the comment ends when it
             // reaches zero.
290          let mut level: int = 1;
291          while level > 0 {
292              debug!("=== block comment level {}", level);
293              if is_eof(rdr) {
294                  rdr.fatal("unterminated block comment".to_strbuf());
295              }
296              if rdr.curr_is('\n') {
                     // End of a source line: store it (minus the shared
                     // indentation) and start a fresh one.
297                  trim_whitespace_prefix_and_push_line(&mut lines,
298                                                       curr_line,
299                                                       col);
300                  curr_line = StrBuf::new();
301                  bump(rdr);
302              } else {
                     // The current character is always kept; for the two-character
                     // delimiters the second character is pushed explicitly after
                     // both have been consumed.
303                  curr_line.push_char(rdr.curr.unwrap());
304                  if rdr.curr_is('/') && nextch_is(rdr, '*') {
305                      bump(rdr);
306                      bump(rdr);
307                      curr_line.push_char('*');
308                      level += 1;
309                  } else {
310                      if rdr.curr_is('*') && nextch_is(rdr, '/') {
311                          bump(rdr);
312                          bump(rdr);
313                          curr_line.push_char('/');
314                          level -= 1;
315                      } else { bump(rdr); }
316                  }
317              }
318          }
             // Flush the final (unterminated by '\n') line, if it has any text.
319          if curr_line.len() != 0 {
320              trim_whitespace_prefix_and_push_line(&mut lines,
321                                                   curr_line,
322                                                   col);
323          }
324      }
325
326      let mut style = if code_to_the_left { Trailing } else { Isolated };
327      consume_non_eol_whitespace(rdr);
         // A one-line comment followed by something else on the same line is
         // classified as `Mixed`, whatever was to its left.
328      if !is_eof(rdr) && !rdr.curr_is('\n') && lines.len() == 1u {
329          style = Mixed;
330      }
331      debug!("<<< block comment");
332      comments.push(Comment {style: style, lines: lines, pos: p});
333  }
334
335  fn peeking_at_comment(rdr: &StringReader) -> bool {
         // True when the reader is positioned on "//", on "/*", or on "#!" that
         // is not immediately followed by '['. Nothing is consumed.
336      return (rdr.curr_is('/') && nextch_is(rdr, '/')) ||
337           (rdr.curr_is('/') && nextch_is(rdr, '*')) ||
338           // consider shebangs comments, but not inner attributes
339           (rdr.curr_is('#') && nextch_is(rdr, '!') &&
340            !lexer::nextnextch_is(rdr, '['));
341  }
342
343  fn consume_comment(rdr: &mut StringReader,
344                     code_to_the_left: bool,
345                     comments: &mut Vec<Comment> ) {
         // Dispatches on the two characters at the reader position to the line,
         // block or shebang reader. Fails if the reader is not on any of the
         // three openings.
         // Unlike `peeking_at_comment`, the "#!" arm here does not look at the
         // character that follows.
346      debug!(">>> consume comment");
347      if rdr.curr_is('/') && nextch_is(rdr, '/') {
348          read_line_comments(rdr, code_to_the_left, comments);
349      } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
350          read_block_comment(rdr, code_to_the_left, comments);
351      } else if rdr.curr_is('#') && nextch_is(rdr, '!') {
352          read_shebang_comment(rdr, code_to_the_left, comments);
353      } else { fail!(); }
354      debug!("<<< consume comment");
355  }
356
357  #[deriving(Clone)]
358  pub struct Literal {
         // A literal token found while gathering comments and literals.

         // Text handed back by `with_str_from` for the token.
359      pub lit: StrBuf,
         // Low end (`lo`) of the token's span.
360      pub pos: BytePos,
361  }
362
363  // it appears this function is called only from pprust... that's
364  // probably not a good thing.
     //
     // Reads all of `srdr`, lexes it under the file name `path`, and returns every
     // non-doc comment (plus blank-line markers) and every literal token that was
     // found, each in source order.
     //
     // Fails if reading `srdr` returns an error or if its contents are not valid
     // UTF-8 (both results are unwrapped).
365  pub fn gather_comments_and_literals(span_diagnostic:
366                                          &diagnostic::SpanHandler,
367                                      path: StrBuf,
368                                      srdr: &mut io::Reader)
369                                   -> (Vec<Comment>, Vec<Literal>) {
370      let src = srdr.read_to_end().unwrap();
371      let src = str::from_utf8(src.as_slice()).unwrap().to_strbuf();
372      let cm = CodeMap::new();
373      let filemap = cm.new_filemap(path, src);
374      let mut rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);
375
376      let mut comments: Vec<Comment> = Vec::new();
377      let mut literals: Vec<Literal> = Vec::new();
         // False once the first token has been read; before that there cannot be
         // any code to the left of a comment.
378      let mut first_read: bool = true;
379      while !is_eof(&rdr) {
             // This `loop` always ends at the unconditional `break`, so its body
             // runs exactly once per token.
380          loop {
381              let mut code_to_the_left = !first_read;
382              consume_non_eol_whitespace(&mut rdr);
383              if rdr.curr_is('\n') {
                     // A line break separates the previous token from whatever
                     // comment follows.
384                  code_to_the_left = false;
385                  consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
386              }
387              while peeking_at_comment(&rdr) {
388                  consume_comment(&mut rdr, code_to_the_left, &mut comments);
389                  consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
390              }
391              break;
392          }
393
394
             // Remember where the upcoming token starts so that its source text can
             // be recovered below.
395          let bstart = rdr.last_pos;
396          rdr.next_token();
397          //discard, and look ahead; we're working with internal state
398          let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
399          if token::is_lit(&tok) {
                 // NOTE(review): presumably `with_str_from` passes the source text
                 // from `bstart` up to the reader's current position -- confirm in
                 // the lexer.
400              with_str_from(&rdr, bstart, |s| {
401                  debug!("tok lit: {}", s);
402                  literals.push(Literal {lit: s.to_strbuf(), pos: sp.lo});
403              })
404          } else {
405              debug!("tok: {}", token::to_str(&tok));
406          }
407          first_read = false;
408      }
409
410      (comments, literals)
411  }
412
413  #[cfg(test)]
414  mod test {
         // Unit tests for `strip_doc_comment_decoration`.
415      use super::*;
416
         // Lines share a " *" margin in column 1, so it is removed; the second
         // star on the middle line is content and stays.
417      #[test] fn test_block_doc_comment_1() {
418          let comment = "/**\n * Test \n **  Test\n *   Test\n*/";
419          let stripped = strip_doc_comment_decoration(comment);
420          assert_eq!(stripped, " Test \n*  Test\n   Test".to_strbuf());
421      }
422
         // Plain case: a uniform " *" margin on every line.
423      #[test] fn test_block_doc_comment_2() {
424          let comment = "/**\n * Test\n *  Test\n*/";
425          let stripped = strip_doc_comment_decoration(comment);
426          assert_eq!(stripped, " Test\n  Test".to_strbuf());
427      }
428
         // The first line has no margin star, so nothing is trimmed horizontally
         // and the leading '*' of the second line is preserved.
429      #[test] fn test_block_doc_comment_3() {
430          let comment = "/**\n let a: *int;\n *a = 5;\n*/";
431          let stripped = strip_doc_comment_decoration(comment);
432          assert_eq!(stripped, " let a: *int;\n *a = 5;".to_strbuf());
433      }
434
         // Border lines made only of stars are dropped from both ends.
435      #[test] fn test_block_doc_comment_4() {
436          let comment = "/*******************\n test\n *********************/";
437          let stripped = strip_doc_comment_decoration(comment);
438          assert_eq!(stripped, " test".to_strbuf());
439      }
440
         // Each one-line prefix is removed and the remainder is left untouched.
         // NOTE(review): the "// test" case appears twice in a row below; one of
         // the two was possibly meant to cover "//! test".
441      #[test] fn test_line_doc_comment() {
442          let stripped = strip_doc_comment_decoration("/// test");
443          assert_eq!(stripped, " test".to_strbuf());
444          let stripped = strip_doc_comment_decoration("///! test");
445          assert_eq!(stripped, " test".to_strbuf());
446          let stripped = strip_doc_comment_decoration("// test");
447          assert_eq!(stripped, " test".to_strbuf());
448          let stripped = strip_doc_comment_decoration("// test");
449          assert_eq!(stripped, " test".to_strbuf());
450          let stripped = strip_doc_comment_decoration("///test");
451          assert_eq!(stripped, "test".to_strbuf());
452          let stripped = strip_doc_comment_decoration("///!test");
453          assert_eq!(stripped, "test".to_strbuf());
454          let stripped = strip_doc_comment_decoration("//test");
455          assert_eq!(stripped, "test".to_strbuf());
456      }
457  }

libsyntax/parse/comments.rs:39:1-39:1 -fn- definition:
pub fn is_doc_comment(s: &str) -> bool {
    (s.starts_with("///") && !is_line_non_doc_comment(s)) ||
    s.starts_with("//!") ||
references:- 2
47: pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
48:     assert!(is_doc_comment(comment));
49:     if comment.starts_with("//!") || comment.starts_with("/*!") {
--
209:         // Doc comments are not put in comments.
210:         if is_doc_comment(line.as_slice()) {
211:             break;

libsyntax/parse/comments.rs:162:1-162:1 -fn- definition:
fn consume_non_eol_whitespace(rdr: &mut StringReader) {
    while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
        bump(rdr);
references:- 3
213:         lines.push(line);
214:         consume_non_eol_whitespace(rdr);
215:     }
--
381:             let mut code_to_the_left = !first_read;
382:             consume_non_eol_whitespace(&mut rdr);
383:             if rdr.curr_is('\n') {

libsyntax/parse/comments.rs:153:1-153:1 -fn- definition:
fn read_one_line_comment(rdr: &mut StringReader) -> StrBuf {
    let val = read_to_eol(rdr);
    assert!((val.as_slice()[0] == '/' as u8 &&
references:- 2
195:         style: if code_to_the_left { Trailing } else { Isolated },
196:         lines: vec!(read_one_line_comment(rdr)),
197:         pos: p
--
206:     while rdr.curr_is('/') && nextch_is(rdr, '/') {
207:         let line = read_one_line_comment(rdr);
208:         debug!("{}", line);

libsyntax/parse/comments.rs:177:1-177:1 -fn- definition:
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
                                           comments: &mut Vec<Comment>) {
    while is_whitespace(rdr.curr) && !is_eof(rdr) {
references:- 2
384:                 code_to_the_left = false;
385:                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
386:             }
--
388:                 consume_comment(&mut rdr, code_to_the_left, &mut comments);
389:                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
390:             }

libsyntax/parse/comments.rs:25:23-25:23 -enum- definition:
pub enum CommentStyle {
    Isolated, // No code on either side of each line of the comment
    Trailing, // Code exists to the left of the comment
references:- 6
26: pub enum CommentStyle {
--
34: pub struct Comment {
35:     pub style: CommentStyle,
36:     pub lines: Vec<StrBuf>,

libsyntax/parse/comments.rs:357:19-357:19 -struct- definition:
pub struct Literal {
    pub lit: StrBuf,
    pub pos: BytePos,
references:- 9
401:                 debug!("tok lit: {}", s);
402:                 literals.push(Literal {lit: s.to_strbuf(), pos: sp.lo});
403:             })
libsyntax/print/pprust.rs:
2244:     pub fn next_lit(&mut self, pos: BytePos) -> Option<comments::Literal> {
2245:         match self.literals {
libsyntax/parse/comments.rs:
358: pub struct Literal {

libsyntax/parse/comments.rs:243:1-243:1 -fn- definition:
fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<StrBuf> ,
                                        s: StrBuf, col: CharPos) {
    let len = s.len();
references:- 2
319:         if curr_line.len() != 0 {
320:             trim_whitespace_prefix_and_push_line(&mut lines,
321:                                                  curr_line,

libsyntax/parse/comments.rs:33:19-33:19 -struct- definition:
pub struct Comment {
    pub style: CommentStyle,
    pub lines: Vec<StrBuf>,
references:- 19
193:     debug!("<<< shebang comment");
194:     comments.push(Comment {
195:         style: if code_to_the_left { Trailing } else { Isolated },
--
331:     debug!("<<< block comment");
332:     comments.push(Comment {style: style, lines: lines, pos: p});
333: }
--
376:     let mut comments: Vec<Comment> = Vec::new();
377:     let mut literals: Vec<Literal> = Vec::new();
libsyntax/print/pprust.rs:
2335:     pub fn next_comment(&mut self) -> Option<comments::Comment> {
2336:         match self.comments {
libsyntax/parse/comments.rs:
189: fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
190:                         comments: &mut Vec<Comment>) {
191:     debug!(">>> shebang comment");