1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 use ast;
12 use codemap::{BytePos, CharPos, CodeMap, Pos};
13 use diagnostic;
14 use parse::lexer::{is_whitespace, with_str_from, Reader};
15 use parse::lexer::{StringReader, bump, is_eof, nextch_is, TokenAndSpan};
16 use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
17 use parse::lexer;
18 use parse::token;
19
20 use std::io;
21 use std::str;
22 use std::strbuf::StrBuf;
23 use std::uint;
24
// How a gathered comment sits relative to the code around it. The comment
// readers in this file assign `Isolated` or `Trailing` from their
// `code_to_the_left` argument, upgrade a one-line block comment to `Mixed`
// when more text follows it on the same line, and use `BlankLine` for the
// placeholder entries pushed by `push_blank_line_comment`.
#[deriving(Clone, Eq)]
pub enum CommentStyle {
    Isolated, // No code on either side of each line of the comment
    Trailing, // Code exists to the left of the comment
    Mixed, // Code before /* foo */ and after the comment
    BlankLine, // Just a manual blank line "\n\n", for layout
}
32
// One comment (or blank-line marker) collected from the source text.
#[deriving(Clone)]
pub struct Comment {
    // Placement of the comment relative to surrounding code.
    pub style: CommentStyle,
    // Text of the comment, one entry per line; empty for `BlankLine` entries.
    pub lines: Vec<StrBuf>,
    // The reader's `last_pos` captured when the comment was started.
    pub pos: BytePos,
}
39
40 pub fn is_doc_comment(s: &str) -> bool {
41 (s.starts_with("///") && !is_line_non_doc_comment(s)) ||
42 s.starts_with("//!") ||
43 (s.starts_with("/**") && !is_block_non_doc_comment(s)) ||
44 s.starts_with("/*!")
45 }
46
47 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
48 assert!(is_doc_comment(comment));
49 if comment.starts_with("//!") || comment.starts_with("/*!") {
50 ast::AttrInner
51 } else {
52 ast::AttrOuter
53 }
54 }
55
56 pub fn strip_doc_comment_decoration(comment: &str) -> StrBuf {
57 /// remove whitespace-only lines from the start/end of lines
58 fn vertical_trim(lines: Vec<StrBuf> ) -> Vec<StrBuf> {
59 let mut i = 0u;
60 let mut j = lines.len();
61 // first line of all-stars should be omitted
62 if lines.len() > 0 &&
63 lines.get(0).as_slice().chars().all(|c| c == '*') {
64 i += 1;
65 }
66 while i < j && lines.get(i).as_slice().trim().is_empty() {
67 i += 1;
68 }
69 // like the first, a last line of all stars should be omitted
70 if j > i && lines.get(j - 1)
71 .as_slice()
72 .chars()
73 .skip(1)
74 .all(|c| c == '*') {
75 j -= 1;
76 }
77 while j > i && lines.get(j - 1).as_slice().trim().is_empty() {
78 j -= 1;
79 }
80 return lines.slice(i, j).iter().map(|x| (*x).clone()).collect();
81 }
82
83 /// remove a "[ \t]*\*" block from each line, if possible
84 fn horizontal_trim(lines: Vec<StrBuf> ) -> Vec<StrBuf> {
85 let mut i = uint::MAX;
86 let mut can_trim = true;
87 let mut first = true;
88 for line in lines.iter() {
89 for (j, c) in line.as_slice().chars().enumerate() {
90 if j > i || !"* \t".contains_char(c) {
91 can_trim = false;
92 break;
93 }
94 if c == '*' {
95 if first {
96 i = j;
97 first = false;
98 } else if i != j {
99 can_trim = false;
100 }
101 break;
102 }
103 }
104 if i > line.len() {
105 can_trim = false;
106 }
107 if !can_trim {
108 break;
109 }
110 }
111
112 if can_trim {
113 lines.iter().map(|line| {
114 line.as_slice().slice(i + 1, line.len()).to_strbuf()
115 }).collect()
116 } else {
117 lines
118 }
119 }
120
121 // one-line comments lose their prefix
122 static ONLINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
123 for prefix in ONLINERS.iter() {
124 if comment.starts_with(*prefix) {
125 return comment.slice_from(prefix.len()).to_strbuf();
126 }
127 }
128
129 if comment.starts_with("/*") {
130 let lines = comment.slice(3u, comment.len() - 2u)
131 .lines_any()
132 .map(|s| s.to_strbuf())
133 .collect::<Vec<StrBuf> >();
134
135 let lines = vertical_trim(lines);
136 let lines = horizontal_trim(lines);
137
138 return lines.connect("\n").to_strbuf();
139 }
140
141 fail!("not a doc-comment: {}", comment);
142 }
143
144 fn read_to_eol(rdr: &mut StringReader) -> StrBuf {
145 let mut val = StrBuf::new();
146 while !rdr.curr_is('\n') && !is_eof(rdr) {
147 val.push_char(rdr.curr.unwrap());
148 bump(rdr);
149 }
150 if rdr.curr_is('\n') { bump(rdr); }
151 return val
152 }
153
154 fn read_one_line_comment(rdr: &mut StringReader) -> StrBuf {
155 let val = read_to_eol(rdr);
156 assert!((val.as_slice()[0] == '/' as u8 &&
157 val.as_slice()[1] == '/' as u8) ||
158 (val.as_slice()[0] == '#' as u8 &&
159 val.as_slice()[1] == '!' as u8));
160 return val;
161 }
162
163 fn consume_non_eol_whitespace(rdr: &mut StringReader) {
164 while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
165 bump(rdr);
166 }
167 }
168
169 fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
170 debug!(">>> blank-line comment");
171 comments.push(Comment {
172 style: BlankLine,
173 lines: Vec::new(),
174 pos: rdr.last_pos,
175 });
176 }
177
178 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
179 comments: &mut Vec<Comment>) {
180 while is_whitespace(rdr.curr) && !is_eof(rdr) {
181 if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
182 push_blank_line_comment(rdr, &mut *comments);
183 }
184 bump(rdr);
185 }
186 }
187
188
189 fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
190 comments: &mut Vec<Comment>) {
191 debug!(">>> shebang comment");
192 let p = rdr.last_pos;
193 debug!("<<< shebang comment");
194 comments.push(Comment {
195 style: if code_to_the_left { Trailing } else { Isolated },
196 lines: vec!(read_one_line_comment(rdr)),
197 pos: p
198 });
199 }
200
201 fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
202 comments: &mut Vec<Comment>) {
203 debug!(">>> line comments");
204 let p = rdr.last_pos;
205 let mut lines: Vec<StrBuf> = Vec::new();
206 while rdr.curr_is('/') && nextch_is(rdr, '/') {
207 let line = read_one_line_comment(rdr);
208 debug!("{}", line);
209 // Doc comments are not put in comments.
210 if is_doc_comment(line.as_slice()) {
211 break;
212 }
213 lines.push(line);
214 consume_non_eol_whitespace(rdr);
215 }
216 debug!("<<< line comments");
217 if !lines.is_empty() {
218 comments.push(Comment {
219 style: if code_to_the_left { Trailing } else { Isolated },
220 lines: lines,
221 pos: p
222 });
223 }
224 }
225
226 // Returns None if the first col chars of s contain a non-whitespace char.
227 // Otherwise returns Some(k) where k is first char offset after that leading
228 // whitespace. Note k may be outside bounds of s.
229 fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
230 let len = s.len();
231 let mut col = col.to_uint();
232 let mut cursor: uint = 0;
233 while col > 0 && cursor < len {
234 let r: str::CharRange = s.char_range_at(cursor);
235 if !r.ch.is_whitespace() {
236 return None;
237 }
238 cursor = r.next;
239 col -= 1;
240 }
241 return Some(cursor);
242 }
243
244 fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<StrBuf> ,
245 s: StrBuf, col: CharPos) {
246 let len = s.len();
247 let s1 = match all_whitespace(s.as_slice(), col) {
248 Some(col) => {
249 if col < len {
250 s.as_slice().slice(col, len).to_strbuf()
251 } else {
252 "".to_strbuf()
253 }
254 }
255 None => s,
256 };
257 debug!("pushing line: {}", s1);
258 lines.push(s1);
259 }
260
// Reads one "/* ... */" comment starting at the reader's current position
// and, unless it turns out to be a doc comment, appends it to `comments`.
//
// `code_to_the_left` selects `Trailing` over `Isolated`; a single-line
// comment followed by more text on the same line becomes `Mixed`.
fn read_block_comment(rdr: &mut StringReader,
                      code_to_the_left: bool,
                      comments: &mut Vec<Comment> ) {
    debug!(">>> block comment");
    let p = rdr.last_pos;
    let mut lines: Vec<StrBuf> = Vec::new();
    // Column where the comment opens; continuation lines have up to this
    // many leading whitespace characters removed.
    let col = rdr.col;
    // Step over the "/*" opener (the caller has already checked for it).
    bump(rdr);
    bump(rdr);

    let mut curr_line = StrBuf::from_str("/*");

    // doc-comments are not really comments, they are attributes
    if (rdr.curr_is('*') && !nextch_is(rdr, '*')) || rdr.curr_is('!') {
        // Candidate doc comment ("/**" not followed by another '*', or
        // "/*!"): copy everything up to the first "*/" or end of input.
        // Nested "/*" openers are not tracked on this path.
        while !(rdr.curr_is('*') && nextch_is(rdr, '/')) && !is_eof(rdr) {
            curr_line.push_char(rdr.curr.unwrap());
            bump(rdr);
        }
        if !is_eof(rdr) {
            curr_line.push_str("*/");
            bump(rdr);
            bump(rdr);
        }
        // A genuine doc comment is consumed but not recorded.
        if !is_block_non_doc_comment(curr_line.as_slice()) {
            return
        }
        // What remains was rejected as a doc comment by the lexer helper;
        // it is required to fit on one line.
        assert!(!curr_line.as_slice().contains_char('\n'));
        lines.push(curr_line);
    } else {
        // Ordinary block comment: `level` counts open "/*" so that nested
        // comments are consumed as part of this one.
        let mut level: int = 1;
        while level > 0 {
            debug!("=== block comment level {}", level);
            if is_eof(rdr) {
                // NOTE(review): presumably `fatal` does not return; if it
                // did, the `unwrap` below would run at end of input --
                // confirm.
                rdr.fatal("unterminated block comment".to_strbuf());
            }
            if rdr.curr_is('\n') {
                // End of a source line: store it and start a new one.
                trim_whitespace_prefix_and_push_line(&mut lines,
                                                     curr_line,
                                                     col);
                curr_line = StrBuf::new();
                bump(rdr);
            } else {
                // The current character is recorded first; for a two
                // character "/*" or "*/" the second character is appended
                // after both have been consumed.
                curr_line.push_char(rdr.curr.unwrap());
                if rdr.curr_is('/') && nextch_is(rdr, '*') {
                    bump(rdr);
                    bump(rdr);
                    curr_line.push_char('*');
                    level += 1;
                } else {
                    if rdr.curr_is('*') && nextch_is(rdr, '/') {
                        bump(rdr);
                        bump(rdr);
                        curr_line.push_char('/');
                        level -= 1;
                    } else { bump(rdr); }
                }
            }
        }
        // Flush the final (unterminated-by-newline) line, if any.
        if curr_line.len() != 0 {
            trim_whitespace_prefix_and_push_line(&mut lines,
                                                 curr_line,
                                                 col);
        }
    }

    let mut style = if code_to_the_left { Trailing } else { Isolated };
    consume_non_eol_whitespace(rdr);
    // Something other than a newline follows a one-line comment on the same
    // line, so the comment is embedded in code.
    if !is_eof(rdr) && !rdr.curr_is('\n') && lines.len() == 1u {
        style = Mixed;
    }
    debug!("<<< block comment");
    comments.push(Comment {style: style, lines: lines, pos: p});
}
334
335 fn peeking_at_comment(rdr: &StringReader) -> bool {
336 return (rdr.curr_is('/') && nextch_is(rdr, '/')) ||
337 (rdr.curr_is('/') && nextch_is(rdr, '*')) ||
338 // consider shebangs comments, but not inner attributes
339 (rdr.curr_is('#') && nextch_is(rdr, '!') &&
340 !lexer::nextnextch_is(rdr, '['));
341 }
342
343 fn consume_comment(rdr: &mut StringReader,
344 code_to_the_left: bool,
345 comments: &mut Vec<Comment> ) {
346 debug!(">>> consume comment");
347 if rdr.curr_is('/') && nextch_is(rdr, '/') {
348 read_line_comments(rdr, code_to_the_left, comments);
349 } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
350 read_block_comment(rdr, code_to_the_left, comments);
351 } else if rdr.curr_is('#') && nextch_is(rdr, '!') {
352 read_shebang_comment(rdr, code_to_the_left, comments);
353 } else { fail!(); }
354 debug!("<<< consume comment");
355 }
356
// A literal token recorded by `gather_comments_and_literals`.
#[deriving(Clone)]
pub struct Literal {
    // Text of the literal as handed back by the lexer's `with_str_from`.
    pub lit: StrBuf,
    // Low end (`sp.lo`) of the literal token's span.
    pub pos: BytePos,
}
362
363 // it appears this function is called only from pprust... that's
364 // probably not a good thing.
365 pub fn gather_comments_and_literals(span_diagnostic:
366 &diagnostic::SpanHandler,
367 path: StrBuf,
368 srdr: &mut io::Reader)
369 -> (Vec<Comment>, Vec<Literal>) {
370 let src = srdr.read_to_end().unwrap();
371 let src = str::from_utf8(src.as_slice()).unwrap().to_strbuf();
372 let cm = CodeMap::new();
373 let filemap = cm.new_filemap(path, src);
374 let mut rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);
375
376 let mut comments: Vec<Comment> = Vec::new();
377 let mut literals: Vec<Literal> = Vec::new();
378 let mut first_read: bool = true;
379 while !is_eof(&rdr) {
380 loop {
381 let mut code_to_the_left = !first_read;
382 consume_non_eol_whitespace(&mut rdr);
383 if rdr.curr_is('\n') {
384 code_to_the_left = false;
385 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
386 }
387 while peeking_at_comment(&rdr) {
388 consume_comment(&mut rdr, code_to_the_left, &mut comments);
389 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
390 }
391 break;
392 }
393
394
395 let bstart = rdr.last_pos;
396 rdr.next_token();
397 //discard, and look ahead; we're working with internal state
398 let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
399 if token::is_lit(&tok) {
400 with_str_from(&rdr, bstart, |s| {
401 debug!("tok lit: {}", s);
402 literals.push(Literal {lit: s.to_strbuf(), pos: sp.lo});
403 })
404 } else {
405 debug!("tok: {}", token::to_str(&tok));
406 }
407 first_read = false;
408 }
409
410 (comments, literals)
411 }
412
// Unit tests for `strip_doc_comment_decoration`.
#[cfg(test)]
mod test {
    use super::*;

    // A shared " *" column is removed; a second star on a line is kept.
    #[test] fn test_block_doc_comment_1() {
        let comment = "/**\n * Test \n ** Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test \n* Test\n Test".to_strbuf());
    }

    #[test] fn test_block_doc_comment_2() {
        let comment = "/**\n * Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test\n Test".to_strbuf());
    }

    // Lines without a common star column are left untrimmed.
    #[test] fn test_block_doc_comment_3() {
        let comment = "/**\n let a: *int;\n *a = 5;\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " let a: *int;\n *a = 5;".to_strbuf());
    }

    // All-star first and last lines are dropped.
    #[test] fn test_block_doc_comment_4() {
        let comment = "/*******************\n test\n *********************/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " test".to_strbuf());
    }

    // Every one-line prefix is covered once, with and without a following
    // space. (Previously "// test" was asserted twice and "//!" not at all.)
    #[test] fn test_line_doc_comment() {
        let stripped = strip_doc_comment_decoration("/// test");
        assert_eq!(stripped, " test".to_strbuf());
        let stripped = strip_doc_comment_decoration("///! test");
        assert_eq!(stripped, " test".to_strbuf());
        let stripped = strip_doc_comment_decoration("//! test");
        assert_eq!(stripped, " test".to_strbuf());
        let stripped = strip_doc_comment_decoration("// test");
        assert_eq!(stripped, " test".to_strbuf());
        let stripped = strip_doc_comment_decoration("///test");
        assert_eq!(stripped, "test".to_strbuf());
        let stripped = strip_doc_comment_decoration("///!test");
        assert_eq!(stripped, "test".to_strbuf());
        let stripped = strip_doc_comment_decoration("//!test");
        assert_eq!(stripped, "test".to_strbuf());
        let stripped = strip_doc_comment_decoration("//test");
        assert_eq!(stripped, "test".to_strbuf());
    }
}
libsyntax/parse/comments.rs:39:1-39:1 -fn- definition:
pub fn is_doc_comment(s: &str) -> bool {
(s.starts_with("///") && !is_line_non_doc_comment(s)) ||
s.starts_with("//!") ||
references:- 247: pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
48: assert!(is_doc_comment(comment));
49: if comment.starts_with("//!") || comment.starts_with("/*!") {
--
209: // Doc comments are not put in comments.
210: if is_doc_comment(line.as_slice()) {
211: break;
libsyntax/parse/comments.rs:162:1-162:1 -fn- definition:
fn consume_non_eol_whitespace(rdr: &mut StringReader) {
while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
bump(rdr);
references:- 3213: lines.push(line);
214: consume_non_eol_whitespace(rdr);
215: }
--
381: let mut code_to_the_left = !first_read;
382: consume_non_eol_whitespace(&mut rdr);
383: if rdr.curr_is('\n') {
libsyntax/parse/comments.rs:153:1-153:1 -fn- definition:
fn read_one_line_comment(rdr: &mut StringReader) -> StrBuf {
let val = read_to_eol(rdr);
assert!((val.as_slice()[0] == '/' as u8 &&
references:- 2195: style: if code_to_the_left { Trailing } else { Isolated },
196: lines: vec!(read_one_line_comment(rdr)),
197: pos: p
--
206: while rdr.curr_is('/') && nextch_is(rdr, '/') {
207: let line = read_one_line_comment(rdr);
208: debug!("{}", line);
libsyntax/parse/comments.rs:177:1-177:1 -fn- definition:
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
comments: &mut Vec<Comment>) {
while is_whitespace(rdr.curr) && !is_eof(rdr) {
references:- 2384: code_to_the_left = false;
385: consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
386: }
--
388: consume_comment(&mut rdr, code_to_the_left, &mut comments);
389: consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
390: }
libsyntax/parse/comments.rs:25:23-25:23 -enum- definition:
pub enum CommentStyle {
Isolated, // No code on either side of each line of the comment
Trailing, // Code exists to the left of the comment
references:- 626: pub enum CommentStyle {
--
34: pub struct Comment {
35: pub style: CommentStyle,
36: pub lines: Vec<StrBuf>,
libsyntax/parse/comments.rs:357:19-357:19 -struct- definition:
pub struct Literal {
pub lit: StrBuf,
pub pos: BytePos,
references:- 9401: debug!("tok lit: {}", s);
402: literals.push(Literal {lit: s.to_strbuf(), pos: sp.lo});
403: })
libsyntax/print/pprust.rs:
2244: pub fn next_lit(&mut self, pos: BytePos) -> Option<comments::Literal> {
2245: match self.literals {
libsyntax/parse/comments.rs:
358: pub struct Literal {
libsyntax/parse/comments.rs:243:1-243:1 -fn- definition:
fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<StrBuf> ,
s: StrBuf, col: CharPos) {
let len = s.len();
references:- 2319: if curr_line.len() != 0 {
320: trim_whitespace_prefix_and_push_line(&mut lines,
321: curr_line,
libsyntax/parse/comments.rs:33:19-33:19 -struct- definition:
pub struct Comment {
pub style: CommentStyle,
pub lines: Vec<StrBuf>,
references:- 19193: debug!("<<< shebang comment");
194: comments.push(Comment {
195: style: if code_to_the_left { Trailing } else { Isolated },
--
331: debug!("<<< block comment");
332: comments.push(Comment {style: style, lines: lines, pos: p});
333: }
--
376: let mut comments: Vec<Comment> = Vec::new();
377: let mut literals: Vec<Literal> = Vec::new();
libsyntax/print/pprust.rs:
2335: pub fn next_comment(&mut self) -> Option<comments::Comment> {
2336: match self.comments {
libsyntax/parse/comments.rs:
189: fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
190: comments: &mut Vec<Comment>) {
191: debug!(">>> shebang comment");