1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 use ast;
12 use codemap::{BytePos, CharPos, CodeMap, Pos, Span};
13 use codemap;
14 use diagnostic::SpanHandler;
15 use ext::tt::transcribe::tt_next_token;
16 use parse::token;
17 use parse::token::{str_to_ident};
18
19 use std::char;
20 use std::mem::replace;
21 use std::num::from_str_radix;
22 use std::rc::Rc;
23 use std::str;
24 use std::strbuf::StrBuf;
25
26 pub use ext::tt::transcribe::{TtReader, new_tt_reader};
27
/// Common interface over the two token sources: the raw string lexer
/// (`StringReader`) and the token-tree reader used during macro
/// expansion (`TtReader`).
pub trait Reader {
    /// True when the source has no more tokens to produce.
    fn is_eof(&self) -> bool;
    /// Returns the next token and advances past it.
    fn next_token(&mut self) -> TokenAndSpan;
    /// Reports a fatal error at the reader's current span; never returns.
    fn fatal(&self, StrBuf) -> !;
    /// The diagnostic handler this reader reports errors through.
    fn span_diag<'a>(&'a self) -> &'a SpanHandler;
    /// Returns the next token without consuming it.
    fn peek(&self) -> TokenAndSpan;
}
35
/// A token paired with the source span it was lexed from.
#[deriving(Clone, Eq, Show)]
pub struct TokenAndSpan {
    pub tok: token::Token,
    pub sp: Span,
}
41
/// Lexer state for reading tokens directly out of a source file's text.
/// Maintains a one-token lookahead in `peek_tok`/`peek_span`.
pub struct StringReader<'a> {
    pub span_diagnostic: &'a SpanHandler,
    // The absolute offset within the codemap of the next character to read
    pub pos: BytePos,
    // The absolute offset within the codemap of the last character read (curr)
    pub last_pos: BytePos,
    // The column of the next character to read
    pub col: CharPos,
    // The last character to be read; None signals end of file
    pub curr: Option<char>,
    pub filemap: Rc<codemap::FileMap>,
    /* cached: */
    // The already-lexed next token and its span (the lookahead).
    pub peek_tok: token::Token,
    pub peek_span: Span,
}
57
58 impl<'a> StringReader<'a> {
59 pub fn curr_is(&self, c: char) -> bool {
60 self.curr == Some(c)
61 }
62 }
63
/// Creates a `StringReader` for `filemap` with its one-token lookahead
/// (`peek_tok`/`peek_span`) already primed.
pub fn new_string_reader<'a>(span_diagnostic: &'a SpanHandler,
                             filemap: Rc<codemap::FileMap>)
                             -> StringReader<'a> {
    let mut r = new_low_level_string_reader(span_diagnostic, filemap);
    string_advance_token(&mut r); /* fill in peek_* */
    r
}
71
/* For comments.rs, which hackily pokes into 'pos' and 'curr' */
/// Creates a `StringReader` without priming the token lookahead; the
/// caller sees the raw `pos`/`curr` state after one initial `bump`.
pub fn new_low_level_string_reader<'a>(span_diagnostic: &'a SpanHandler,
                                       filemap: Rc<codemap::FileMap>)
                                       -> StringReader<'a> {
    // Force the initial reader bump to start on a fresh line
    let initial_char = '\n';
    let mut r = StringReader {
        span_diagnostic: span_diagnostic,
        pos: filemap.start_pos,
        last_pos: filemap.start_pos,
        col: CharPos(0),
        curr: Some(initial_char),
        filemap: filemap,
        /* dummy values; not read */
        peek_tok: token::EOF,
        peek_span: codemap::DUMMY_SP,
    };
    // Loads the real first character and registers the synthetic '\n'
    // as a line start.
    bump(&mut r);
    r
}
92
impl<'a> Reader for StringReader<'a> {
    fn is_eof(&self) -> bool { is_eof(self) }
    // return the next token. EFFECT: advances the string_reader.
    fn next_token(&mut self) -> TokenAndSpan {
        // Hand out the cached lookahead token, leaving a harmless
        // placeholder (UNDERSCORE) behind, then refill the cache.
        let ret_val = TokenAndSpan {
            tok: replace(&mut self.peek_tok, token::UNDERSCORE),
            sp: self.peek_span,
        };
        string_advance_token(self);
        ret_val
    }
    // Reports a fatal error at the span of the lookahead token.
    fn fatal(&self, m: StrBuf) -> ! {
        self.span_diagnostic.span_fatal(self.peek_span, m.as_slice())
    }
    fn span_diag<'a>(&'a self) -> &'a SpanHandler { self.span_diagnostic }
    fn peek(&self) -> TokenAndSpan {
        // FIXME(pcwalton): Bad copy!
        TokenAndSpan {
            tok: self.peek_tok.clone(),
            sp: self.peek_span.clone(),
        }
    }
}
116
impl<'a> Reader for TtReader<'a> {
    fn is_eof(&self) -> bool {
        self.cur_tok == token::EOF
    }
    fn next_token(&mut self) -> TokenAndSpan {
        // Delegates to the token-tree transcriber.
        let r = tt_next_token(self);
        debug!("TtReader: r={:?}", r);
        r
    }
    fn fatal(&self, m: StrBuf) -> ! {
        self.sp_diag.span_fatal(self.cur_span, m.as_slice());
    }
    fn span_diag<'a>(&'a self) -> &'a SpanHandler { self.sp_diag }
    fn peek(&self) -> TokenAndSpan {
        TokenAndSpan {
            tok: self.cur_tok.clone(),
            sp: self.cur_span.clone(),
        }
    }
}
137
// report a lexical error spanning [`from_pos`, `to_pos`)
fn fatal_span(rdr: &mut StringReader,
              from_pos: BytePos,
              to_pos: BytePos,
              m: StrBuf)
              -> ! {
    // Point the cached span at the offending range, because
    // `StringReader::fatal` reports at `peek_span`.
    rdr.peek_span = codemap::mk_sp(from_pos, to_pos);
    rdr.fatal(m);
}
147
// report a lexical error spanning [`from_pos`, `to_pos`), appending an
// escaped character to the error message
fn fatal_span_char(rdr: &mut StringReader,
                   from_pos: BytePos,
                   to_pos: BytePos,
                   m: StrBuf,
                   c: char)
                   -> ! {
    let mut m = m;
    m.push_str(": ");
    // escape_default feeds the escaped form one character at a time.
    char::escape_default(c, |c| m.push_char(c));
    fatal_span(rdr, from_pos, to_pos, m.into_strbuf());
}
161
// report a lexical error spanning [`from_pos`, `to_pos`), appending the
// offending string to the error message
fn fatal_span_verbose(rdr: &mut StringReader,
                      from_pos: BytePos,
                      to_pos: BytePos,
                      m: StrBuf)
                      -> ! {
    let mut m = m;
    m.push_str(": ");
    // Quote the raw source text of the offending range in the message.
    let from = byte_offset(rdr, from_pos).to_uint();
    let to = byte_offset(rdr, to_pos).to_uint();
    m.push_str(rdr.filemap.src.as_slice().slice(from, to));
    fatal_span(rdr, from_pos, to_pos, m);
}
176
// EFFECT: advance peek_tok and peek_span to refer to the next token.
// EFFECT: update the interner, maybe.
fn string_advance_token(r: &mut StringReader) {
    // Whitespace and ordinary comments are skipped first; a sugared
    // doc-comment, if one turns up, becomes the next token itself.
    match consume_whitespace_and_comments(r) {
        Some(comment) => {
            r.peek_span = comment.sp;
            r.peek_tok = comment.tok;
        },
        None => {
            if is_eof(r) {
                // NOTE(review): peek_span is deliberately left at its
                // previous value here.
                r.peek_tok = token::EOF;
            } else {
                let start_bytepos = r.last_pos;
                r.peek_tok = next_token_inner(r);
                r.peek_span = codemap::mk_sp(start_bytepos,
                                             r.last_pos);
            };
        }
    }
}
197
198 fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
199 (pos - rdr.filemap.start_pos)
200 }
201
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `rdr.last_pos`, meaning the slice does not include
/// the character `rdr.curr`.
pub fn with_str_from<T>(
                     rdr: &StringReader,
                     start: BytePos,
                     f: |s: &str| -> T)
                     -> T {
    with_str_from_to(rdr, start, rdr.last_pos, f)
}
212
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `end`.
fn with_str_from_to<T>(
                    rdr: &StringReader,
                    start: BytePos,
                    end: BytePos,
                    f: |s: &str| -> T)
                    -> T {
    f(rdr.filemap.src.as_slice().slice(
            byte_offset(rdr, start).to_uint(),
            byte_offset(rdr, end).to_uint()))
}
225
// EFFECT: advance the StringReader by one character. If a newline is
// discovered, add it to the FileMap's list of line start offsets.
pub fn bump(rdr: &mut StringReader) {
    rdr.last_pos = rdr.pos;
    let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint();
    if current_byte_offset < rdr.filemap.src.len() {
        assert!(rdr.curr.is_some());
        let last_char = rdr.curr.unwrap();
        // Decode the (possibly multibyte) character at the current offset.
        let next = rdr.filemap
                      .src
                      .as_slice()
                      .char_range_at(current_byte_offset);
        let byte_offset_diff = next.next - current_byte_offset;
        rdr.pos = rdr.pos + Pos::from_uint(byte_offset_diff);
        rdr.curr = Some(next.ch);
        rdr.col = rdr.col + CharPos(1u);
        // A newline that was just consumed starts a new line at last_pos.
        if last_char == '\n' {
            rdr.filemap.next_line(rdr.last_pos);
            rdr.col = CharPos(0u);
        }

        // Record multibyte characters so byte<->char positions can be
        // reconciled later.
        if byte_offset_diff > 1 {
            rdr.filemap.record_multibyte_char(rdr.last_pos, byte_offset_diff);
        }
    } else {
        // Past the end of the source: curr = None marks EOF.
        rdr.curr = None;
    }
}
254
255 pub fn is_eof(rdr: &StringReader) -> bool {
256 rdr.curr.is_none()
257 }
258
259 pub fn nextch(rdr: &StringReader) -> Option<char> {
260 let offset = byte_offset(rdr, rdr.pos).to_uint();
261 if offset < rdr.filemap.src.len() {
262 Some(rdr.filemap.src.as_slice().char_at(offset))
263 } else {
264 None
265 }
266 }
267 pub fn nextch_is(rdr: &StringReader, c: char) -> bool {
268 nextch(rdr) == Some(c)
269 }
270
271 pub fn nextnextch(rdr: &StringReader) -> Option<char> {
272 let offset = byte_offset(rdr, rdr.pos).to_uint();
273 let s = rdr.filemap.deref().src.as_slice();
274 if offset >= s.len() { return None }
275 let str::CharRange { next, .. } = s.char_range_at(offset);
276 if next < s.len() {
277 Some(s.char_at(next))
278 } else {
279 None
280 }
281 }
282 pub fn nextnextch_is(rdr: &StringReader, c: char) -> bool {
283 nextnextch(rdr) == Some(c)
284 }
285
286 fn hex_digit_val(c: Option<char>) -> int {
287 let d = c.unwrap_or('\x00');
288
289 if in_range(c, '0', '9') { return (d as int) - ('0' as int); }
290 if in_range(c, 'a', 'f') { return (d as int) - ('a' as int) + 10; }
291 if in_range(c, 'A', 'F') { return (d as int) - ('A' as int) + 10; }
292 fail!();
293 }
294
/// True for the four ASCII whitespace characters the lexer skips;
/// `None` (EOF) is never whitespace.
pub fn is_whitespace(c: Option<char>) -> bool {
    match c {
        Some(' ') | Some('\n') | Some('\t') | Some('\r') => true,
        _ => false
    }
}
301
/// True iff `c` is a character within the inclusive range `lo..hi`;
/// `None` (EOF) is never in range.
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    match c {
        None => false,
        Some(ch) => lo <= ch && ch <= hi
    }
}
308
309 fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
310
311 fn is_hex_digit(c: Option<char>) -> bool {
312 return in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
313 in_range(c, 'A', 'F');
314 }
315
316 // EFFECT: eats whitespace and comments.
317 // returns a Some(sugared-doc-attr) if one exists, None otherwise.
318 fn consume_whitespace_and_comments(rdr: &mut StringReader)
319 -> Option<TokenAndSpan> {
320 while is_whitespace(rdr.curr) { bump(rdr); }
321 return consume_any_line_comment(rdr);
322 }
323
/// A line comment opening with four or more slashes is an ordinary
/// comment, not a doc-comment.
pub fn is_line_non_doc_comment(s: &str) -> bool {
    return s.starts_with("////");
}
327
// PRECONDITION: rdr.curr is not whitespace
// EFFECT: eats any kind of comment.
// returns a Some(sugared-doc-attr) if one exists, None otherwise
fn consume_any_line_comment(rdr: &mut StringReader)
                         -> Option<TokenAndSpan> {
    if rdr.curr_is('/') {
        match nextch(rdr) {
            Some('/') => {
                bump(rdr);
                bump(rdr);
                // line comments starting with "///" or "//!" are doc-comments
                if rdr.curr_is('/') || rdr.curr_is('!') {
                    // Back up over the three comment-opening characters
                    // already consumed so the span covers the whole comment.
                    let start_bpos = rdr.pos - BytePos(3);
                    while !rdr.curr_is('\n') && !is_eof(rdr) {
                        bump(rdr);
                    }
                    let ret = with_str_from(rdr, start_bpos, |string| {
                        // but comments with only more "/"s are not
                        if !is_line_non_doc_comment(string) {
                            Some(TokenAndSpan{
                                tok: token::DOC_COMMENT(str_to_ident(string)),
                                sp: codemap::mk_sp(start_bpos, rdr.pos)
                            })
                        } else {
                            None
                        }
                    });

                    if ret.is_some() {
                        return ret;
                    }
                } else {
                    // Plain "//" comment: skip to end of line.
                    while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
                }
                // Restart whitespace munch.
                consume_whitespace_and_comments(rdr)
            }
            Some('*') => { bump(rdr); bump(rdr); consume_block_comment(rdr) }
            _ => None
        }
    } else if rdr.curr_is('#') {
        if nextch_is(rdr, '!') {

            // Parse an inner attribute.
            if nextnextch_is(rdr, '[') {
                return None;
            }

            // I guess this is the only way to figure out if
            // we're at the beginning of the file...
            let cmap = CodeMap::new();
            cmap.files.borrow_mut().push(rdr.filemap.clone());
            let loc = cmap.lookup_char_pos_adj(rdr.last_pos);
            if loc.line == 1u && loc.col == CharPos(0u) {
                // "#!" on the very first line (not followed by '[') is a
                // shebang line: skip it and keep munching.
                while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
                return consume_whitespace_and_comments(rdr);
            }
        }
        None
    } else {
        None
    }
}
391
/// A block comment opening with "/***" is an ordinary comment, not a
/// doc-comment.
pub fn is_block_non_doc_comment(s: &str) -> bool {
    return s.starts_with("/***");
}
395
// might return a sugared-doc-attr
// PRECONDITION: the opening "/*" has been consumed; rdr.curr is the
// character right after it.
fn consume_block_comment(rdr: &mut StringReader) -> Option<TokenAndSpan> {
    // block comments starting with "/**" or "/*!" are doc-comments
    let is_doc_comment = rdr.curr_is('*') || rdr.curr_is('!');
    // Back up over the already-consumed opener so the span covers it.
    let start_bpos = rdr.pos - BytePos(if is_doc_comment {3} else {2});

    // Block comments nest: track the nesting depth.
    let mut level: int = 1;
    while level > 0 {
        if is_eof(rdr) {
            let msg = if is_doc_comment {
                "unterminated block doc-comment".to_strbuf()
            } else {
                "unterminated block comment".to_strbuf()
            };
            fatal_span(rdr, start_bpos, rdr.last_pos, msg);
        } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
            level += 1;
            bump(rdr);
            bump(rdr);
        } else if rdr.curr_is('*') && nextch_is(rdr, '/') {
            level -= 1;
            bump(rdr);
            bump(rdr);
        } else {
            bump(rdr);
        }
    }

    let res = if is_doc_comment {
        with_str_from(rdr, start_bpos, |string| {
            // but comments with only "*"s between two "/"s are not
            if !is_block_non_doc_comment(string) {
                Some(TokenAndSpan{
                    tok: token::DOC_COMMENT(str_to_ident(string)),
                    sp: codemap::mk_sp(start_bpos, rdr.pos)
                })
            } else {
                None
            }
        })
    } else {
        None
    };

    // restart whitespace munch.
    if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
}
443
// Scans an optional float exponent ("e"/"E", optional sign, digits).
// Returns the exponent text, or None when no exponent is present.
// Fatal error if "e"/"E" is seen but no digits follow.
fn scan_exponent(rdr: &mut StringReader, start_bpos: BytePos) -> Option<StrBuf> {
    // \x00 hits the `return None` case immediately, so this is fine.
    let mut c = rdr.curr.unwrap_or('\x00');
    let mut rslt = StrBuf::new();
    if c == 'e' || c == 'E' {
        rslt.push_char(c);
        bump(rdr);
        c = rdr.curr.unwrap_or('\x00');
        if c == '-' || c == '+' {
            rslt.push_char(c);
            bump(rdr);
        }
        let exponent = scan_digits(rdr, 10u);
        if exponent.len() > 0u {
            rslt.push_str(exponent.as_slice());
            return Some(rslt);
        } else {
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       "scan_exponent: bad fp literal".to_strbuf());
        }
    } else {
        return None::<StrBuf>;
    }
}
468
469 fn scan_digits(rdr: &mut StringReader, radix: uint) -> StrBuf {
470 let mut rslt = StrBuf::new();
471 loop {
472 let c = rdr.curr;
473 if c == Some('_') { bump(rdr); continue; }
474 match c.and_then(|cc| char::to_digit(cc, radix)) {
475 Some(_) => {
476 rslt.push_char(c.unwrap());
477 bump(rdr);
478 }
479 _ => return rslt
480 }
481 };
482 }
483
484 fn check_float_base(rdr: &mut StringReader, start_bpos: BytePos, last_bpos: BytePos,
485 base: uint) {
486 match base {
487 16u => {
488 fatal_span(rdr, start_bpos, last_bpos,
489 "hexadecimal float literal is not supported".to_strbuf())
490 }
491 8u => fatal_span(rdr, start_bpos, last_bpos,
492 "octal float literal is not supported".to_strbuf()),
493 2u => fatal_span(rdr, start_bpos, last_bpos,
494 "binary float literal is not supported".to_strbuf()),
495 _ => ()
496 }
497 }
498
// Scans an entire numeric literal starting at `c` (the current char):
// optional base prefix (0x/0o/0b), digits, then either an integer
// suffix (u/i, u8/i8 .. u64/i64), a float part (fraction and/or
// exponent), an f32/f64/f128 suffix, or nothing (unsuffixed literal).
fn scan_number(c: char, rdr: &mut StringReader) -> token::Token {
    let mut num_str;
    let mut base = 10u;
    let mut c = c;
    let mut n = nextch(rdr).unwrap_or('\x00');
    let start_bpos = rdr.last_pos;
    // Base prefix.
    if c == '0' && n == 'x' {
        bump(rdr);
        bump(rdr);
        base = 16u;
    } else if c == '0' && n == 'o' {
        bump(rdr);
        bump(rdr);
        base = 8u;
    } else if c == '0' && n == 'b' {
        bump(rdr);
        bump(rdr);
        base = 2u;
    }
    num_str = scan_digits(rdr, base);
    c = rdr.curr.unwrap_or('\x00');
    nextch(rdr);
    // Integer suffix: 'u' or 'i', optionally followed by a width.
    if c == 'u' || c == 'i' {
        enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
        let signed = c == 'i';
        // Default to the machine-sized type when no width follows.
        let mut tp = {
            if signed { Signed(ast::TyI) }
            else { Unsigned(ast::TyU) }
        };
        bump(rdr);
        c = rdr.curr.unwrap_or('\x00');
        if c == '8' {
            bump(rdr);
            tp = if signed { Signed(ast::TyI8) }
                      else { Unsigned(ast::TyU8) };
        }
        n = nextch(rdr).unwrap_or('\x00');
        if c == '1' && n == '6' {
            bump(rdr);
            bump(rdr);
            tp = if signed { Signed(ast::TyI16) }
                      else { Unsigned(ast::TyU16) };
        } else if c == '3' && n == '2' {
            bump(rdr);
            bump(rdr);
            tp = if signed { Signed(ast::TyI32) }
                      else { Unsigned(ast::TyU32) };
        } else if c == '6' && n == '4' {
            bump(rdr);
            bump(rdr);
            tp = if signed { Signed(ast::TyI64) }
                      else { Unsigned(ast::TyU64) };
        }
        if num_str.len() == 0u {
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       "no valid digits found for number".to_strbuf());
        }
        let parsed = match from_str_radix::<u64>(num_str.as_slice(),
                                                base as uint) {
            Some(p) => p,
            None => fatal_span(rdr, start_bpos, rdr.last_pos,
                               "int literal is too large".to_strbuf())
        };

        match tp {
            Signed(t) => return token::LIT_INT(parsed as i64, t),
            Unsigned(t) => return token::LIT_UINT(parsed, t)
        }
    }
    let mut is_float = false;
    // A '.' starts a fraction only when not followed by an identifier
    // (method call) or another '.' (range operator).
    if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) {
        is_float = true;
        bump(rdr);
        let dec_part = scan_digits(rdr, 10u);
        num_str.push_char('.');
        num_str.push_str(dec_part.as_slice());
    }
    match scan_exponent(rdr, start_bpos) {
        Some(ref s) => {
            is_float = true;
            num_str.push_str(s.as_slice());
        }
        None => ()
    }

    // Explicit float suffix: f32, f64 or f128.
    if rdr.curr_is('f') {
        bump(rdr);
        c = rdr.curr.unwrap_or('\x00');
        n = nextch(rdr).unwrap_or('\x00');
        if c == '3' && n == '2' {
            bump(rdr);
            bump(rdr);
            check_float_base(rdr, start_bpos, rdr.last_pos, base);
            return token::LIT_FLOAT(str_to_ident(num_str.into_owned()),
                                    ast::TyF32);
        } else if c == '6' && n == '4' {
            bump(rdr);
            bump(rdr);
            check_float_base(rdr, start_bpos, rdr.last_pos, base);
            return token::LIT_FLOAT(str_to_ident(num_str.into_owned()),
                                    ast::TyF64);
            /* FIXME (#2252): if this is out of range for either a
            32-bit or 64-bit float, it won't be noticed till the
            back-end.  */
        } else if c == '1' && n == '2' && nextnextch(rdr).unwrap_or('\x00') == '8' {
            bump(rdr);
            bump(rdr);
            bump(rdr);
            check_float_base(rdr, start_bpos, rdr.last_pos, base);
            return token::LIT_FLOAT(str_to_ident(num_str.as_slice()), ast::TyF128);
        }
        // 'f' not followed by a recognized width is an error.
        fatal_span(rdr, start_bpos, rdr.last_pos,
                   "expected `f32`, `f64` or `f128` suffix".to_strbuf());
    }
    if is_float {
        check_float_base(rdr, start_bpos, rdr.last_pos, base);
        return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(
                num_str.into_owned()));
    } else {
        if num_str.len() == 0u {
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       "no valid digits found for number".to_strbuf());
        }
        let parsed = match from_str_radix::<u64>(num_str.as_slice(),
                                                base as uint) {
            Some(p) => p,
            None => fatal_span(rdr, start_bpos, rdr.last_pos,
                               "int literal is too large".to_strbuf())
        };

        debug!("lexing {} as an unsuffixed integer literal",
               num_str.as_slice());
        return token::LIT_INT_UNSUFFIXED(parsed as i64);
    }
}
634
// Scans the digits of a \x/\u/\U escape (exactly `n_hex_digits` hex
// digits) and returns the character they denote. Fatal error on a
// non-hex digit, premature EOF, or a value that is not a valid char.
fn scan_numeric_escape(rdr: &mut StringReader, n_hex_digits: uint) -> char {
    let mut accum_int = 0;
    let mut i = n_hex_digits;
    let start_bpos = rdr.last_pos;
    while i != 0u && !is_eof(rdr) {
        let n = rdr.curr;
        if !is_hex_digit(n) {
            fatal_span_char(
                rdr,
                rdr.last_pos,
                rdr.pos,
                "illegal character in numeric character escape".to_strbuf(),
                // unwrap is safe: is_hex_digit(None) is false only via the
                // EOF branch below, and !is_eof guarantees Some here.
                n.unwrap());
        }
        bump(rdr);
        // Fold each digit into the accumulator, most significant first.
        accum_int *= 16;
        accum_int += hex_digit_val(n);
        i -= 1u;
    }
    if i != 0 && is_eof(rdr) {
        fatal_span(rdr, start_bpos, rdr.last_pos,
                   "unterminated numeric character escape".to_strbuf());
    }

    // Reject values outside the valid Unicode scalar range.
    match char::from_u32(accum_int as u32) {
        Some(x) => x,
        None => fatal_span(rdr, start_bpos, rdr.last_pos,
                           "illegal numeric character escape".to_strbuf())
    }
}
665
666 fn ident_start(c: Option<char>) -> bool {
667 let c = match c { Some(c) => c, None => return false };
668
669 (c >= 'a' && c <= 'z')
670 || (c >= 'A' && c <= 'Z')
671 || c == '_'
672 || (c > '\x7f' && char::is_XID_start(c))
673 }
674
675 fn ident_continue(c: Option<char>) -> bool {
676 let c = match c { Some(c) => c, None => return false };
677
678 (c >= 'a' && c <= 'z')
679 || (c >= 'A' && c <= 'Z')
680 || (c >= '0' && c <= '9')
681 || c == '_'
682 || (c > '\x7f' && char::is_XID_continue(c))
683 }
684
// return the next token from the string
// EFFECT: advances the input past that token
// EFFECT: updates the interner
// PRECONDITION: not at EOF, whitespace/comments already consumed.
fn next_token_inner(rdr: &mut StringReader) -> token::Token {
    let c = rdr.curr;
    // Identifiers and keywords — but not the 'r' of a raw string r"/r#".
    if ident_start(c) && !nextch_is(rdr, '"') && !nextch_is(rdr, '#') {
        // Note: r as in r" or r#" is part of a raw string literal,
        // not an identifier, and is handled further down.

        let start = rdr.last_pos;
        while ident_continue(rdr.curr) {
            bump(rdr);
        }

        return with_str_from(rdr, start, |string| {
            if string == "_" {
                token::UNDERSCORE
            } else {
                // An ident directly followed by "::" is flagged as a
                // module-path component.
                let is_mod_name = rdr.curr_is(':') && nextch_is(rdr, ':');

                // FIXME: perform NFKC normalization here. (Issue #2253)
                token::IDENT(str_to_ident(string), is_mod_name)
            }
        })
    }
    if is_dec_digit(c) {
        return scan_number(c.unwrap(), rdr);
    }
    // Helper for operators that may carry a trailing '=' (e.g. "+=").
    fn binop(rdr: &mut StringReader, op: token::BinOp) -> token::Token {
        bump(rdr);
        if rdr.curr_is('=') {
            bump(rdr);
            return token::BINOPEQ(op);
        } else { return token::BINOP(op); }
    }
    match c.expect("next_token_inner called at EOF") {

      // One-byte tokens.
      ';' => { bump(rdr); return token::SEMI; }
      ',' => { bump(rdr); return token::COMMA; }
      '.' => {
          // '.', '..' or '...'
          bump(rdr);
          return if rdr.curr_is('.') {
              bump(rdr);
              if rdr.curr_is('.') {
                  bump(rdr);
                  token::DOTDOTDOT
              } else {
                  token::DOTDOT
              }
          } else {
              token::DOT
          };
      }
      '(' => { bump(rdr); return token::LPAREN; }
      ')' => { bump(rdr); return token::RPAREN; }
      '{' => { bump(rdr); return token::LBRACE; }
      '}' => { bump(rdr); return token::RBRACE; }
      '[' => { bump(rdr); return token::LBRACKET; }
      ']' => { bump(rdr); return token::RBRACKET; }
      '@' => { bump(rdr); return token::AT; }
      '#' => { bump(rdr); return token::POUND; }
      '~' => { bump(rdr); return token::TILDE; }
      ':' => {
        bump(rdr);
        if rdr.curr_is(':') {
            bump(rdr);
            return token::MOD_SEP;
        } else { return token::COLON; }
      }

      '$' => { bump(rdr); return token::DOLLAR; }

      // Multi-byte tokens.
      '=' => {
        // '=', '==' or '=>'
        bump(rdr);
        if rdr.curr_is('=') {
            bump(rdr);
            return token::EQEQ;
        } else if rdr.curr_is('>') {
            bump(rdr);
            return token::FAT_ARROW;
        } else {
            return token::EQ;
        }
      }
      '!' => {
        bump(rdr);
        if rdr.curr_is('=') {
            bump(rdr);
            return token::NE;
        } else { return token::NOT; }
      }
      '<' => {
        // '<', '<=', '<<'/'<<=', '<-' or '<->'
        bump(rdr);
        match rdr.curr.unwrap_or('\x00') {
          '=' => { bump(rdr); return token::LE; }
          '<' => { return binop(rdr, token::SHL); }
          '-' => {
            bump(rdr);
            match rdr.curr.unwrap_or('\x00') {
              '>' => { bump(rdr); return token::DARROW; }
              _ => { return token::LARROW; }
            }
          }
          _ => { return token::LT; }
        }
      }
      '>' => {
        // '>', '>=' or '>>'/'>>='
        bump(rdr);
        match rdr.curr.unwrap_or('\x00') {
          '=' => { bump(rdr); return token::GE; }
          '>' => { return binop(rdr, token::SHR); }
          _ => { return token::GT; }
        }
      }
      '\'' => {
        // Either a character constant 'a' OR a lifetime name 'abc
        bump(rdr);
        let start = rdr.last_pos;

        // the eof will be picked up by the final `'` check below
        let mut c2 = rdr.curr.unwrap_or('\x00');
        bump(rdr);

        // If the character is an ident start not followed by another single
        // quote, then this is a lifetime name:
        if ident_start(Some(c2)) && !rdr.curr_is('\'') {
            while ident_continue(rdr.curr) {
                bump(rdr);
            }
            let ident = with_str_from(rdr, start, |lifetime_name| {
                str_to_ident(lifetime_name)
            });
            let tok = &token::IDENT(ident, false);

            // Keywords are not legal lifetime names (except 'static).
            if token::is_keyword(token::keywords::Self, tok) {
                fatal_span(rdr, start, rdr.last_pos,
                           "invalid lifetime name: 'self \
                            is no longer a special lifetime".to_strbuf());
            } else if token::is_any_keyword(tok) &&
                !token::is_keyword(token::keywords::Static, tok) {
                fatal_span(rdr, start, rdr.last_pos,
                           "invalid lifetime name".to_strbuf());
            } else {
                return token::LIFETIME(ident);
            }
        }

        // Otherwise it is a character constant:
        match c2 {
            '\\' => {
                // '\X' for some X must be a character constant:
                let escaped = rdr.curr;
                let escaped_pos = rdr.last_pos;
                bump(rdr);
                match escaped {
                    None => {}
                    Some(e) => {
                        c2 = match e {
                            'n' => '\n',
                            'r' => '\r',
                            't' => '\t',
                            '\\' => '\\',
                            '\'' => '\'',
                            '"' => '"',
                            '0' => '\x00',
                            'x' => scan_numeric_escape(rdr, 2u),
                            'u' => scan_numeric_escape(rdr, 4u),
                            'U' => scan_numeric_escape(rdr, 8u),
                            c2 => {
                                fatal_span_char(rdr,
                                                escaped_pos,
                                                rdr.last_pos,
                                                "unknown character \
                                                 escape".to_strbuf(),
                                                c2)
                            }
                        }
                    }
                }
            }
            '\t' | '\n' | '\r' | '\'' => {
                fatal_span_char(
                    rdr,
                    start,
                    rdr.last_pos,
                    "character constant must be escaped".to_strbuf(),
                    c2);
            }
            _ => {}
        }
        // The closing single quote must follow.
        if !rdr.curr_is('\'') {
            fatal_span_verbose(rdr,
                               // Byte offsetting here is okay because the
                               // character before position `start` is an
                               // ascii single quote.
                               start - BytePos(1),
                               rdr.last_pos,
                               "unterminated character constant".to_strbuf());
        }
        bump(rdr); // advance curr past token
        return token::LIT_CHAR(c2);
      }
      '"' => {
        // Ordinary (escaped) string literal.
        let mut accum_str = StrBuf::new();
        let start_bpos = rdr.last_pos;
        bump(rdr);
        while !rdr.curr_is('"') {
            if is_eof(rdr) {
                fatal_span(rdr, start_bpos, rdr.last_pos,
                           "unterminated double quote string".to_strbuf());
            }

            let ch = rdr.curr.unwrap();
            bump(rdr);
            match ch {
              '\\' => {
                if is_eof(rdr) {
                    fatal_span(rdr, start_bpos, rdr.last_pos,
                           "unterminated double quote string".to_strbuf());
                }

                let escaped = rdr.curr.unwrap();
                let escaped_pos = rdr.last_pos;
                bump(rdr);
                match escaped {
                  'n' => accum_str.push_char('\n'),
                  'r' => accum_str.push_char('\r'),
                  't' => accum_str.push_char('\t'),
                  '\\' => accum_str.push_char('\\'),
                  '\'' => accum_str.push_char('\''),
                  '"' => accum_str.push_char('"'),
                  // Backslash-newline: swallow the line break and any
                  // following whitespace.
                  '\n' => consume_whitespace(rdr),
                  '0' => accum_str.push_char('\x00'),
                  'x' => {
                    accum_str.push_char(scan_numeric_escape(rdr, 2u));
                  }
                  'u' => {
                    accum_str.push_char(scan_numeric_escape(rdr, 4u));
                  }
                  'U' => {
                    accum_str.push_char(scan_numeric_escape(rdr, 8u));
                  }
                  c2 => {
                    fatal_span_char(rdr, escaped_pos, rdr.last_pos,
                                    "unknown string escape".to_strbuf(), c2);
                  }
                }
              }
              _ => accum_str.push_char(ch)
            }
        }
        bump(rdr);
        return token::LIT_STR(str_to_ident(accum_str.as_slice()));
      }
      'r' => {
        // Raw string literal: r"..." or r#..#"..."#..# with matching
        // numbers of '#'s on both sides; no escapes are processed.
        let start_bpos = rdr.last_pos;
        bump(rdr);
        let mut hash_count = 0u;
        while rdr.curr_is('#') {
            bump(rdr);
            hash_count += 1;
        }

        if is_eof(rdr) {
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       "unterminated raw string".to_strbuf());
        } else if !rdr.curr_is('"') {
            fatal_span_char(rdr, start_bpos, rdr.last_pos,
                            "only `#` is allowed in raw string delimitation; \
                             found illegal character".to_strbuf(),
                            rdr.curr.unwrap());
        }
        bump(rdr);
        let content_start_bpos = rdr.last_pos;
        let mut content_end_bpos;
        'outer: loop {
            if is_eof(rdr) {
                fatal_span(rdr, start_bpos, rdr.last_pos,
                           "unterminated raw string".to_strbuf());
            }
            if rdr.curr_is('"') {
                content_end_bpos = rdr.last_pos;
                // A '"' only terminates if followed by `hash_count` '#'s.
                for _ in range(0, hash_count) {
                    bump(rdr);
                    if !rdr.curr_is('#') {
                        continue 'outer;
                    }
                }
                break;
            }
            bump(rdr);
        }
        bump(rdr);
        let str_content = with_str_from_to(rdr,
                                           content_start_bpos,
                                           content_end_bpos,
                                           str_to_ident);
        return token::LIT_STR_RAW(str_content, hash_count);
      }
      '-' => {
        if nextch_is(rdr, '>') {
            bump(rdr);
            bump(rdr);
            return token::RARROW;
        } else { return binop(rdr, token::MINUS); }
      }
      '&' => {
        if nextch_is(rdr, '&') {
            bump(rdr);
            bump(rdr);
            return token::ANDAND;
        } else { return binop(rdr, token::AND); }
      }
      '|' => {
        match nextch(rdr) {
          Some('|') => { bump(rdr); bump(rdr); return token::OROR; }
          _ => { return binop(rdr, token::OR); }
        }
      }
      '+' => { return binop(rdr, token::PLUS); }
      '*' => { return binop(rdr, token::STAR); }
      '/' => { return binop(rdr, token::SLASH); }
      '^' => { return binop(rdr, token::CARET); }
      '%' => { return binop(rdr, token::PERCENT); }
      c => {
          fatal_span_char(rdr, rdr.last_pos, rdr.pos,
                          "unknown start of token".to_strbuf(), c);
      }
    }
}
1025
1026 fn consume_whitespace(rdr: &mut StringReader) {
1027 while is_whitespace(rdr.curr) && !is_eof(rdr) { bump(rdr); }
1028 }
1029
#[cfg(test)]
mod test {
    use super::*;

    use codemap::{BytePos, CodeMap, Span};
    use diagnostic;
    use parse::token;
    use parse::token::{str_to_ident};
    use std::io::util;

    // A span handler that swallows all output.
    fn mk_sh() -> diagnostic::SpanHandler {
        let emitter = diagnostic::EmitterWriter::new(box util::NullWriter);
        let handler = diagnostic::mk_handler(box emitter);
        diagnostic::mk_span_handler(handler, CodeMap::new())
    }

    // open a string reader for the given string
    fn setup<'a>(span_handler: &'a diagnostic::SpanHandler,
                 teststr: StrBuf) -> StringReader<'a> {
        let fm = span_handler.cm.new_filemap("zebra.rs".to_strbuf(), teststr);
        new_string_reader(span_handler, fm)
    }

    // Tokens and byte positions produced while lexing a small program.
    #[test] fn t1 () {
        let span_handler = mk_sh();
        let mut string_reader = setup(&span_handler,
            "/* my source file */ \
             fn main() { println!(\"zebra\"); }\n".to_strbuf());
        let id = str_to_ident("fn");
        let tok1 = string_reader.next_token();
        let tok2 = TokenAndSpan{
            tok:token::IDENT(id, false),
            sp:Span {lo:BytePos(21),hi:BytePos(23),expn_info: None}};
        assert_eq!(tok1,tok2);
        // the 'main' id is already read:
        assert_eq!(string_reader.last_pos.clone(), BytePos(28));
        // read another token:
        let tok3 = string_reader.next_token();
        let tok4 = TokenAndSpan{
            tok:token::IDENT(str_to_ident("main"), false),
            sp:Span {lo:BytePos(24),hi:BytePos(28),expn_info: None}};
        assert_eq!(tok3,tok4);
        // the lparen is already read:
        assert_eq!(string_reader.last_pos.clone(), BytePos(29))
    }

    // check that the given reader produces the desired stream
    // of tokens (stop checking after exhausting the expected vec)
    fn check_tokenization (mut string_reader: StringReader, expected: Vec<token::Token> ) {
        for expected_tok in expected.iter() {
            assert_eq!(&string_reader.next_token().tok, expected_tok);
        }
    }

    // make the identifier by looking up the string in the interner
    fn mk_ident (id: &str, is_mod_name: bool) -> token::Token {
        token::IDENT (str_to_ident(id),is_mod_name)
    }

    #[test] fn doublecolonparsing () {
        check_tokenization(setup(&mk_sh(), "a b".to_strbuf()),
                           vec!(mk_ident("a",false),
                                mk_ident("b",false)));
    }

    #[test] fn dcparsing_2 () {
        check_tokenization(setup(&mk_sh(), "a::b".to_strbuf()),
                           vec!(mk_ident("a",true),
                                token::MOD_SEP,
                                mk_ident("b",false)));
    }

    #[test] fn dcparsing_3 () {
        check_tokenization(setup(&mk_sh(), "a ::b".to_strbuf()),
                           vec!(mk_ident("a",false),
                                token::MOD_SEP,
                                mk_ident("b",false)));
    }

    #[test] fn dcparsing_4 () {
        check_tokenization(setup(&mk_sh(), "a:: b".to_strbuf()),
                           vec!(mk_ident("a",true),
                                token::MOD_SEP,
                                mk_ident("b",false)));
    }

    #[test] fn character_a() {
        assert_eq!(setup(&mk_sh(), "'a'".to_strbuf()).next_token().tok,
                   token::LIT_CHAR('a'));
    }

    #[test] fn character_space() {
        assert_eq!(setup(&mk_sh(), "' '".to_strbuf()).next_token().tok,
                   token::LIT_CHAR(' '));
    }

    #[test] fn character_escaped() {
        assert_eq!(setup(&mk_sh(), "'\\n'".to_strbuf()).next_token().tok,
                   token::LIT_CHAR('\n'));
    }

    #[test] fn lifetime_name() {
        assert_eq!(setup(&mk_sh(), "'abc".to_strbuf()).next_token().tok,
                   token::LIFETIME(token::str_to_ident("abc")));
    }

    // Raw strings keep backslashes and quotes verbatim.
    #[test] fn raw_string() {
        assert_eq!(setup(&mk_sh(),
                         "r###\"\"#a\\b\x00c\"\"###".to_strbuf()).next_token()
                                                                 .tok,
                   token::LIT_STR_RAW(token::str_to_ident("\"#a\\b\x00c\""), 3));
    }

    #[test] fn line_doc_comments() {
        assert!(!is_line_non_doc_comment("///"));
        assert!(!is_line_non_doc_comment("/// blah"));
        assert!(is_line_non_doc_comment("////"));
    }

    // Block comments nest; the whole nest is skipped before 'a'.
    #[test] fn nested_block_comments() {
        assert_eq!(setup(&mk_sh(),
                         "/* /* */ */'a'".to_strbuf()).next_token().tok,
                   token::LIT_CHAR('a'));
    }

}
libsyntax/parse/lexer.rs:227:67-227:67 -fn- definition:
// discovered, add it to the FileMap's list of line start offsets.
pub fn bump(rdr: &mut StringReader) {
rdr.last_pos = rdr.pos;
references:- 110libsyntax/parse/comments.rs:
libsyntax/parse/lexer.rs:281:2-281:2 -fn- definition:
}
pub fn nextnextch_is(rdr: &StringReader, c: char) -> bool {
nextnextch(rdr) == Some(c)
references:- 2371: // Parse an inner attribute.
372: if nextnextch_is(rdr, '[') {
373: return None;
libsyntax/parse/comments.rs:
339: (rdr.curr_is('#') && nextch_is(rdr, '!') &&
340: !lexer::nextnextch_is(rdr, '['));
341: }
libsyntax/parse/lexer.rs:301:1-301:1 -fn- definition:
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
match c {
Some(c) => lo <= c && c <= hi,
references:- 7290: if in_range(c, 'a', 'f') { return (d as int) - ('a' as int) + 10; }
291: if in_range(c, 'A', 'F') { return (d as int) - ('A' as int) + 10; }
292: fail!();
--
311: fn is_hex_digit(c: Option<char>) -> bool {
312: return in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
313: in_range(c, 'A', 'F');
314: }
libsyntax/parse/lexer.rs:468:1-468:1 -fn- definition:
fn scan_digits(rdr: &mut StringReader, radix: uint) -> StrBuf {
let mut rslt = StrBuf::new();
loop {
references:- 3571: bump(rdr);
572: let dec_part = scan_digits(rdr, 10u);
573: num_str.push_char('.');
libsyntax/parse/lexer.rs:197:1-197:1 -fn- definition:
fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
(pos - rdr.filemap.start_pos)
}
references:- 7222: byte_offset(rdr, start).to_uint(),
223: byte_offset(rdr, end).to_uint()))
224: }
--
229: rdr.last_pos = rdr.pos;
230: let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint();
231: if current_byte_offset < rdr.filemap.src.len() {
--
259: pub fn nextch(rdr: &StringReader) -> Option<char> {
260: let offset = byte_offset(rdr, rdr.pos).to_uint();
261: if offset < rdr.filemap.src.len() {
--
271: pub fn nextnextch(rdr: &StringReader) -> Option<char> {
272: let offset = byte_offset(rdr, rdr.pos).to_uint();
273: let s = rdr.filemap.deref().src.as_slice();
libsyntax/parse/lexer.rs:41:1-41:1 -struct- definition:
pub struct StringReader<'a> {
pub span_diagnostic: &'a SpanHandler,
// The absolute offset within the codemap of the next character to read
references:- 39libsyntax/parse/comments.rs:
libsyntax/parse/lexer.rs:138:58-138:58 -fn- definition:
// report a lexical error spanning [`from_pos`, `to_pos`)
fn fatal_span(rdr: &mut StringReader,
from_pos: BytePos,
references:- 20460: } else {
461: fatal_span(rdr, start_bpos, rdr.last_pos,
462: "scan_exponent: bad fp literal".to_strbuf());
--
624: Some(p) => p,
625: None => fatal_span(rdr, start_bpos, rdr.last_pos,
626: "int literal is too large".to_strbuf())
--
958: if is_eof(rdr) {
959: fatal_span(rdr, start_bpos, rdr.last_pos,
960: "unterminated raw string".to_strbuf());
--
971: if is_eof(rdr) {
972: fatal_span(rdr, start_bpos, rdr.last_pos,
973: "unterminated raw string".to_strbuf());
libsyntax/parse/lexer.rs:36:29-36:29 -struct- definition:
pub struct TokenAndSpan {
pub tok: token::Token,
pub sp: Span,
references:- 34libsyntax/parse/parser.rs:
libsyntax/ext/tt/transcribe.rs:
libsyntax/parse/lexer.rs:
libsyntax/parse/parser.rs:
libsyntax/parse/comments.rs:
libsyntax/ext/tt/transcribe.rs:
libsyntax/ext/tt/macro_parser.rs:
libsyntax/parse/lexer.rs:
libsyntax/parse/lexer.rs:149:42-149:42 -fn- definition:
// escaped character to the error message
fn fatal_span_char(rdr: &mut StringReader,
from_pos: BytePos,
references:- 6875: '\t' | '\n' | '\r' | '\'' => {
876: fatal_span_char(
877: rdr,
--
937: c2 => {
938: fatal_span_char(rdr, escaped_pos, rdr.last_pos,
939: "unknown string escape".to_strbuf(), c2);
--
961: } else if !rdr.curr_is('"') {
962: fatal_span_char(rdr, start_bpos, rdr.last_pos,
963: "only `#` is allowed in raw string delimitation; \
--
1019: c => {
1020: fatal_span_char(rdr, rdr.last_pos, rdr.pos,
1021: "unknown start of token".to_strbuf(), c);
libsyntax/parse/lexer.rs:214:31-214:31 -fn- definition:
/// up to but excluding `end`.
fn with_str_from_to<T>(
rdr: &StringReader,
references:- 2209: -> T {
210: with_str_from_to(rdr, start, rdr.last_pos, f)
211: }
--
987: bump(rdr);
988: let str_content = with_str_from_to(rdr,
989: content_start_bpos,
libsyntax/parse/lexer.rs:72:65-72:65 -fn- definition:
/* For comments.rs, which hackily pokes into 'pos' and 'curr' */
pub fn new_low_level_string_reader<'a>(span_diagnostic: &'a SpanHandler,
filemap: Rc<codemap::FileMap>)
references:- 266: -> StringReader<'a> {
67: let mut r = new_low_level_string_reader(span_diagnostic, filemap);
68: string_advance_token(&mut r); /* fill in peek_* */
libsyntax/parse/comments.rs:
373: let filemap = cm.new_filemap(path, src);
374: let mut rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);
libsyntax/parse/lexer.rs:391:1-391:1 -fn- definition:
pub fn is_block_non_doc_comment(s: &str) -> bool {
s.starts_with("/***")
}
references:- 3426: // but comments with only "*"s between two "/"s are not
427: if !is_block_non_doc_comment(string) {
428: Some(TokenAndSpan{
libsyntax/parse/comments.rs:
283: }
284: if !is_block_non_doc_comment(curr_line.as_slice()) {
285: return
libsyntax/parse/lexer.rs:674:1-674:1 -fn- definition:
fn ident_continue(c: Option<char>) -> bool {
let c = match c { Some(c) => c, None => return false };
(c >= 'a' && c <= 'z')
references:- 2820: if ident_start(Some(c2)) && !rdr.curr_is('\'') {
821: while ident_continue(rdr.curr) {
822: bump(rdr);
libsyntax/parse/lexer.rs:204:30-204:30 -fn- definition:
/// the character `rdr.curr`.
pub fn with_str_from<T>(
rdr: &StringReader,
references:- 5424: let res = if is_doc_comment {
425: with_str_from(rdr, start_bpos, |string| {
426: // but comments with only "*"s between two "/"s are not
--
699: return with_str_from(rdr, start, |string| {
700: if string == "_" {
--
823: }
824: let ident = with_str_from(rdr, start, |lifetime_name| {
825: str_to_ident(lifetime_name)
libsyntax/parse/comments.rs:
399: if token::is_lit(&tok) {
400: with_str_from(&rdr, bstart, |s| {
401: debug!("tok lit: {}", s);
libsyntax/parse/lexer.rs:323:1-323:1 -fn- definition:
pub fn is_line_non_doc_comment(s: &str) -> bool {
s.starts_with("////")
}
references:- 2libsyntax/parse/comments.rs:
40: pub fn is_doc_comment(s: &str) -> bool {
41: (s.starts_with("///") && !is_line_non_doc_comment(s)) ||
42: s.starts_with("//!") ||
libsyntax/parse/lexer.rs:
345: // but comments with only more "/"s are not
346: if !is_line_non_doc_comment(string) {
347: Some(TokenAndSpan{
libsyntax/parse/lexer.rs:483:1-483:1 -fn- definition:
fn check_float_base(rdr: &mut StringReader, start_bpos: BytePos, last_bpos: BytePos,
base: uint) {
match base {
references:- 4606: bump(rdr);
607: check_float_base(rdr, start_bpos, rdr.last_pos, base);
608: return token::LIT_FLOAT(str_to_ident(num_str.as_slice()), ast::TyF128);
--
613: if is_float {
614: check_float_base(rdr, start_bpos, rdr.last_pos, base);
615: return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(
libsyntax/parse/lexer.rs:317:67-317:67 -fn- definition:
// returns a Some(sugared-doc-attr) if one exists, None otherwise.
fn consume_whitespace_and_comments(rdr: &mut StringReader)
-> Option<TokenAndSpan> {
references:- 4179: fn string_advance_token(r: &mut StringReader) {
180: match consume_whitespace_and_comments(r) {
181: Some(comment) => {
--
382: while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
383: return consume_whitespace_and_comments(rdr);
384: }
--
440: // restart whitespace munch.
441: if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
442: }
libsyntax/parse/lexer.rs:254:1-254:1 -fn- definition:
pub fn is_eof(rdr: &StringReader) -> bool {
rdr.curr.is_none()
}
references:- 2193: impl<'a> Reader for StringReader<'a> {
94: fn is_eof(&self) -> bool { is_eof(self) }
95: // return the next token. EFFECT: advances the string_reader.
--
653: }
654: if i != 0 && is_eof(rdr) {
655: fatal_span(rdr, start_bpos, rdr.last_pos,
--
1026: fn consume_whitespace(rdr: &mut StringReader) {
1027: while is_whitespace(rdr.curr) && !is_eof(rdr) { bump(rdr); }
1028: }
libsyntax/parse/comments.rs:
378: let mut first_read: bool = true;
379: while !is_eof(&rdr) {
380: loop {
libsyntax/parse/lexer.rs:634:1-634:1 -fn- definition:
fn scan_numeric_escape(rdr: &mut StringReader, n_hex_digits: uint) -> char {
let mut accum_int = 0;
let mut i = n_hex_digits;
references:- 6928: 'x' => {
929: accum_str.push_char(scan_numeric_escape(rdr, 2u));
930: }
--
934: 'U' => {
935: accum_str.push_char(scan_numeric_escape(rdr, 8u));
936: }
libsyntax/parse/lexer.rs:266:2-266:2 -fn- definition:
}
pub fn nextch_is(rdr: &StringReader, c: char) -> bool {
nextch(rdr) == Some(c)
references:- 20702: } else {
703: let is_mod_name = rdr.curr_is(':') && nextch_is(rdr, ':');
--
1001: '&' => {
1002: if nextch_is(rdr, '&') {
1003: bump(rdr);
libsyntax/parse/comments.rs:
348: read_line_comments(rdr, code_to_the_left, comments);
349: } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
350: read_block_comment(rdr, code_to_the_left, comments);
351: } else if rdr.curr_is('#') && nextch_is(rdr, '!') {
352: read_shebang_comment(rdr, code_to_the_left, comments);
libsyntax/parse/lexer.rs:270:1-270:1 -fn- definition:
pub fn nextnextch(rdr: &StringReader) -> Option<char> {
let offset = byte_offset(rdr, rdr.pos).to_uint();
let s = rdr.filemap.deref().src.as_slice();
references:- 2602: back-end. */
603: } else if c == '1' && n == '2' && nextnextch(rdr).unwrap_or('\x00') == '8' {
604: bump(rdr);
libsyntax/parse/lexer.rs:665:1-665:1 -fn- definition:
fn ident_start(c: Option<char>) -> bool {
let c = match c { Some(c) => c, None => return false };
(c >= 'a' && c <= 'z')
references:- 3568: let mut is_float = false;
569: if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) {
570: is_float = true;
--
819: // quote, then this is a lifetime name:
820: if ident_start(Some(c2)) && !rdr.curr_is('\'') {
821: while ident_continue(rdr.curr) {
libsyntax/parse/lexer.rs:258:1-258:1 -fn- definition:
pub fn nextch(rdr: &StringReader) -> Option<char> {
let offset = byte_offset(rdr, rdr.pos).to_uint();
if offset < rdr.filemap.src.len() {
references:- 8586: c = rdr.curr.unwrap_or('\x00');
587: n = nextch(rdr).unwrap_or('\x00');
588: if c == '3' && n == '2' {
--
1008: '|' => {
1009: match nextch(rdr) {
1010: Some('|') => { bump(rdr); bump(rdr); return token::OROR; }
libsyntax/parse/lexer.rs:713:4-713:4 -fn- definition:
fn binop(rdr: &mut StringReader, op: token::BinOp) -> token::Token {
bump(rdr);
if rdr.curr_is('=') {
references:- 10998: return token::RARROW;
999: } else { return binop(rdr, token::MINUS); }
1000: }
--
1014: '+' => { return binop(rdr, token::PLUS); }
1015: '*' => { return binop(rdr, token::STAR); }
1016: '/' => { return binop(rdr, token::SLASH); }
1017: '^' => { return binop(rdr, token::CARET); }
1018: '%' => { return binop(rdr, token::PERCENT); }
1019: c => {
libsyntax/parse/lexer.rs:178:39-178:39 -fn- definition:
// EFFECT: update the interner, maybe.
fn string_advance_token(r: &mut StringReader) {
match consume_whitespace_and_comments(r) {
references:- 267: let mut r = new_low_level_string_reader(span_diagnostic, filemap);
68: string_advance_token(&mut r); /* fill in peek_* */
69: r
--
100: };
101: string_advance_token(self);
102: ret_val
libsyntax/parse/lexer.rs:27:1-27:1 -trait- definition:
pub trait Reader {
fn is_eof(&self) -> bool;
fn next_token(&mut self) -> TokenAndSpan;
references:- 493: impl<'a> Reader for StringReader<'a> {
94: fn is_eof(&self) -> bool { is_eof(self) }
libsyntax/parse/parser.rs:
278: cfg: ast::CrateConfig,
279: mut rdr: Box<Reader:>)
280: -> Parser<'a> {
--
330: pub quote_depth: uint, // not (yet) related to the quasiquoter
331: pub reader: Box<Reader:>,
332: pub interner: Rc<token::IdentInterner>,
libsyntax/parse/lexer.rs:
117: impl<'a> Reader for TtReader<'a> {
118: fn is_eof(&self) -> bool {
libsyntax/parse/lexer.rs:294:1-294:1 -fn- definition:
pub fn is_whitespace(c: Option<char>) -> bool {
match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace
' ' | '\n' | '\t' | '\r' => true,
references:- 4319: -> Option<TokenAndSpan> {
320: while is_whitespace(rdr.curr) { bump(rdr); }
321: return consume_any_line_comment(rdr);
libsyntax/parse/comments.rs:
163: fn consume_non_eol_whitespace(rdr: &mut StringReader) {
164: while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
165: bump(rdr);
--
179: comments: &mut Vec<Comment>) {
180: while is_whitespace(rdr.curr) && !is_eof(rdr) {
181: if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
libsyntax/parse/lexer.rs:
1026: fn consume_whitespace(rdr: &mut StringReader) {
1027: while is_whitespace(rdr.curr) && !is_eof(rdr) { bump(rdr); }
1028: }