(index<- ) ./libcore/str.rs
git branch: * master 5200215 auto merge of #14035 : alexcrichton/rust/experimental, r=huonw
modified: Fri May 9 13:02:28 2014
1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! String manipulation
12 //!
13 //! For more details, see std::str
14
15 use cast::transmute;
16 use cast;
17 use char;
18 use clone::Clone;
19 use cmp::{Eq, TotalEq};
20 use container::Container;
21 use default::Default;
22 use iter::{Filter, Map, Iterator};
23 use iter::{Rev, DoubleEndedIterator, ExactSize};
24 use num::Saturating;
25 use option::{None, Option, Some};
26 use raw::Repr;
27 use slice::{ImmutableVector, Vector};
28 use slice;
29
30 /*
31 Section: Creating a string
32 */
33
34 /// Converts a vector to a string slice without performing any allocations.
35 ///
36 /// Once the slice has been validated as utf-8, it is transmuted in-place and
37 /// returned as a '&str' instead of a '&[u8]'
38 ///
39 /// Returns None if the slice is not utf-8.
40 pub fn from_utf8<'a>(v: &'a [u8]) -> Option<&'a str> {
41 if is_utf8(v) {
42 Some(unsafe { raw::from_utf8(v) })
43 } else { None }
44 }
45
/// A predicate over characters, used to compare against a character
pub trait CharEq {
    /// Decide whether `c` is a match (for the splitters in this file:
    /// whether the string should be split at this character)
    fn matches(&mut self, c: char) -> bool;
    /// Report whether this predicate is only concerned with ASCII
    /// characters, which allows callers to use a faster implementation.
    fn only_ascii(&self) -> bool;
}
54
55 impl CharEq for char {
56 #[inline]
57 fn matches(&mut self, c: char) -> bool { *self == c }
58
59 #[inline]
60 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
61 }
62
63 impl<'a> CharEq for |char|: 'a -> bool {
64 #[inline]
65 fn matches(&mut self, c: char) -> bool { (*self)(c) }
66
67 #[inline]
68 fn only_ascii(&self) -> bool { false }
69 }
70
71 impl CharEq for extern "Rust" fn(char) -> bool {
72 #[inline]
73 fn matches(&mut self, c: char) -> bool { (*self)(c) }
74
75 #[inline]
76 fn only_ascii(&self) -> bool { false }
77 }
78
79 impl<'a> CharEq for &'a [char] {
80 #[inline]
81 fn matches(&mut self, c: char) -> bool {
82 self.iter().any(|&mut m| m.matches(c))
83 }
84
85 #[inline]
86 fn only_ascii(&self) -> bool {
87 self.iter().all(|m| m.only_ascii())
88 }
89 }
90
91 /*
92 Section: Iterators
93 */
94
95 /// External iterator for a string's characters.
96 /// Use with the `std::iter` module.
97 #[deriving(Clone)]
98 pub struct Chars<'a> {
99 /// The slice remaining to be iterated
100 string: &'a str,
101 }
102
103 impl<'a> Iterator<char> for Chars<'a> {
104 #[inline]
105 fn next(&mut self) -> Option<char> {
106 // Decode the next codepoint, then update
107 // the slice to be just the remaining part
108 if self.string.len() != 0 {
109 let CharRange {ch, next} = self.string.char_range_at(0);
110 unsafe {
111 self.string = raw::slice_unchecked(self.string, next, self.string.len());
112 }
113 Some(ch)
114 } else {
115 None
116 }
117 }
118
119 #[inline]
120 fn size_hint(&self) -> (uint, Option<uint>) {
121 (self.string.len().saturating_add(3)/4, Some(self.string.len()))
122 }
123 }
124
125 impl<'a> DoubleEndedIterator<char> for Chars<'a> {
126 #[inline]
127 fn next_back(&mut self) -> Option<char> {
128 if self.string.len() != 0 {
129 let CharRange {ch, next} = self.string.char_range_at_reverse(self.string.len());
130 unsafe {
131 self.string = raw::slice_unchecked(self.string, 0, next);
132 }
133 Some(ch)
134 } else {
135 None
136 }
137 }
138 }
139
140 /// External iterator for a string's characters and their byte offsets.
141 /// Use with the `std::iter` module.
142 #[deriving(Clone)]
143 pub struct CharOffsets<'a> {
144 /// The original string to be iterated
145 string: &'a str,
146 iter: Chars<'a>,
147 }
148
149 impl<'a> Iterator<(uint, char)> for CharOffsets<'a> {
150 #[inline]
151 fn next(&mut self) -> Option<(uint, char)> {
152 // Compute the byte offset by using the pointer offset between
153 // the original string slice and the iterator's remaining part
154 let offset = self.iter.string.as_ptr() as uint - self.string.as_ptr() as uint;
155 self.iter.next().map(|ch| (offset, ch))
156 }
157
158 #[inline]
159 fn size_hint(&self) -> (uint, Option<uint>) {
160 self.iter.size_hint()
161 }
162 }
163
164 impl<'a> DoubleEndedIterator<(uint, char)> for CharOffsets<'a> {
165 #[inline]
166 fn next_back(&mut self) -> Option<(uint, char)> {
167 self.iter.next_back().map(|ch| {
168 let offset = self.iter.string.len() +
169 self.iter.string.as_ptr() as uint - self.string.as_ptr() as uint;
170 (offset, ch)
171 })
172 }
173 }
174
175 #[deprecated = "replaced by Rev<Chars<'a>>"]
176 pub type RevChars<'a> = Rev<Chars<'a>>;
177
178 #[deprecated = "replaced by Rev<CharOffsets<'a>>"]
179 pub type RevCharOffsets<'a> = Rev<CharOffsets<'a>>;
180
181 /// External iterator for a string's bytes.
182 /// Use with the `std::iter` module.
183 pub type Bytes<'a> =
184 Map<'a, &'a u8, u8, slice::Items<'a, u8>>;
185
186 #[deprecated = "replaced by Rev<Bytes<'a>>"]
187 pub type RevBytes<'a> = Rev<Bytes<'a>>;
188
189 /// An iterator over the substrings of a string, separated by `sep`.
190 #[deriving(Clone)]
191 pub struct CharSplits<'a, Sep> {
192 /// The slice remaining to be iterated
193 string: &'a str,
194 sep: Sep,
195 /// Whether an empty string at the end is allowed
196 allow_trailing_empty: bool,
197 only_ascii: bool,
198 finished: bool,
199 }
200
201 #[deprecated = "replaced by Rev<CharSplits<'a, Sep>>"]
202 pub type RevCharSplits<'a, Sep> = Rev<CharSplits<'a, Sep>>;
203
204 /// An iterator over the substrings of a string, separated by `sep`,
205 /// splitting at most `count` times.
206 #[deriving(Clone)]
207 pub struct CharSplitsN<'a, Sep> {
208 iter: CharSplits<'a, Sep>,
209 /// The number of splits remaining
210 count: uint,
211 invert: bool,
212 }
213
214 /// An iterator over the words of a string, separated by a sequence of whitespace
215 pub type Words<'a> =
216 Filter<'a, &'a str, CharSplits<'a, extern "Rust" fn(char) -> bool>>;
217
218 /// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
219 pub type AnyLines<'a> =
220 Map<'a, &'a str, &'a str, CharSplits<'a, char>>;
221
222 impl<'a, Sep> CharSplits<'a, Sep> {
223 #[inline]
224 fn get_end(&mut self) -> Option<&'a str> {
225 if !self.finished && (self.allow_trailing_empty || self.string.len() > 0) {
226 self.finished = true;
227 Some(self.string)
228 } else {
229 None
230 }
231 }
232 }
233
234 impl<'a, Sep: CharEq> Iterator<&'a str> for CharSplits<'a, Sep> {
235 #[inline]
236 fn next(&mut self) -> Option<&'a str> {
237 if self.finished { return None }
238
239 let mut next_split = None;
240 if self.only_ascii {
241 for (idx, byte) in self.string.bytes().enumerate() {
242 if self.sep.matches(byte as char) && byte < 128u8 {
243 next_split = Some((idx, idx + 1));
244 break;
245 }
246 }
247 } else {
248 for (idx, ch) in self.string.char_indices() {
249 if self.sep.matches(ch) {
250 next_split = Some((idx, self.string.char_range_at(idx).next));
251 break;
252 }
253 }
254 }
255 match next_split {
256 Some((a, b)) => unsafe {
257 let elt = raw::slice_unchecked(self.string, 0, a);
258 self.string = raw::slice_unchecked(self.string, b, self.string.len());
259 Some(elt)
260 },
261 None => self.get_end(),
262 }
263 }
264 }
265
266 impl<'a, Sep: CharEq> DoubleEndedIterator<&'a str>
267 for CharSplits<'a, Sep> {
268 #[inline]
269 fn next_back(&mut self) -> Option<&'a str> {
270 if self.finished { return None }
271
272 if !self.allow_trailing_empty {
273 self.allow_trailing_empty = true;
274 match self.next_back() {
275 Some(elt) if !elt.is_empty() => return Some(elt),
276 _ => if self.finished { return None }
277 }
278 }
279 let len = self.string.len();
280 let mut next_split = None;
281
282 if self.only_ascii {
283 for (idx, byte) in self.string.bytes().enumerate().rev() {
284 if self.sep.matches(byte as char) && byte < 128u8 {
285 next_split = Some((idx, idx + 1));
286 break;
287 }
288 }
289 } else {
290 for (idx, ch) in self.string.char_indices().rev() {
291 if self.sep.matches(ch) {
292 next_split = Some((idx, self.string.char_range_at(idx).next));
293 break;
294 }
295 }
296 }
297 match next_split {
298 Some((a, b)) => unsafe {
299 let elt = raw::slice_unchecked(self.string, b, len);
300 self.string = raw::slice_unchecked(self.string, 0, a);
301 Some(elt)
302 },
303 None => { self.finished = true; Some(self.string) }
304 }
305 }
306 }
307
308 impl<'a, Sep: CharEq> Iterator<&'a str> for CharSplitsN<'a, Sep> {
309 #[inline]
310 fn next(&mut self) -> Option<&'a str> {
311 if self.count != 0 {
312 self.count -= 1;
313 if self.invert { self.iter.next_back() } else { self.iter.next() }
314 } else {
315 self.iter.get_end()
316 }
317 }
318 }
319
320 /// An iterator over the start and end indices of the matches of a
321 /// substring within a larger string
322 #[deriving(Clone)]
323 pub struct MatchIndices<'a> {
324 haystack: &'a str,
325 needle: &'a str,
326 position: uint,
327 }
328
329 /// An iterator over the substrings of a string separated by a given
330 /// search string
331 #[deriving(Clone)]
332 pub struct StrSplits<'a> {
333 it: MatchIndices<'a>,
334 last_end: uint,
335 finished: bool
336 }
337
338 impl<'a> Iterator<(uint, uint)> for MatchIndices<'a> {
339 #[inline]
340 fn next(&mut self) -> Option<(uint, uint)> {
341 // See Issue #1932 for why this is a naive search
342 let (h_len, n_len) = (self.haystack.len(), self.needle.len());
343 let mut match_start = 0;
344 let mut match_i = 0;
345
346 while self.position < h_len {
347 if self.haystack[self.position] == self.needle[match_i] {
348 if match_i == 0 { match_start = self.position; }
349 match_i += 1;
350 self.position += 1;
351
352 if match_i == n_len {
353 // found a match!
354 return Some((match_start, self.position));
355 }
356 } else {
357 // failed match, backtrack
358 if match_i > 0 {
359 match_i = 0;
360 self.position = match_start;
361 }
362 self.position += 1;
363 }
364 }
365 None
366 }
367 }
368
369 impl<'a> Iterator<&'a str> for StrSplits<'a> {
370 #[inline]
371 fn next(&mut self) -> Option<&'a str> {
372 if self.finished { return None; }
373
374 match self.it.next() {
375 Some((from, to)) => {
376 let ret = Some(self.it.haystack.slice(self.last_end, from));
377 self.last_end = to;
378 ret
379 }
380 None => {
381 self.finished = true;
382 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
383 }
384 }
385 }
386 }
387
388 /*
389 Section: Comparing strings
390 */
391
392 // share the implementation of the lang-item vs. non-lang-item
393 // eq_slice.
394 #[inline]
395 fn eq_slice_(a: &str, b: &str) -> bool {
396 #[allow(ctypes)]
397 extern { fn memcmp(s1: *i8, s2: *i8, n: uint) -> i32; }
398 a.len() == b.len() && unsafe {
399 memcmp(a.as_ptr() as *i8,
400 b.as_ptr() as *i8,
401 a.len()) == 0
402 }
403 }
404
405 /// Bytewise slice equality
406 #[cfg(not(test))]
407 #[lang="str_eq"]
408 #[inline]
409 pub fn eq_slice(a: &str, b: &str) -> bool {
410 eq_slice_(a, b)
411 }
412
413 /// Bytewise slice equality
414 #[cfg(test)]
415 #[inline]
416 pub fn eq_slice(a: &str, b: &str) -> bool {
417 eq_slice_(a, b)
418 }
419
420 /// Bytewise string equality
421 #[cfg(not(test))]
422 #[lang="uniq_str_eq"]
423 #[inline]
424 pub fn eq(a: &~str, b: &~str) -> bool {
425 eq_slice(*a, *b)
426 }
427
428 #[cfg(test)]
429 #[inline]
430 pub fn eq(a: &~str, b: &~str) -> bool {
431 eq_slice(*a, *b)
432 }
433
434 /*
435 Section: Misc
436 */
437
438 /// Walk through `iter` checking that it's a valid UTF-8 sequence,
439 /// returning `true` in that case, or, if it is invalid, `false` with
440 /// `iter` reset such that it is pointing at the first byte in the
441 /// invalid sequence.
442 #[inline(always)]
443 fn run_utf8_validation_iterator(iter: &mut slice::Items<u8>) -> bool {
444 loop {
445 // save the current thing we're pointing at.
446 let old = *iter;
447
448 // restore the iterator we had at the start of this codepoint.
449 macro_rules! err ( () => { {*iter = old; return false} });
450 macro_rules! next ( () => {
451 match iter.next() {
452 Some(a) => *a,
453 // we needed data, but there was none: error!
454 None => err!()
455 }
456 });
457
458 let first = match iter.next() {
459 Some(&b) => b,
460 // we're at the end of the iterator and a codepoint
461 // boundary at the same time, so this string is valid.
462 None => return true
463 };
464
465 // ASCII characters are always valid, so only large
466 // bytes need more examination.
467 if first >= 128 {
468 let w = utf8_char_width(first);
469 let second = next!();
470 // 2-byte encoding is for codepoints \u0080 to \u07ff
471 // first C2 80 last DF BF
472 // 3-byte encoding is for codepoints \u0800 to \uffff
473 // first E0 A0 80 last EF BF BF
474 // excluding surrogates codepoints \ud800 to \udfff
475 // ED A0 80 to ED BF BF
476 // 4-byte encoding is for codepoints \u10000 to \u10ffff
477 // first F0 90 80 80 last F4 8F BF BF
478 //
479 // Use the UTF-8 syntax from the RFC
480 //
481 // https://tools.ietf.org/html/rfc3629
482 // UTF8-1 = %x00-7F
483 // UTF8-2 = %xC2-DF UTF8-tail
484 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
485 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
486 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
487 // %xF4 %x80-8F 2( UTF8-tail )
488 match w {
489 2 => if second & 192 != TAG_CONT_U8 {err!()},
490 3 => {
491 match (first, second, next!() & 192) {
492 (0xE0 , 0xA0 .. 0xBF, TAG_CONT_U8) |
493 (0xE1 .. 0xEC, 0x80 .. 0xBF, TAG_CONT_U8) |
494 (0xED , 0x80 .. 0x9F, TAG_CONT_U8) |
495 (0xEE .. 0xEF, 0x80 .. 0xBF, TAG_CONT_U8) => {}
496 _ => err!()
497 }
498 }
499 4 => {
500 match (first, second, next!() & 192, next!() & 192) {
501 (0xF0 , 0x90 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
502 (0xF1 .. 0xF3, 0x80 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
503 (0xF4 , 0x80 .. 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
504 _ => err!()
505 }
506 }
507 _ => err!()
508 }
509 }
510 }
511 }
512
513 /// Determines if a vector of bytes contains valid UTF-8.
514 pub fn is_utf8(v: &[u8]) -> bool {
515 run_utf8_validation_iterator(&mut v.iter())
516 }
517
518 /// Determines if a vector of `u16` contains valid UTF-16
519 pub fn is_utf16(v: &[u16]) -> bool {
520 let mut it = v.iter();
521 macro_rules! next ( ($ret:expr) => {
522 match it.next() { Some(u) => *u, None => return $ret }
523 }
524 )
525 loop {
526 let u = next!(true);
527
528 match char::from_u32(u as u32) {
529 Some(_) => {}
530 None => {
531 let u2 = next!(false);
532 if u < 0xD7FF || u > 0xDBFF ||
533 u2 < 0xDC00 || u2 > 0xDFFF { return false; }
534 }
535 }
536 }
537 }
538
539 /// An iterator that decodes UTF-16 encoded codepoints from a vector
540 /// of `u16`s.
541 #[deriving(Clone)]
542 pub struct UTF16Items<'a> {
543 iter: slice::Items<'a, u16>
544 }
545 /// The possibilities for values decoded from a `u16` stream.
546 #[deriving(Eq, TotalEq, Clone)]
547 pub enum UTF16Item {
548 /// A valid codepoint.
549 ScalarValue(char),
550 /// An invalid surrogate without its pair.
551 LoneSurrogate(u16)
552 }
553
554 impl UTF16Item {
555 /// Convert `self` to a `char`, taking `LoneSurrogate`s to the
556 /// replacement character (U+FFFD).
557 #[inline]
558 pub fn to_char_lossy(&self) -> char {
559 match *self {
560 ScalarValue(c) => c,
561 LoneSurrogate(_) => '\uFFFD'
562 }
563 }
564 }
565
566 impl<'a> Iterator<UTF16Item> for UTF16Items<'a> {
567 fn next(&mut self) -> Option<UTF16Item> {
568 let u = match self.iter.next() {
569 Some(u) => *u,
570 None => return None
571 };
572
573 if u < 0xD800 || 0xDFFF < u {
574 // not a surrogate
575 Some(ScalarValue(unsafe {cast::transmute(u as u32)}))
576 } else if u >= 0xDC00 {
577 // a trailing surrogate
578 Some(LoneSurrogate(u))
579 } else {
580 // preserve state for rewinding.
581 let old = self.iter;
582
583 let u2 = match self.iter.next() {
584 Some(u2) => *u2,
585 // eof
586 None => return Some(LoneSurrogate(u))
587 };
588 if u2 < 0xDC00 || u2 > 0xDFFF {
589 // not a trailing surrogate so we're not a valid
590 // surrogate pair, so rewind to redecode u2 next time.
591 self.iter = old;
592 return Some(LoneSurrogate(u))
593 }
594
595 // all ok, so lets decode it.
596 let c = ((u - 0xD800) as u32 << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
597 Some(ScalarValue(unsafe {cast::transmute(c)}))
598 }
599 }
600
601 #[inline]
602 fn size_hint(&self) -> (uint, Option<uint>) {
603 let (low, high) = self.iter.size_hint();
604 // we could be entirely valid surrogates (2 elements per
605 // char), or entirely non-surrogates (1 element per char)
606 (low / 2, high)
607 }
608 }
609
610 /// Create an iterator over the UTF-16 encoded codepoints in `v`,
611 /// returning invalid surrogates as `LoneSurrogate`s.
612 ///
613 /// # Example
614 ///
615 /// ```rust
616 /// use std::str;
617 /// use std::str::{ScalarValue, LoneSurrogate};
618 ///
619 /// // ðmus<invalid>ic<invalid>
620 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
621 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
622 /// 0xD834];
623 ///
624 /// assert_eq!(str::utf16_items(v).collect::<Vec<_>>(),
625 /// vec![ScalarValue('ð'),
626 /// ScalarValue('m'), ScalarValue('u'), ScalarValue('s'),
627 /// LoneSurrogate(0xDD1E),
628 /// ScalarValue('i'), ScalarValue('c'),
629 /// LoneSurrogate(0xD834)]);
630 /// ```
631 pub fn utf16_items<'a>(v: &'a [u16]) -> UTF16Items<'a> {
632 UTF16Items { iter : v.iter() }
633 }
634
635 /// Return a slice of `v` ending at (and not including) the first NUL
636 /// (0).
637 ///
638 /// # Example
639 ///
640 /// ```rust
641 /// use std::str;
642 ///
643 /// // "abcd"
644 /// let mut v = ['a' as u16, 'b' as u16, 'c' as u16, 'd' as u16];
645 /// // no NULs so no change
646 /// assert_eq!(str::truncate_utf16_at_nul(v), v.as_slice());
647 ///
648 /// // "ab\0d"
649 /// v[2] = 0;
650 /// assert_eq!(str::truncate_utf16_at_nul(v),
651 /// &['a' as u16, 'b' as u16]);
652 /// ```
653 pub fn truncate_utf16_at_nul<'a>(v: &'a [u16]) -> &'a [u16] {
654 match v.iter().position(|c| *c == 0) {
655 // don't include the 0
656 Some(i) => v.slice_to(i),
657 None => v
658 }
659 }
660
661 // https://tools.ietf.org/html/rfc3629
662 static UTF8_CHAR_WIDTH: [u8, ..256] = [
663 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
664 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
665 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
666 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
667 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
668 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
669 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
670 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
671 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
672 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
673 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
674 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
675 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
676 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
677 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
678 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
679 ];
680
681 /// Given a first byte, determine how many bytes are in this UTF-8 character
682 #[inline]
683 pub fn utf8_char_width(b: u8) -> uint {
684 return UTF8_CHAR_WIDTH[b as uint] as uint;
685 }
686
687 /// Struct that contains a `char` and the index of the first byte of
688 /// the next `char` in a string. This can be used as a data structure
689 /// for iterating over the UTF-8 bytes of a string.
690 pub struct CharRange {
691 /// Current `char`
692 pub ch: char,
693 /// Index of the first byte of the next `char`
694 pub next: uint,
695 }
696
697 // Return the initial codepoint accumulator for the first byte.
698 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
699 // for width 3, and 3 bits for width 4
700 macro_rules! utf8_first_byte(
701 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
702 )
703
704 // return the value of $ch updated with continuation byte $byte
705 macro_rules! utf8_acc_cont_byte(
706 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
707 )
708
// Value of a byte masked with 192 (its top two bits) when that byte is a
// UTF-8 continuation byte.
static TAG_CONT_U8: u8 = 0b1000_0000u8;
710
711 /// Unsafe operations
712 pub mod raw {
713 use cast;
714 use container::Container;
715 use iter::Iterator;
716 use ptr::RawPtr;
717 use raw::Slice;
718 use slice::{ImmutableVector};
719 use str::{is_utf8, StrSlice};
720
721 /// Converts a slice of bytes to a string slice without checking
722 /// that the string contains valid UTF-8.
723 pub unsafe fn from_utf8<'a>(v: &'a [u8]) -> &'a str {
724 cast::transmute(v)
725 }
726
727 /// Form a slice from a C string. Unsafe because the caller must ensure the
728 /// C string has the static lifetime, or else the return value may be
729 /// invalidated later.
730 pub unsafe fn c_str_to_static_slice(s: *i8) -> &'static str {
731 let s = s as *u8;
732 let mut curr = s;
733 let mut len = 0u;
734 while *curr != 0u8 {
735 len += 1u;
736 curr = s.offset(len as int);
737 }
738 let v = Slice { data: s, len: len };
739 assert!(is_utf8(::cast::transmute(v)));
740 ::cast::transmute(v)
741 }
742
743 /// Takes a bytewise (not UTF-8) slice from a string.
744 ///
745 /// Returns the substring from [`begin`..`end`).
746 ///
747 /// # Failure
748 ///
749 /// If begin is greater than end.
750 /// If end is greater than the length of the string.
751 #[inline]
752 pub unsafe fn slice_bytes<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
753 assert!(begin <= end);
754 assert!(end <= s.len());
755 slice_unchecked(s, begin, end)
756 }
757
758 /// Takes a bytewise (not UTF-8) slice from a string.
759 ///
760 /// Returns the substring from [`begin`..`end`).
761 ///
762 /// Caller must check slice boundaries!
763 #[inline]
764 pub unsafe fn slice_unchecked<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
765 cast::transmute(Slice {
766 data: s.as_ptr().offset(begin as int),
767 len: end - begin,
768 })
769 }
770 }
771
772 /*
773 Section: Trait implementations
774 */
775
776 #[cfg(not(test))]
777 #[allow(missing_doc)]
778 pub mod traits {
779 use container::Container;
780 use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
781 use iter::Iterator;
782 use option::{Some, None};
783 use str::{Str, StrSlice, eq_slice};
784
785 impl<'a> TotalOrd for &'a str {
786 #[inline]
787 fn cmp(&self, other: & &'a str) -> Ordering {
788 for (s_b, o_b) in self.bytes().zip(other.bytes()) {
789 match s_b.cmp(&o_b) {
790 Greater => return Greater,
791 Less => return Less,
792 Equal => ()
793 }
794 }
795
796 self.len().cmp(&other.len())
797 }
798 }
799
800 impl TotalOrd for ~str {
801 #[inline]
802 fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
803 }
804
805 impl<'a> Eq for &'a str {
806 #[inline]
807 fn eq(&self, other: & &'a str) -> bool {
808 eq_slice((*self), (*other))
809 }
810 #[inline]
811 fn ne(&self, other: & &'a str) -> bool { !(*self).eq(other) }
812 }
813
814 impl Eq for ~str {
815 #[inline]
816 fn eq(&self, other: &~str) -> bool {
817 eq_slice((*self), (*other))
818 }
819 }
820
821 impl<'a> TotalEq for &'a str {}
822
823 impl TotalEq for ~str {}
824
825 impl<'a> Ord for &'a str {
826 #[inline]
827 fn lt(&self, other: & &'a str) -> bool { self.cmp(other) == Less }
828 }
829
830 impl Ord for ~str {
831 #[inline]
832 fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
833 }
834
835 impl<'a, S: Str> Equiv<S> for &'a str {
836 #[inline]
837 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
838 }
839
840 impl<'a, S: Str> Equiv<S> for ~str {
841 #[inline]
842 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
843 }
844 }
845
846 #[cfg(test)]
847 pub mod traits {}
848
/// Implemented by any string type that can be viewed as a string slice
pub trait Str {
    /// Borrow `self` as a string slice.
    fn as_slice<'a>(&'a self) -> &'a str;
}
854
855 impl<'a> Str for &'a str {
856 #[inline]
857 fn as_slice<'a>(&'a self) -> &'a str { *self }
858 }
859
860 impl<'a> Str for ~str {
861 #[inline]
862 fn as_slice<'a>(&'a self) -> &'a str { let s: &'a str = *self; s }
863 }
864
865 impl<'a> Container for &'a str {
866 #[inline]
867 fn len(&self) -> uint {
868 self.repr().len
869 }
870 }
871
872 impl Container for ~str {
873 #[inline]
874 fn len(&self) -> uint { self.as_slice().len() }
875 }
876
877 /// Methods for string slices
878 pub trait StrSlice<'a> {
879 /// Returns true if one string contains another
880 ///
881 /// # Arguments
882 ///
883 /// - needle - The string to look for
884 fn contains<'a>(&self, needle: &'a str) -> bool;
885
886 /// Returns true if a string contains a char.
887 ///
888 /// # Arguments
889 ///
890 /// - needle - The char to look for
891 fn contains_char(&self, needle: char) -> bool;
892
893 /// An iterator over the characters of `self`. Note, this iterates
894 /// over unicode code-points, not unicode graphemes.
895 ///
896 /// # Example
897 ///
898 /// ```rust
899 /// let v: Vec<char> = "abc åäö".chars().collect();
900 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
901 /// ```
902 fn chars(&self) -> Chars<'a>;
903
904 /// Do not use this - it is deprecated.
905 #[deprecated = "replaced by .chars().rev()"]
906 fn chars_rev(&self) -> Rev<Chars<'a>>;
907
908 /// An iterator over the bytes of `self`
909 fn bytes(&self) -> Bytes<'a>;
910
911 /// Do not use this - it is deprecated.
912 #[deprecated = "replaced by .bytes().rev()"]
913 fn bytes_rev(&self) -> Rev<Bytes<'a>>;
914
915 /// An iterator over the characters of `self` and their byte offsets.
916 fn char_indices(&self) -> CharOffsets<'a>;
917
918 /// Do not use this - it is deprecated.
919 #[deprecated = "replaced by .char_indices().rev()"]
920 fn char_indices_rev(&self) -> Rev<CharOffsets<'a>>;
921
922 /// An iterator over substrings of `self`, separated by characters
923 /// matched by `sep`.
924 ///
925 /// # Example
926 ///
927 /// ```rust
928 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
929 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
930 ///
931 /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).collect();
932 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
933 ///
934 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
935 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
936 /// ```
937 fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep>;
938
939 /// An iterator over substrings of `self`, separated by characters
940 /// matched by `sep`, restricted to splitting at most `count`
941 /// times.
942 ///
943 /// # Example
944 ///
945 /// ```rust
946 /// let v: Vec<&str> = "Mary had a little lambda".splitn(' ', 2).collect();
947 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
948 ///
949 /// let v: Vec<&str> = "abc1def2ghi".splitn(|c: char| c.is_digit(), 1).collect();
950 /// assert_eq!(v, vec!["abc", "def2ghi"]);
951 ///
952 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn('X', 2).collect();
953 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
954 /// ```
955 fn splitn<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitsN<'a, Sep>;
956
957 /// An iterator over substrings of `self`, separated by characters
958 /// matched by `sep`.
959 ///
960 /// Equivalent to `split`, except that the trailing substring
961 /// is skipped if empty (terminator semantics).
962 ///
963 /// # Example
964 ///
965 /// ```rust
966 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
967 /// assert_eq!(v, vec!["A", "B"]);
968 ///
969 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
970 /// assert_eq!(v, vec!["A", "", "B", ""]);
971 ///
972 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
973 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
974 ///
975 /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).rev().collect();
976 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
977 ///
978 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
979 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
980 /// ```
981 fn split_terminator<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep>;
982
983 /// Do not use this - it is deprecated.
984 #[deprecated = "replaced by .split(sep).rev()"]
985 fn rsplit<Sep: CharEq>(&self, sep: Sep) -> Rev<CharSplits<'a, Sep>>;
986
987 /// An iterator over substrings of `self`, separated by characters
988 /// matched by `sep`, starting from the end of the string.
989 /// Restricted to splitting at most `count` times.
990 ///
991 /// # Example
992 ///
993 /// ```rust
994 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(' ', 2).collect();
995 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
996 ///
997 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(|c: char| c.is_digit(), 1).collect();
998 /// assert_eq!(v, vec!["ghi", "abc1def"]);
999 ///
1000 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn('X', 2).collect();
1001 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
1002 /// ```
1003 fn rsplitn<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitsN<'a, Sep>;
1004
1005 /// An iterator over the start and end indices of the disjoint
1006 /// matches of `sep` within `self`.
1007 ///
1008 /// That is, each returned value `(start, end)` satisfies
1009 /// `self.slice(start, end) == sep`. For matches of `sep` within
1010 /// `self` that overlap, only the indices corresponding to the
1011 /// first match are returned.
1012 ///
1013 /// # Example
1014 ///
1015 /// ```rust
1016 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
1017 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
1018 ///
1019 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
1020 /// assert_eq!(v, vec![(1,4), (4,7)]);
1021 ///
1022 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
1023 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
1024 /// ```
1025 fn match_indices(&self, sep: &'a str) -> MatchIndices<'a>;
1026
1027 /// An iterator over the substrings of `self` separated by `sep`.
1028 ///
1029 /// # Example
1030 ///
1031 /// ```rust
1032 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
1033 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
1034 ///
1035 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
1036 /// assert_eq!(v, vec!["1", "", "2"]);
1037 /// ```
1038 fn split_str(&self, &'a str) -> StrSplits<'a>;
1039
1040 /// An iterator over the lines of a string (subsequences separated
1041 /// by `\n`). This does not include the empty string after a
1042 /// trailing `\n`.
1043 ///
1044 /// # Example
1045 ///
1046 /// ```rust
1047 /// let four_lines = "foo\nbar\n\nbaz\n";
1048 /// let v: Vec<&str> = four_lines.lines().collect();
1049 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1050 /// ```
1051 fn lines(&self) -> CharSplits<'a, char>;
1052
1053 /// An iterator over the lines of a string, separated by either
1054 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
1055 /// empty trailing line.
1056 ///
1057 /// # Example
1058 ///
1059 /// ```rust
1060 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
1061 /// let v: Vec<&str> = four_lines.lines_any().collect();
1062 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1063 /// ```
1064 fn lines_any(&self) -> AnyLines<'a>;
1065
1066 /// An iterator over the words of a string (subsequences separated
1067 /// by any sequence of whitespace). Sequences of whitespace are
1068 /// collapsed, so empty "words" are not included.
1069 ///
1070 /// # Example
1071 ///
1072 /// ```rust
1073 /// let some_words = " Mary had\ta little \n\t lamb";
1074 /// let v: Vec<&str> = some_words.words().collect();
1075 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1076 /// ```
1077 fn words(&self) -> Words<'a>;
1078
1079 /// Returns true if the string contains only whitespace.
1080 ///
1081 /// Whitespace characters are determined by `char::is_whitespace`.
1082 ///
1083 /// # Example
1084 ///
1085 /// ```rust
1086 /// assert!(" \t\n".is_whitespace());
1087 /// assert!("".is_whitespace());
1088 ///
1089 /// assert!( !"abc".is_whitespace());
1090 /// ```
1091 fn is_whitespace(&self) -> bool;
1092
1093 /// Returns true if the string contains only alphanumeric code
1094 /// points.
1095 ///
1096 /// Alphanumeric characters are determined by `char::is_alphanumeric`.
1097 ///
1098 /// # Example
1099 ///
1100 /// ```rust
1101 /// assert!("LöweèèLéopard123".is_alphanumeric());
1102 /// assert!("".is_alphanumeric());
1103 ///
1104 /// assert!( !" &*~".is_alphanumeric());
1105 /// ```
1106 fn is_alphanumeric(&self) -> bool;
1107
1108 /// Returns the number of Unicode code points (`char`) that a
1109 /// string holds.
1110 ///
1111 /// This does not perform any normalization, and is `O(n)`, since
1112 /// UTF-8 is a variable width encoding of code points.
1113 ///
1114 /// *Warning*: The number of code points in a string does not directly
1115 /// correspond to the number of visible characters or width of the
1116 /// visible text due to composing characters, and double- and
1117 /// zero-width ones.
1118 ///
1119 /// See also `.len()` for the byte length.
1120 ///
1121 /// # Example
1122 ///
1123 /// ```rust
1124 /// // composed forms of `ö` and `é`
1125 /// let c = "Löwe 老虎 Léopard"; // German, Simplified Chinese, French
1126 /// // decomposed forms of `ö` and `é`
1127 /// let d = "Lo\u0308we 老虎 Le\u0301opard";
1128 ///
1129 /// assert_eq!(c.char_len(), 15);
1130 /// assert_eq!(d.char_len(), 17);
1131 ///
1132 /// assert_eq!(c.len(), 21);
1133 /// assert_eq!(d.len(), 23);
1134 ///
1135 /// // the two strings *look* the same
1136 /// println!("{}", c);
1137 /// println!("{}", d);
1138 /// ```
1139 fn char_len(&self) -> uint;
1140
1141 /// Returns a slice of the given string from the byte range
1142 /// [`begin`..`end`).
1143 ///
1144 /// This operation is `O(1)`.
1145 ///
1146 /// Fails when `begin` and `end` do not point to valid characters
1147 /// or point beyond the last character of the string.
1148 ///
1149 /// See also `slice_to` and `slice_from` for slicing prefixes and
1150 /// suffixes of strings, and `slice_chars` for slicing based on
1151 /// code point counts.
1152 ///
1153 /// # Example
1154 ///
1155 /// ```rust
1156 /// let s = "Löwe 老虎 Léopard";
1157 /// assert_eq!(s.slice(0, 1), "L");
1158 ///
1159 /// assert_eq!(s.slice(1, 9), "öwe 老");
1160 ///
1161 /// // these will fail:
1162 /// // byte 2 lies within `ö`:
1163 /// // s.slice(2, 3);
1164 ///
1165 /// // byte 8 lies within `老`
1166 /// // s.slice(1, 8);
1167 ///
1168 /// // byte 100 is outside the string
1169 /// // s.slice(3, 100);
1170 /// ```
1171 fn slice(&self, begin: uint, end: uint) -> &'a str;
1172
1173 /// Returns a slice of the string from `begin` to its end.
1174 ///
1175 /// Equivalent to `self.slice(begin, self.len())`.
1176 ///
1177 /// Fails when `begin` does not point to a valid character, or is
1178 /// out of bounds.
1179 ///
1180 /// See also `slice`, `slice_to` and `slice_chars`.
1181 fn slice_from(&self, begin: uint) -> &'a str;
1182
1183 /// Returns a slice of the string from the beginning to byte
1184 /// `end`.
1185 ///
1186 /// Equivalent to `self.slice(0, end)`.
1187 ///
1188 /// Fails when `end` does not point to a valid character, or is
1189 /// out of bounds.
1190 ///
1191 /// See also `slice`, `slice_from` and `slice_chars`.
1192 fn slice_to(&self, end: uint) -> &'a str;
1193
1194 /// Returns a slice of the string from the character range
1195 /// [`begin`..`end`).
1196 ///
1197 /// That is, start at the `begin`-th code point of the string and
1198 /// continue to the `end`-th code point. This does not detect or
1199 /// handle edge cases such as leaving a combining character as the
1200 /// first code point of the string.
1201 ///
1202 /// Due to the design of UTF-8, this operation is `O(end)`.
1203 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
1204 /// variants that use byte indices rather than code point
1205 /// indices.
1206 ///
1207 /// Fails if `begin` > `end`, or if either `begin` or `end` is
1208 /// beyond the last character of the string.
1209 ///
1210 /// # Example
1211 ///
1212 /// ```rust
1213 /// let s = "Löwe 老虎 Léopard";
1214 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
1215 /// assert_eq!(s.slice_chars(5, 7), "老虎");
1216 /// ```
1217 fn slice_chars(&self, begin: uint, end: uint) -> &'a str;
1218
1219 /// Returns true if `needle` is a prefix of the string.
1220 fn starts_with(&self, needle: &str) -> bool;
1221
1222 /// Returns true if `needle` is a suffix of the string.
1223 fn ends_with(&self, needle: &str) -> bool;
1224
1225 /// Returns a string with leading and trailing whitespace removed.
1226 fn trim(&self) -> &'a str;
1227
1228 /// Returns a string with leading whitespace removed.
1229 fn trim_left(&self) -> &'a str;
1230
1231 /// Returns a string with trailing whitespace removed.
1232 fn trim_right(&self) -> &'a str;
1233
1234 /// Returns a string with characters that match `to_trim` removed.
1235 ///
1236 /// # Arguments
1237 ///
1238 /// * to_trim - a character matcher
1239 ///
1240 /// # Example
1241 ///
1242 /// ```rust
1243 /// assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar")
1244 /// assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar")
1245 /// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar")
1246 /// ```
1247 fn trim_chars<C: CharEq>(&self, to_trim: C) -> &'a str;
1248
1249 /// Returns a string with leading characters that match `to_trim` removed.
1250 ///
1251 /// # Arguments
1252 ///
1253 /// * to_trim - a character matcher
1254 ///
1255 /// # Example
1256 ///
1257 /// ```rust
1258 /// assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11")
1259 /// assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12")
1260 /// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123")
1261 /// ```
1262 fn trim_left_chars<C: CharEq>(&self, to_trim: C) -> &'a str;
1263
1264 /// Returns a string with trailing characters that match `to_trim` removed.
1265 ///
1266 /// # Arguments
1267 ///
1268 /// * to_trim - a character matcher
1269 ///
1270 /// # Example
1271 ///
1272 /// ```rust
1273 /// assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar")
1274 /// assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar")
1275 /// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar")
1276 /// ```
1277 fn trim_right_chars<C: CharEq>(&self, to_trim: C) -> &'a str;
1278
1279 /// Check that `index`-th byte lies at the start and/or end of a
1280 /// UTF-8 code point sequence.
1281 ///
1282 /// The start and end of the string (when `index == self.len()`)
1283 /// are considered to be boundaries.
1284 ///
1285 /// Fails if `index` is greater than `self.len()`.
1286 ///
1287 /// # Example
1288 ///
1289 /// ```rust
1290 /// let s = "Löwe 老虎 Léopard";
1291 /// assert!(s.is_char_boundary(0));
1292 /// // start of `老`
1293 /// assert!(s.is_char_boundary(6));
1294 /// assert!(s.is_char_boundary(s.len()));
1295 ///
1296 /// // second byte of `ö`
1297 /// assert!(!s.is_char_boundary(2));
1298 ///
1299 /// // third byte of `老`
1300 /// assert!(!s.is_char_boundary(8));
1301 /// ```
1302 fn is_char_boundary(&self, index: uint) -> bool;
1303
1304 /// Pluck a character out of a string and return the index of the next
1305 /// character.
1306 ///
1307 /// This function can be used to iterate over the unicode characters of a
1308 /// string.
1309 ///
1310 /// # Example
1311 ///
1312 /// This example manually iterates through the characters of a
1313 /// string; this should normally be done by `.chars()` or
1314 /// `.char_indices()`.
1315 ///
1316 /// ```rust
1317 /// use std::str::CharRange;
1318 ///
1319 /// let s = "ä¸åViá»t Nam";
1320 /// let mut i = 0u;
1321 /// while i < s.len() {
1322 /// let CharRange {ch, next} = s.char_range_at(i);
1323 /// println!("{}: {}", i, ch);
1324 /// i = next;
1325 /// }
1326 /// ```
1327 ///
1328 /// ## Output
1329 ///
1330 /// ```ignore
1331 /// 0: 中
1332 /// 3: 华
1333 /// 6: V
1334 /// 7: i
1335 /// 8: ệ
1336 /// 11: t
1337 /// 12:
1338 /// 13: N
1339 /// 14: a
1340 /// 15: m
1341 /// ```
1342 ///
1343 /// # Arguments
1344 ///
1345 /// * self - The string
1346 /// * start - The byte offset of the char to extract
1347 ///
1348 /// # Return value
1349 ///
1350 /// A record {ch: char, next: uint} containing the char value and the byte
1351 /// index of the next unicode character.
1352 ///
1353 /// # Failure
1354 ///
1355 /// If `start` is greater than or equal to the length of the string.
1356 /// If `start` is not the index of the beginning of a valid UTF-8 character.
1357 fn char_range_at(&self, start: uint) -> CharRange;
1358
1359 /// Given a byte position and a str, return the previous char and its position.
1360 ///
1361 /// This function can be used to iterate over a unicode string in reverse.
1362 ///
1363 /// Returns 0 for next index if called on start index 0.
1364 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1365
1366 /// Plucks the character starting at the `i`th byte of a string
1367 fn char_at(&self, i: uint) -> char;
1368
1369 /// Plucks the character ending at the `i`th byte of a string
1370 fn char_at_reverse(&self, i: uint) -> char;
1371
1372 /// Work with the byte buffer of a string as a byte slice.
1373 fn as_bytes(&self) -> &'a [u8];
1374
1375 /// Returns the byte index of the first character of `self` that
1376 /// matches `search`.
1377 ///
1378 /// # Return value
1379 ///
1380 /// `Some` containing the byte index of the first matching character
1381 /// or `None` if there is no match
1382 ///
1383 /// # Example
1384 ///
1385 /// ```rust
1386 /// let s = "Löwe 老虎 Léopard";
1387 ///
1388 /// assert_eq!(s.find('L'), Some(0));
1389 /// assert_eq!(s.find('é'), Some(14));
1390 ///
1391 /// // the first space
1392 /// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5));
1393 ///
1394 /// // neither are found
1395 /// assert_eq!(s.find(&['1', '2']), None);
1396 /// ```
1397 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1398
1399 /// Returns the byte index of the last character of `self` that
1400 /// matches `search`.
1401 ///
1402 /// # Return value
1403 ///
1404 /// `Some` containing the byte index of the last matching character
1405 /// or `None` if there is no match.
1406 ///
1407 /// # Example
1408 ///
1409 /// ```rust
1410 /// let s = "Löwe 老虎 Léopard";
1411 ///
1412 /// assert_eq!(s.rfind('L'), Some(13));
1413 /// assert_eq!(s.rfind('é'), Some(14));
1414 ///
1415 /// // the second space
1416 /// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12));
1417 ///
1418 /// // searches for an occurrence of either `1` or `2`, but neither are found
1419 /// assert_eq!(s.rfind(&['1', '2']), None);
1420 /// ```
1421 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1422
1423 /// Returns the byte index of the first matching substring
1424 ///
1425 /// # Arguments
1426 ///
1427 /// * `needle` - The string to search for
1428 ///
1429 /// # Return value
1430 ///
1431 /// `Some` containing the byte index of the first matching substring
1432 /// or `None` if there is no match.
1433 ///
1434 /// # Example
1435 ///
1436 /// ```rust
1437 /// let s = "Löwe 老虎 Léopard";
1438 ///
1439 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1440 /// assert_eq!(s.find_str("muffin man"), None);
1441 /// ```
1442 fn find_str(&self, &str) -> Option<uint>;
1443
1444 /// Retrieves the first character from a string slice and returns
1445 /// it. This does not allocate a new string; instead, it returns a
1446 /// slice that points one character beyond the character that was
1447 /// shifted. If the string does not contain any characters,
1448 /// a tuple of None and an empty string is returned instead.
1449 ///
1450 /// # Example
1451 ///
1452 /// ```rust
1453 /// let s = "Löwe 老虎 Léopard";
1454 /// let (c, s1) = s.slice_shift_char();
1455 /// assert_eq!(c, Some('L'));
1456 /// assert_eq!(s1, "öwe 老虎 Léopard");
1457 ///
1458 /// let (c, s2) = s1.slice_shift_char();
1459 /// assert_eq!(c, Some('ö'));
1460 /// assert_eq!(s2, "we 老虎 Léopard");
1461 /// ```
1462 fn slice_shift_char(&self) -> (Option<char>, &'a str);
1463
1464 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1465 ///
1466 /// Fails if `inner` is not a direct slice contained within self.
1467 ///
1468 /// # Example
1469 ///
1470 /// ```rust
1471 /// let string = "a\nb\nc";
1472 /// let lines: Vec<&str> = string.lines().collect();
1473 /// let lines = lines.as_slice();
1474 ///
1475 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1476 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1477 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1478 /// ```
1479 fn subslice_offset(&self, inner: &str) -> uint;
1480
1481 /// Return an unsafe pointer to the string's buffer.
1482 ///
1483 /// The caller must ensure that the string outlives this pointer,
1484 /// and that it is not reallocated (e.g. by pushing to the
1485 /// string).
1486 fn as_ptr(&self) -> *u8;
1487 }
1488
1489 impl<'a> StrSlice<'a> for &'a str {
1490 #[inline]
1491 fn contains<'a>(&self, needle: &'a str) -> bool {
1492 self.find_str(needle).is_some()
1493 }
1494
1495 #[inline]
1496 fn contains_char(&self, needle: char) -> bool {
1497 self.find(needle).is_some()
1498 }
1499
1500 #[inline]
1501 fn chars(&self) -> Chars<'a> {
1502 Chars{string: *self}
1503 }
1504
1505 #[inline]
1506 #[deprecated = "replaced by .chars().rev()"]
1507 fn chars_rev(&self) -> RevChars<'a> {
1508 self.chars().rev()
1509 }
1510
1511 #[inline]
1512 fn bytes(&self) -> Bytes<'a> {
1513 self.as_bytes().iter().map(|&b| b)
1514 }
1515
1516 #[inline]
1517 #[deprecated = "replaced by .bytes().rev()"]
1518 fn bytes_rev(&self) -> RevBytes<'a> {
1519 self.bytes().rev()
1520 }
1521
1522 #[inline]
1523 fn char_indices(&self) -> CharOffsets<'a> {
1524 CharOffsets{string: *self, iter: self.chars()}
1525 }
1526
1527 #[inline]
1528 #[deprecated = "replaced by .char_indices().rev()"]
1529 fn char_indices_rev(&self) -> RevCharOffsets<'a> {
1530 self.char_indices().rev()
1531 }
1532
1533 #[inline]
1534 fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep> {
1535 CharSplits {
1536 string: *self,
1537 only_ascii: sep.only_ascii(),
1538 sep: sep,
1539 allow_trailing_empty: true,
1540 finished: false,
1541 }
1542 }
1543
1544 #[inline]
1545 fn splitn<Sep: CharEq>(&self, sep: Sep, count: uint)
1546 -> CharSplitsN<'a, Sep> {
1547 CharSplitsN {
1548 iter: self.split(sep),
1549 count: count,
1550 invert: false,
1551 }
1552 }
1553
1554 #[inline]
1555 fn split_terminator<Sep: CharEq>(&self, sep: Sep)
1556 -> CharSplits<'a, Sep> {
1557 CharSplits {
1558 allow_trailing_empty: false,
1559 ..self.split(sep)
1560 }
1561 }
1562
1563 #[inline]
1564 #[deprecated = "replaced by .split(sep).rev()"]
1565 fn rsplit<Sep: CharEq>(&self, sep: Sep) -> RevCharSplits<'a, Sep> {
1566 self.split(sep).rev()
1567 }
1568
1569 #[inline]
1570 fn rsplitn<Sep: CharEq>(&self, sep: Sep, count: uint)
1571 -> CharSplitsN<'a, Sep> {
1572 CharSplitsN {
1573 iter: self.split(sep),
1574 count: count,
1575 invert: true,
1576 }
1577 }
1578
1579 #[inline]
1580 fn match_indices(&self, sep: &'a str) -> MatchIndices<'a> {
1581 assert!(!sep.is_empty())
1582 MatchIndices {
1583 haystack: *self,
1584 needle: sep,
1585 position: 0
1586 }
1587 }
1588
1589 #[inline]
1590 fn split_str(&self, sep: &'a str) -> StrSplits<'a> {
1591 StrSplits {
1592 it: self.match_indices(sep),
1593 last_end: 0,
1594 finished: false
1595 }
1596 }
1597
1598 #[inline]
1599 fn lines(&self) -> CharSplits<'a, char> {
1600 self.split_terminator('\n')
1601 }
1602
1603 fn lines_any(&self) -> AnyLines<'a> {
1604 self.lines().map(|line| {
1605 let l = line.len();
1606 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1607 else { line }
1608 })
1609 }
1610
1611 #[inline]
1612 fn words(&self) -> Words<'a> {
1613 self.split(char::is_whitespace).filter(|s| !s.is_empty())
1614 }
1615
1616 #[inline]
1617 fn is_whitespace(&self) -> bool { self.chars().all(char::is_whitespace) }
1618
1619 #[inline]
1620 fn is_alphanumeric(&self) -> bool { self.chars().all(char::is_alphanumeric) }
1621
1622 #[inline]
1623 fn char_len(&self) -> uint { self.chars().len() }
1624
1625 #[inline]
1626 fn slice(&self, begin: uint, end: uint) -> &'a str {
1627 assert!(self.is_char_boundary(begin) && self.is_char_boundary(end));
1628 unsafe { raw::slice_bytes(*self, begin, end) }
1629 }
1630
1631 #[inline]
1632 fn slice_from(&self, begin: uint) -> &'a str {
1633 self.slice(begin, self.len())
1634 }
1635
1636 #[inline]
1637 fn slice_to(&self, end: uint) -> &'a str {
1638 assert!(self.is_char_boundary(end));
1639 unsafe { raw::slice_bytes(*self, 0, end) }
1640 }
1641
1642 fn slice_chars(&self, begin: uint, end: uint) -> &'a str {
1643 assert!(begin <= end);
1644 let mut count = 0;
1645 let mut begin_byte = None;
1646 let mut end_byte = None;
1647
1648 // This could be even more efficient by not decoding,
1649 // only finding the char boundaries
1650 for (idx, _) in self.char_indices() {
1651 if count == begin { begin_byte = Some(idx); }
1652 if count == end { end_byte = Some(idx); break; }
1653 count += 1;
1654 }
1655 if begin_byte.is_none() && count == begin { begin_byte = Some(self.len()) }
1656 if end_byte.is_none() && count == end { end_byte = Some(self.len()) }
1657
1658 match (begin_byte, end_byte) {
1659 (None, _) => fail!("slice_chars: `begin` is beyond end of string"),
1660 (_, None) => fail!("slice_chars: `end` is beyond end of string"),
1661 (Some(a), Some(b)) => unsafe { raw::slice_bytes(*self, a, b) }
1662 }
1663 }
1664
1665 #[inline]
1666 fn starts_with<'a>(&self, needle: &'a str) -> bool {
1667 let n = needle.len();
1668 self.len() >= n && needle.as_bytes() == self.as_bytes().slice_to(n)
1669 }
1670
1671 #[inline]
1672 fn ends_with(&self, needle: &str) -> bool {
1673 let (m, n) = (self.len(), needle.len());
1674 m >= n && needle.as_bytes() == self.as_bytes().slice_from(m - n)
1675 }
1676
1677 #[inline]
1678 fn trim(&self) -> &'a str {
1679 self.trim_left().trim_right()
1680 }
1681
1682 #[inline]
1683 fn trim_left(&self) -> &'a str {
1684 self.trim_left_chars(char::is_whitespace)
1685 }
1686
1687 #[inline]
1688 fn trim_right(&self) -> &'a str {
1689 self.trim_right_chars(char::is_whitespace)
1690 }
1691
1692 #[inline]
1693 fn trim_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
1694 let cur = match self.find(|c: char| !to_trim.matches(c)) {
1695 None => "",
1696 Some(i) => unsafe { raw::slice_bytes(*self, i, self.len()) }
1697 };
1698 match cur.rfind(|c: char| !to_trim.matches(c)) {
1699 None => "",
1700 Some(i) => {
1701 let right = cur.char_range_at(i).next;
1702 unsafe { raw::slice_bytes(cur, 0, right) }
1703 }
1704 }
1705 }
1706
1707 #[inline]
1708 fn trim_left_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
1709 match self.find(|c: char| !to_trim.matches(c)) {
1710 None => "",
1711 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1712 }
1713 }
1714
1715 #[inline]
1716 fn trim_right_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
1717 match self.rfind(|c: char| !to_trim.matches(c)) {
1718 None => "",
1719 Some(last) => {
1720 let next = self.char_range_at(last).next;
1721 unsafe { raw::slice_bytes(*self, 0u, next) }
1722 }
1723 }
1724 }
1725
1726 #[inline]
1727 fn is_char_boundary(&self, index: uint) -> bool {
1728 if index == self.len() { return true; }
1729 let b = self[index];
1730 return b < 128u8 || b >= 192u8;
1731 }
1732
1733 #[inline]
1734 fn char_range_at(&self, i: uint) -> CharRange {
1735 if self[i] < 128u8 {
1736 return CharRange {ch: self[i] as char, next: i + 1 };
1737 }
1738
1739 // Multibyte case is a fn to allow char_range_at to inline cleanly
1740 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1741 let mut val = s[i] as u32;
1742 let w = UTF8_CHAR_WIDTH[val as uint] as uint;
1743 assert!((w != 0));
1744
1745 val = utf8_first_byte!(val, w);
1746 val = utf8_acc_cont_byte!(val, s[i + 1]);
1747 if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1748 if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
1749
1750 return CharRange {ch: unsafe { transmute(val) }, next: i + w};
1751 }
1752
1753 return multibyte_char_range_at(*self, i);
1754 }
1755
1756 #[inline]
1757 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1758 let mut prev = start;
1759
1760 prev = prev.saturating_sub(1);
1761 if self[prev] < 128 { return CharRange{ch: self[prev] as char, next: prev} }
1762
1763 // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
1764 fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
1765 // while there is a previous byte == 10......
1766 while i > 0 && s[i] & 192u8 == TAG_CONT_U8 {
1767 i -= 1u;
1768 }
1769
1770 let mut val = s[i] as u32;
1771 let w = UTF8_CHAR_WIDTH[val as uint] as uint;
1772 assert!((w != 0));
1773
1774 val = utf8_first_byte!(val, w);
1775 val = utf8_acc_cont_byte!(val, s[i + 1]);
1776 if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1777 if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
1778
1779 return CharRange {ch: unsafe { transmute(val) }, next: i};
1780 }
1781
1782 return multibyte_char_range_at_reverse(*self, prev);
1783 }
1784
1785 #[inline]
1786 fn char_at(&self, i: uint) -> char {
1787 self.char_range_at(i).ch
1788 }
1789
1790 #[inline]
1791 fn char_at_reverse(&self, i: uint) -> char {
1792 self.char_range_at_reverse(i).ch
1793 }
1794
1795 #[inline]
1796 fn as_bytes(&self) -> &'a [u8] {
1797 unsafe { cast::transmute(*self) }
1798 }
1799
1800 fn find<C: CharEq>(&self, mut search: C) -> Option<uint> {
1801 if search.only_ascii() {
1802 self.bytes().position(|b| search.matches(b as char))
1803 } else {
1804 for (index, c) in self.char_indices() {
1805 if search.matches(c) { return Some(index); }
1806 }
1807 None
1808 }
1809 }
1810
1811 fn rfind<C: CharEq>(&self, mut search: C) -> Option<uint> {
1812 if search.only_ascii() {
1813 self.bytes().rposition(|b| search.matches(b as char))
1814 } else {
1815 for (index, c) in self.char_indices().rev() {
1816 if search.matches(c) { return Some(index); }
1817 }
1818 None
1819 }
1820 }
1821
1822 fn find_str(&self, needle: &str) -> Option<uint> {
1823 if needle.is_empty() {
1824 Some(0)
1825 } else {
1826 self.match_indices(needle)
1827 .next()
1828 .map(|(start, _end)| start)
1829 }
1830 }
1831
1832 #[inline]
1833 fn slice_shift_char(&self) -> (Option<char>, &'a str) {
1834 if self.is_empty() {
1835 return (None, *self);
1836 } else {
1837 let CharRange {ch, next} = self.char_range_at(0u);
1838 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
1839 return (Some(ch), next_s);
1840 }
1841 }
1842
1843 fn subslice_offset(&self, inner: &str) -> uint {
1844 let a_start = self.as_ptr() as uint;
1845 let a_end = a_start + self.len();
1846 let b_start = inner.as_ptr() as uint;
1847 let b_end = b_start + inner.len();
1848
1849 assert!(a_start <= b_start);
1850 assert!(b_end <= a_end);
1851 b_start - a_start
1852 }
1853
1854 #[inline]
1855 fn as_ptr(&self) -> *u8 {
1856 self.repr().data
1857 }
1858 }
1859
1860 impl<'a> Default for &'a str {
1861 fn default() -> &'a str { "" }
1862 }
libcore/str.rs:322:19-322:19 -struct- definition:
pub struct MatchIndices<'a> {
haystack: &'a str,
needle: &'a str,
references:- 9321: /// substring within a larger string
323: pub struct MatchIndices<'a> {
--
1581: assert!(!sep.is_empty())
1582: MatchIndices {
1583: haystack: *self,
libcore/str.rs:764:4-764:4 -fn- definition:
pub unsafe fn slice_unchecked<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
cast::transmute(Slice {
data: s.as_ptr().offset(begin as int),
references:- 7110: unsafe {
111: self.string = raw::slice_unchecked(self.string, next, self.string.len());
112: }
--
130: unsafe {
131: self.string = raw::slice_unchecked(self.string, 0, next);
132: }
--
257: let elt = raw::slice_unchecked(self.string, 0, a);
258: self.string = raw::slice_unchecked(self.string, b, self.string.len());
259: Some(elt)
--
754: assert!(end <= s.len());
755: slice_unchecked(s, begin, end)
756: }
libcore/str.rs:849:50-849:50 -trait- definition:
/// Any string that can be represented as a slice
pub trait Str {
/// Work with `self` as a slice.
references:- 4855: impl<'a> Str for &'a str {
856: #[inline]
--
860: impl<'a> Str for ~str {
861: #[inline]
libcore/str.rs:730:4-730:4 -fn- definition:
pub unsafe fn c_str_to_static_slice(s: *i8) -> &'static str {
let s = s as *u8;
let mut curr = s;
references:- 2libcore/failure.rs:
33: let expr = c_str_to_static_slice(expr as *i8);
34: let file = c_str_to_static_slice(file as *i8);
35: begin_unwind(expr, file, line)
libcore/str.rs:513:58-513:58 -fn- definition:
/// Determines if a vector of bytes contains valid UTF-8.
pub fn is_utf8(v: &[u8]) -> bool {
run_utf8_validation_iterator(&mut v.iter())
references:- 240: pub fn from_utf8<'a>(v: &'a [u8]) -> Option<&'a str> {
41: if is_utf8(v) {
42: Some(unsafe { raw::from_utf8(v) })
--
738: let v = Slice { data: s, len: len };
739: assert!(is_utf8(::cast::transmute(v)));
740: ::cast::transmute(v)
libcore/str.rs:408:10-408:10 -fn- definition:
pub fn eq_slice(a: &str, b: &str) -> bool {
eq_slice_(a, b)
}
references:- 5807: fn eq(&self, other: & &'a str) -> bool {
808: eq_slice((*self), (*other))
809: }
--
836: #[inline]
837: fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
838: }
--
841: #[inline]
842: fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
843: }
libcore/str.rs:689:52-689:52 -struct- definition:
/// for iterating over the UTF-8 bytes of a string.
pub struct CharRange {
/// Current `char`
references:- 131779: return CharRange {ch: unsafe { transmute(val) }, next: i};
1780: }
--
1836: } else {
1837: let CharRange {ch, next} = self.char_range_at(0u);
1838: let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
libcore/str.rs:218:82-218:82 -NK_AS_STR_TODO- definition:
/// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
pub type AnyLines<'a> =
Map<'a, &'a str, &'a str, CharSplits<'a, char>>;
references:- 21603: fn lines_any(&self) -> AnyLines<'a> {
1604: self.lines().map(|line| {
libcore/str.rs:331:19-331:19 -struct- definition:
pub struct StrSplits<'a> {
it: MatchIndices<'a>,
last_end: uint,
references:- 8330: /// search string
332: pub struct StrSplits<'a> {
--
369: impl<'a> Iterator<&'a str> for StrSplits<'a> {
370: #[inline]
--
1037: /// ```
1038: fn split_str(&self, &'a str) -> StrSplits<'a>;
--
1589: #[inline]
1590: fn split_str(&self, sep: &'a str) -> StrSplits<'a> {
1591: StrSplits {
1592: it: self.match_indices(sep),
libcore/str.rs:182:37-182:37 -NK_AS_STR_TODO- definition:
/// Use with the `std::iter` module.
pub type Bytes<'a> =
Map<'a, &'a u8, u8, slice::Items<'a, u8>>;
references:- 4912: #[deprecated = "replaced by .bytes().rev()"]
913: fn bytes_rev(&self) -> Rev<Bytes<'a>>;
--
1511: #[inline]
1512: fn bytes(&self) -> Bytes<'a> {
1513: self.as_bytes().iter().map(|&b| b)
libcore/str.rs:546:32-546:32 -enum- definition:
pub enum UTF16Item {
/// A valid codepoint.
ScalarValue(char),
references:- 9566: impl<'a> Iterator<UTF16Item> for UTF16Items<'a> {
567: fn next(&mut self) -> Option<UTF16Item> {
568: let u = match self.iter.next() {
libcore/str.rs:46:62-46:62 -trait- definition:
/// Something that can be used to compare against a character
pub trait CharEq {
/// Determine if the splitter should split at the given character
references:- 271420: /// ```
1421: fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
--
1707: #[inline]
1708: fn trim_left_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
1709: match self.find(|c: char| !to_trim.matches(c)) {
--
1800: fn find<C: CharEq>(&self, mut search: C) -> Option<uint> {
1801: if search.only_ascii() {
--
1811: fn rfind<C: CharEq>(&self, mut search: C) -> Option<uint> {
1812: if search.only_ascii() {
libcore/str.rs:142:19-142:19 -struct- definition:
pub struct CharOffsets<'a> {
/// The original string to be iterated
string: &'a str,
references:- 11141: /// Use with the `std::iter` module.
143: pub struct CharOffsets<'a> {
--
1523: fn char_indices(&self) -> CharOffsets<'a> {
1524: CharOffsets{string: *self, iter: self.chars()}
1525: }
libcore/str.rs:97:19-97:19 -struct- definition:
pub struct Chars<'a> {
/// The slice remaining to be iterated
string: &'a str,
references:- 1296: /// Use with the `std::iter` module.
98: pub struct Chars<'a> {
--
145: string: &'a str,
146: iter: Chars<'a>,
147: }
--
1500: #[inline]
1501: fn chars(&self) -> Chars<'a> {
1502: Chars{string: *self}
1503: }
libcore/str.rs:206:19-206:19 -struct- definition:
pub struct CharSplitsN<'a, Sep> {
iter: CharSplits<'a, Sep>,
/// The number of splits remaining
references:- 11205: /// splitting at most `count` times.
207: pub struct CharSplitsN<'a, Sep> {
--
1571: -> CharSplitsN<'a, Sep> {
1572: CharSplitsN {
1573: iter: self.split(sep),
libcore/str.rs:190:19-190:19 -struct- definition:
pub struct CharSplits<'a, Sep> {
/// The slice remaining to be iterated
string: &'a str,
references:- 20189: /// An iterator over the substrings of a string, separated by `sep`.
191: pub struct CharSplits<'a, Sep> {
--
1534: fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep> {
1535: CharSplits {
1536: string: *self,
--
1556: -> CharSplits<'a, Sep> {
1557: CharSplits {
1558: allow_trailing_empty: false,
--
1598: #[inline]
1599: fn lines(&self) -> CharSplits<'a, char> {
1600: self.split_terminator('\n')
libcore/str.rs:752:4-752:4 -fn- definition:
pub unsafe fn slice_bytes<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
assert!(begin <= end);
assert!(end <= s.len());
references:- 81627: assert!(self.is_char_boundary(begin) && self.is_char_boundary(end));
1628: unsafe { raw::slice_bytes(*self, begin, end) }
1629: }
--
1695: None => "",
1696: Some(i) => unsafe { raw::slice_bytes(*self, i, self.len()) }
1697: };
--
1701: let right = cur.char_range_at(i).next;
1702: unsafe { raw::slice_bytes(cur, 0, right) }
1703: }
--
1710: None => "",
1711: Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1712: }
--
1837: let CharRange {ch, next} = self.char_range_at(0u);
1838: let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
1839: return (Some(ch), next_s);
libcore/str.rs:214:82-214:82 -NK_AS_STR_TODO- definition:
/// An iterator over the words of a string, separated by a sequence of whitespace
pub type Words<'a> =
Filter<'a, &'a str, CharSplits<'a, extern "Rust" fn(char) -> bool>>;
references:- 21076: /// ```
1077: fn words(&self) -> Words<'a>;
--
1611: #[inline]
1612: fn words(&self) -> Words<'a> {
1613: self.split(char::is_whitespace).filter(|s| !s.is_empty())
libcore/str.rs:541:19-541:19 -struct- definition:
pub struct UTF16Items<'a> {
iter: slice::Items<'a, u16>
}
references:- 7631: pub fn utf16_items<'a>(v: &'a [u16]) -> UTF16Items<'a> {
632: UTF16Items { iter : v.iter() }
633: }