(index<- ) ./libstd/str.rs
git branch: * master 5200215 auto merge of #14035 : alexcrichton/rust/experimental, r=huonw
modified: Fri May 9 13:02:28 2014
1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 /*!
12
13 Unicode string manipulation (`str` type)
14
15 # Basic Usage
16
17 Rust's string type is one of the core primitive types of the language. While
18 represented by the name `str`, the name `str` is not actually a valid type in
19 Rust. Each string must also be decorated with its ownership. This means that
20 there are two common kinds of strings in Rust:
21
22 * `~str` - This is an owned string. This type obeys all of the normal semantics
23 of the `Box<T>` types, meaning that it has one, and only one,
24 owner. This type cannot be implicitly copied, and is moved out of
25 when passed to other functions.
26
27 * `&str` - This is the borrowed string type. This type of string can only be
28 created from the other kind of string. As the name "borrowed"
29 implies, this type of string is owned elsewhere, and this string
30 cannot be moved out of.
31
32 As an example, here are a few different kinds of strings.
33
34 ```rust
35 fn main() {
36 let owned_string = "I am an owned string".to_owned();
37 let borrowed_string1 = "This string is borrowed with the 'static lifetime";
38 let borrowed_string2: &str = owned_string; // owned strings can be borrowed
39 }
40 ```
41
42 From the example above, you can see that Rust has 2 different kinds of string
43 literals. The owned literals correspond to the owned string types, but the
44 "borrowed literal" is actually more akin to C's concept of a static string.
45
46 When a string is declared without a `~` sigil, then the string is allocated
47 statically in the rodata of the executable/library. The string then has the
48 type `&'static str` meaning that the string is valid for the `'static`
49 lifetime, otherwise known as the lifetime of the entire program. As can be
50 inferred from the type, these static strings are not mutable.
51
52 # Mutability
53
54 Many languages have immutable strings by default, and Rust has a particular
55 flavor on this idea. As with the rest of Rust types, strings are immutable by
56 default. If a string is declared as `mut`, however, it may be mutated. This
57 works the same way as the rest of Rust's type system in the sense that if
58 there's a mutable reference to a string, there may only be one mutable reference
59 to that string. With these guarantees, strings can easily transition between
60 being mutable/immutable with the same benefits of having mutable strings in
61 other languages.
62
63 # Representation
64
65 Rust's string type, `str`, is a sequence of unicode codepoints encoded as a
66 stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
67 encoded UTF-8 sequences. Additionally, strings are not null-terminated
68 and can contain null codepoints.
69
70 The actual representations of strings have direct mappings to vectors:
71
72 * `~str` is the same as `~[u8]`
73 * `&str` is the same as `&[u8]`
74
75 */
76
77 use cast;
78 use cast::transmute;
79 use char;
80 use char::Char;
81 use clone::Clone;
82 use cmp::{Eq, TotalEq, Ord, TotalOrd, Equiv, Ordering};
83 use container::Container;
84 use fmt;
85 use io::Writer;
86 use iter::{Iterator, range, AdditiveIterator};
87 use option::{None, Option, Some};
88 use from_str::FromStr;
89 use slice::{ImmutableVector, MutableVector, CloneableVector};
90 use slice::Vector;
91 use vec::Vec;
92 use default::Default;
93 use strbuf::StrBuf;
94
95 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets, RevChars};
96 pub use core::str::{RevCharOffsets, Bytes, RevBytes, CharSplits, RevCharSplits};
97 pub use core::str::{CharSplitsN, Words, AnyLines, MatchIndices, StrSplits};
98 pub use core::str::{eq_slice, eq, is_utf8, is_utf16, UTF16Items};
99 pub use core::str::{UTF16Item, ScalarValue, LoneSurrogate, utf16_items};
100 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
101 pub use core::str::{Str, StrSlice};
102
103 /*
104 Section: Creating a string
105 */
106
107 /// Consumes a vector of bytes to create a new utf-8 string.
108 /// Returns None if the vector contains invalid UTF-8.
109 pub fn from_utf8_owned(vv: ~[u8]) -> Option<~str> {
110 if is_utf8(vv) {
111 Some(unsafe { raw::from_utf8_owned(vv) })
112 } else {
113 None
114 }
115 }
116
117 impl FromStr for ~str {
118 #[inline]
119 fn from_str(s: &str) -> Option<~str> { Some(s.to_owned()) }
120 }
121
122 /// Convert a byte to a UTF-8 string
123 ///
124 /// # Failure
125 ///
126 /// Fails if invalid UTF-8
127 pub fn from_byte(b: u8) -> ~str {
128 assert!(b < 128u8);
129 unsafe { ::cast::transmute(box [b]) }
130 }
131
132 /// Convert a char to a string
133 pub fn from_char(ch: char) -> ~str {
134 let mut buf = StrBuf::new();
135 buf.push_char(ch);
136 buf.into_owned()
137 }
138
139 /// Convert a vector of chars to a string
140 pub fn from_chars(chs: &[char]) -> ~str {
141 chs.iter().map(|c| *c).collect()
142 }
143
144 /// Methods for vectors of strings
145 pub trait StrVector {
146 /// Concatenate a vector of strings.
147 fn concat(&self) -> ~str;
148
149 /// Concatenate a vector of strings, placing a given separator between each.
150 fn connect(&self, sep: &str) -> ~str;
151 }
152
153 impl<'a, S: Str> StrVector for &'a [S] {
154 fn concat(&self) -> ~str {
155 if self.is_empty() { return "".to_owned(); }
156
157 // `len` calculation may overflow but push_str but will check boundaries
158 let len = self.iter().map(|s| s.as_slice().len()).sum();
159
160 let mut result = StrBuf::with_capacity(len);
161
162 for s in self.iter() {
163 result.push_str(s.as_slice())
164 }
165
166 result.into_owned()
167 }
168
169 fn connect(&self, sep: &str) -> ~str {
170 if self.is_empty() { return "".to_owned(); }
171
172 // concat is faster
173 if sep.is_empty() { return self.concat(); }
174
175 // this is wrong without the guarantee that `self` is non-empty
176 // `len` calculation may overflow but push_str but will check boundaries
177 let len = sep.len() * (self.len() - 1)
178 + self.iter().map(|s| s.as_slice().len()).sum();
179 let mut result = StrBuf::with_capacity(len);
180 let mut first = true;
181
182 for s in self.iter() {
183 if first {
184 first = false;
185 } else {
186 result.push_str(sep);
187 }
188 result.push_str(s.as_slice());
189 }
190 result.into_owned()
191 }
192 }
193
194 impl<'a, S: Str> StrVector for Vec<S> {
195 #[inline]
196 fn concat(&self) -> ~str {
197 self.as_slice().concat()
198 }
199
200 #[inline]
201 fn connect(&self, sep: &str) -> ~str {
202 self.as_slice().connect(sep)
203 }
204 }
205
206 /*
207 Section: Iterators
208 */
209
210 // Helper functions used for Unicode normalization
211 fn canonical_sort(comb: &mut [(char, u8)]) {
212 use iter::range;
213 use tuple::Tuple2;
214
215 let len = comb.len();
216 for i in range(0, len) {
217 let mut swapped = false;
218 for j in range(1, len-i) {
219 let class_a = *comb[j-1].ref1();
220 let class_b = *comb[j].ref1();
221 if class_a != 0 && class_b != 0 && class_a > class_b {
222 comb.swap(j-1, j);
223 swapped = true;
224 }
225 }
226 if !swapped { break; }
227 }
228 }
229
230 #[deriving(Clone)]
231 enum NormalizationForm {
232 NFD,
233 NFKD
234 }
235
236 /// External iterator for a string's normalization's characters.
237 /// Use with the `std::iter` module.
238 #[deriving(Clone)]
239 pub struct Normalizations<'a> {
240 kind: NormalizationForm,
241 iter: Chars<'a>,
242 buffer: Vec<(char, u8)>,
243 sorted: bool
244 }
245
246 impl<'a> Iterator<char> for Normalizations<'a> {
247 #[inline]
248 fn next(&mut self) -> Option<char> {
249 use unicode::decompose::canonical_combining_class;
250
251 match self.buffer.as_slice().head() {
252 Some(&(c, 0)) => {
253 self.sorted = false;
254 self.buffer.shift();
255 return Some(c);
256 }
257 Some(&(c, _)) if self.sorted => {
258 self.buffer.shift();
259 return Some(c);
260 }
261 _ => self.sorted = false
262 }
263
264 let decomposer = match self.kind {
265 NFD => char::decompose_canonical,
266 NFKD => char::decompose_compatible
267 };
268
269 if !self.sorted {
270 for ch in self.iter {
271 let buffer = &mut self.buffer;
272 let sorted = &mut self.sorted;
273 decomposer(ch, |d| {
274 let class = canonical_combining_class(d);
275 if class == 0 && !*sorted {
276 canonical_sort(buffer.as_mut_slice());
277 *sorted = true;
278 }
279 buffer.push((d, class));
280 });
281 if *sorted { break }
282 }
283 }
284
285 if !self.sorted {
286 canonical_sort(self.buffer.as_mut_slice());
287 self.sorted = true;
288 }
289
290 match self.buffer.shift() {
291 Some((c, 0)) => {
292 self.sorted = false;
293 Some(c)
294 }
295 Some((c, _)) => Some(c),
296 None => None
297 }
298 }
299
300 fn size_hint(&self) -> (uint, Option<uint>) {
301 let (lower, _) = self.iter.size_hint();
302 (lower, None)
303 }
304 }
305
306 /// Replace all occurrences of one string with another
307 ///
308 /// # Arguments
309 ///
310 /// * s - The string containing substrings to replace
311 /// * from - The string to replace
312 /// * to - The replacement string
313 ///
314 /// # Return value
315 ///
316 /// The original string with all occurrences of `from` replaced with `to`
317 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
318 let mut result = StrBuf::new();
319 let mut last_end = 0;
320 for (start, end) in s.match_indices(from) {
321 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
322 result.push_str(to);
323 last_end = end;
324 }
325 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
326 result.into_owned()
327 }
328
329 /*
330 Section: Misc
331 */
332
333 /// Decode a UTF-16 encoded vector `v` into a string, returning `None`
334 /// if `v` contains any invalid data.
335 ///
336 /// # Example
337 ///
338 /// ```rust
339 /// use std::str;
340 ///
341 /// // ðmusic
342 /// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
343 /// 0x0073, 0x0069, 0x0063];
344 /// assert_eq!(str::from_utf16(v), Some("ðmusic".to_owned()));
345 ///
346 /// // ðmu<invalid>ic
347 /// v[4] = 0xD800;
348 /// assert_eq!(str::from_utf16(v), None);
349 /// ```
350 pub fn from_utf16(v: &[u16]) -> Option<~str> {
351 let mut s = StrBuf::with_capacity(v.len() / 2);
352 for c in utf16_items(v) {
353 match c {
354 ScalarValue(c) => s.push_char(c),
355 LoneSurrogate(_) => return None
356 }
357 }
358 Some(s.into_owned())
359 }
360
361 /// Decode a UTF-16 encoded vector `v` into a string, replacing
362 /// invalid data with the replacement character (U+FFFD).
363 ///
364 /// # Example
365 /// ```rust
366 /// use std::str;
367 ///
368 /// // ðmus<invalid>ic<invalid>
369 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
370 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
371 /// 0xD834];
372 ///
373 /// assert_eq!(str::from_utf16_lossy(v),
374 /// "ðmus\uFFFDic\uFFFD".to_owned());
375 /// ```
376 pub fn from_utf16_lossy(v: &[u16]) -> ~str {
377 utf16_items(v).map(|c| c.to_char_lossy()).collect()
378 }
379
380 // Return the initial codepoint accumulator for the first byte.
381 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
382 // for width 3, and 3 bits for width 4
383 macro_rules! utf8_first_byte(
384 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
385 )
386
387 // return the value of $ch updated with continuation byte $byte
388 macro_rules! utf8_acc_cont_byte(
389 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
390 )
391
// Value of the top two bits (mask 192) of a UTF-8 continuation byte: 0b10xx_xxxx.
static TAG_CONT_U8: u8 = 0x80u8;
393
394 /// Converts a vector of bytes to a new utf-8 string.
395 /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
396 ///
397 /// # Example
398 ///
399 /// ```rust
400 /// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
401 /// let output = std::str::from_utf8_lossy(input);
402 /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
403 /// ```
404 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
405 if is_utf8(v) {
406 return Slice(unsafe { cast::transmute(v) })
407 }
408
409 static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
410 let mut i = 0;
411 let total = v.len();
412 fn unsafe_get(xs: &[u8], i: uint) -> u8 {
413 unsafe { *xs.unsafe_ref(i) }
414 }
415 fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
416 if i >= total {
417 0
418 } else {
419 unsafe_get(xs, i)
420 }
421 }
422
423 let mut res = StrBuf::with_capacity(total);
424
425 if i > 0 {
426 unsafe {
427 res.push_bytes(v.slice_to(i))
428 };
429 }
430
431 // subseqidx is the index of the first byte of the subsequence we're looking at.
432 // It's used to copy a bunch of contiguous good codepoints at once instead of copying
433 // them one by one.
434 let mut subseqidx = 0;
435
436 while i < total {
437 let i_ = i;
438 let byte = unsafe_get(v, i);
439 i += 1;
440
441 macro_rules! error(() => ({
442 unsafe {
443 if subseqidx != i_ {
444 res.push_bytes(v.slice(subseqidx, i_));
445 }
446 subseqidx = i;
447 res.push_bytes(REPLACEMENT);
448 }
449 }))
450
451 if byte < 128u8 {
452 // subseqidx handles this
453 } else {
454 let w = utf8_char_width(byte);
455
456 match w {
457 2 => {
458 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
459 error!();
460 continue;
461 }
462 i += 1;
463 }
464 3 => {
465 match (byte, safe_get(v, i, total)) {
466 (0xE0 , 0xA0 .. 0xBF) => (),
467 (0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
468 (0xED , 0x80 .. 0x9F) => (),
469 (0xEE .. 0xEF, 0x80 .. 0xBF) => (),
470 _ => {
471 error!();
472 continue;
473 }
474 }
475 i += 1;
476 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
477 error!();
478 continue;
479 }
480 i += 1;
481 }
482 4 => {
483 match (byte, safe_get(v, i, total)) {
484 (0xF0 , 0x90 .. 0xBF) => (),
485 (0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
486 (0xF4 , 0x80 .. 0x8F) => (),
487 _ => {
488 error!();
489 continue;
490 }
491 }
492 i += 1;
493 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
494 error!();
495 continue;
496 }
497 i += 1;
498 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
499 error!();
500 continue;
501 }
502 i += 1;
503 }
504 _ => {
505 error!();
506 continue;
507 }
508 }
509 }
510 }
511 if subseqidx < total {
512 unsafe {
513 res.push_bytes(v.slice(subseqidx, total))
514 };
515 }
516 Owned(res.into_owned())
517 }
518
519 /*
520 Section: MaybeOwned
521 */
522
523 /// A MaybeOwned is a string that can hold either a ~str or a &str.
524 /// This can be useful as an optimization when an allocation is sometimes
525 /// needed but not always.
526 pub enum MaybeOwned<'a> {
527 /// A borrowed string
528 Slice(&'a str),
529 /// An owned string
530 Owned(~str)
531 }
532
533 /// SendStr is a specialization of `MaybeOwned` to be sendable
534 pub type SendStr = MaybeOwned<'static>;
535
536 impl<'a> MaybeOwned<'a> {
537 /// Returns `true` if this `MaybeOwned` wraps an owned string
538 #[inline]
539 pub fn is_owned(&self) -> bool {
540 match *self {
541 Slice(_) => false,
542 Owned(_) => true
543 }
544 }
545
546 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
547 #[inline]
548 pub fn is_slice(&self) -> bool {
549 match *self {
550 Slice(_) => true,
551 Owned(_) => false
552 }
553 }
554 }
555
556 /// Trait for moving into a `MaybeOwned`
557 pub trait IntoMaybeOwned<'a> {
558 /// Moves self into a `MaybeOwned`
559 fn into_maybe_owned(self) -> MaybeOwned<'a>;
560 }
561
562 impl<'a> IntoMaybeOwned<'a> for ~str {
563 #[inline]
564 fn into_maybe_owned(self) -> MaybeOwned<'a> { Owned(self) }
565 }
566
567 impl<'a> IntoMaybeOwned<'a> for &'a str {
568 #[inline]
569 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
570 }
571
572 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
573 #[inline]
574 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
575 }
576
577 impl<'a> Eq for MaybeOwned<'a> {
578 #[inline]
579 fn eq(&self, other: &MaybeOwned) -> bool {
580 self.as_slice() == other.as_slice()
581 }
582 }
583
584 impl<'a> TotalEq for MaybeOwned<'a> {}
585
586 impl<'a> Ord for MaybeOwned<'a> {
587 #[inline]
588 fn lt(&self, other: &MaybeOwned) -> bool {
589 self.as_slice().lt(&other.as_slice())
590 }
591 }
592
593 impl<'a> TotalOrd for MaybeOwned<'a> {
594 #[inline]
595 fn cmp(&self, other: &MaybeOwned) -> Ordering {
596 self.as_slice().cmp(&other.as_slice())
597 }
598 }
599
600 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
601 #[inline]
602 fn equiv(&self, other: &S) -> bool {
603 self.as_slice() == other.as_slice()
604 }
605 }
606
607 impl<'a> Str for MaybeOwned<'a> {
608 #[inline]
609 fn as_slice<'b>(&'b self) -> &'b str {
610 match *self {
611 Slice(s) => s,
612 Owned(ref s) => s.as_slice()
613 }
614 }
615 }
616
617 impl<'a> StrAllocating for MaybeOwned<'a> {
618 #[inline]
619 fn into_owned(self) -> ~str {
620 match self {
621 Slice(s) => s.to_owned(),
622 Owned(s) => s
623 }
624 }
625 }
626
627 impl<'a> Container for MaybeOwned<'a> {
628 #[inline]
629 fn len(&self) -> uint { self.as_slice().len() }
630 }
631
632 impl<'a> Clone for MaybeOwned<'a> {
633 #[inline]
634 fn clone(&self) -> MaybeOwned<'a> {
635 match *self {
636 Slice(s) => Slice(s),
637 Owned(ref s) => Owned(s.to_owned())
638 }
639 }
640 }
641
642 impl<'a> Default for MaybeOwned<'a> {
643 #[inline]
644 fn default() -> MaybeOwned<'a> { Slice("") }
645 }
646
647 impl<'a, H: Writer> ::hash::Hash<H> for MaybeOwned<'a> {
648 #[inline]
649 fn hash(&self, hasher: &mut H) {
650 match *self {
651 Slice(s) => s.hash(hasher),
652 Owned(ref s) => s.hash(hasher),
653 }
654 }
655 }
656
657 impl<'a> fmt::Show for MaybeOwned<'a> {
658 #[inline]
659 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
660 match *self {
661 Slice(ref s) => s.fmt(f),
662 Owned(ref s) => s.fmt(f)
663 }
664 }
665 }
666
667 /// Unsafe operations
668 pub mod raw {
669 use cast;
670 use libc;
671 use ptr::RawPtr;
672 use raw::Slice;
673 use slice::CloneableVector;
674 use str::{is_utf8, StrAllocating};
675
676 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
677 pub use core::str::raw::{slice_unchecked};
678
679 /// Create a Rust string from a *u8 buffer of the given length
680 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
681 let v = Slice { data: buf, len: len };
682 let bytes: &[u8] = ::cast::transmute(v);
683 assert!(is_utf8(bytes));
684 let s: &str = ::cast::transmute(bytes);
685 s.to_owned()
686 }
687
688 #[lang="strdup_uniq"]
689 #[cfg(not(test))]
690 #[inline]
691 unsafe fn strdup_uniq(ptr: *u8, len: uint) -> ~str {
692 from_buf_len(ptr, len)
693 }
694
695 /// Create a Rust string from a null-terminated C string
696 pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
697 let mut curr = buf;
698 let mut i = 0;
699 while *curr != 0 {
700 i += 1;
701 curr = buf.offset(i);
702 }
703 from_buf_len(buf as *u8, i as uint)
704 }
705
706 /// Converts an owned vector of bytes to a new owned string. This assumes
707 /// that the utf-8-ness of the vector has already been validated
708 #[inline]
709 pub unsafe fn from_utf8_owned(v: ~[u8]) -> ~str {
710 cast::transmute(v)
711 }
712
713 /// Converts a byte to a string.
714 pub unsafe fn from_byte(u: u8) -> ~str { from_utf8_owned(box [u]) }
715
716 /// Access the str in its vector representation.
717 /// The caller must preserve the valid UTF-8 property when modifying.
718 #[inline]
719 pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
720 cast::transmute(s)
721 }
722
723 /// Sets the length of a string
724 ///
725 /// This will explicitly set the size of the string, without actually
726 /// modifying its buffers, so it is up to the caller to ensure that
727 /// the string is actually the specified size.
728 #[test]
729 fn test_from_buf_len() {
730 use slice::ImmutableVector;
731 use str::StrAllocating;
732
733 unsafe {
734 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
735 let b = a.as_ptr();
736 let c = from_buf_len(b, 3u);
737 assert_eq!(c, "AAA".to_owned());
738 }
739 }
740 }
741
742 /*
743 Section: Trait implementations
744 */
745
746 /// Any string that can be represented as a slice
747 pub trait StrAllocating: Str {
748 /// Convert `self` into a ~str, not making a copy if possible.
749 fn into_owned(self) -> ~str;
750
751 /// Convert `self` into a `StrBuf`.
752 #[inline]
753 fn to_strbuf(&self) -> StrBuf {
754 StrBuf::from_str(self.as_slice())
755 }
756
757 /// Convert `self` into a `StrBuf`, not making a copy if possible.
758 #[inline]
759 fn into_strbuf(self) -> StrBuf {
760 StrBuf::from_owned_str(self.into_owned())
761 }
762
763 /// Escape each char in `s` with `char::escape_default`.
764 fn escape_default(&self) -> ~str {
765 let me = self.as_slice();
766 let mut out = StrBuf::with_capacity(me.len());
767 for c in me.chars() {
768 c.escape_default(|c| out.push_char(c));
769 }
770 out.into_owned()
771 }
772
773 /// Escape each char in `s` with `char::escape_unicode`.
774 fn escape_unicode(&self) -> ~str {
775 let me = self.as_slice();
776 let mut out = StrBuf::with_capacity(me.len());
777 for c in me.chars() {
778 c.escape_unicode(|c| out.push_char(c));
779 }
780 out.into_owned()
781 }
782
783 /// Replace all occurrences of one string with another.
784 ///
785 /// # Arguments
786 ///
787 /// * `from` - The string to replace
788 /// * `to` - The replacement string
789 ///
790 /// # Return value
791 ///
792 /// The original string with all occurrences of `from` replaced with `to`.
793 ///
794 /// # Example
795 ///
796 /// ```rust
797 /// let s = "Do you know the muffin man,
798 /// The muffin man, the muffin man, ...".to_owned();
799 ///
800 /// assert_eq!(s.replace("muffin man", "little lamb"),
801 /// "Do you know the little lamb,
802 /// The little lamb, the little lamb, ...".to_owned());
803 ///
804 /// // not found, so no change.
805 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
806 /// ```
807 fn replace(&self, from: &str, to: &str) -> ~str {
808 let me = self.as_slice();
809 let mut result = StrBuf::new();
810 let mut last_end = 0;
811 for (start, end) in me.match_indices(from) {
812 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
813 result.push_str(to);
814 last_end = end;
815 }
816 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
817 result.into_owned()
818 }
819
820 /// Copy a slice into a new owned str.
821 #[inline]
822 fn to_owned(&self) -> ~str {
823 use slice::Vector;
824
825 unsafe {
826 ::cast::transmute(self.as_slice().as_bytes().to_owned())
827 }
828 }
829
830 /// Converts to a vector of `u16` encoded as UTF-16.
831 fn to_utf16(&self) -> Vec<u16> {
832 let me = self.as_slice();
833 let mut u = Vec::new();
834 for ch in me.chars() {
835 let mut buf = [0u16, ..2];
836 let n = ch.encode_utf16(buf /* as mut slice! */);
837 u.push_all(buf.slice_to(n));
838 }
839 u
840 }
841
842 /// Given a string, make a new string with repeated copies of it.
843 fn repeat(&self, nn: uint) -> ~str {
844 let me = self.as_slice();
845 let mut ret = StrBuf::with_capacity(nn * me.len());
846 for _ in range(0, nn) {
847 ret.push_str(me);
848 }
849 ret.into_owned()
850 }
851
852 /// Levenshtein Distance between two strings.
853 fn lev_distance(&self, t: &str) -> uint {
854 let me = self.as_slice();
855 let slen = me.len();
856 let tlen = t.len();
857
858 if slen == 0 { return tlen; }
859 if tlen == 0 { return slen; }
860
861 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
862
863 for (i, sc) in me.chars().enumerate() {
864
865 let mut current = i;
866 *dcol.get_mut(0) = current + 1;
867
868 for (j, tc) in t.chars().enumerate() {
869
870 let next = *dcol.get(j + 1);
871
872 if sc == tc {
873 *dcol.get_mut(j + 1) = current;
874 } else {
875 *dcol.get_mut(j + 1) = ::cmp::min(current, next);
876 *dcol.get_mut(j + 1) = ::cmp::min(*dcol.get(j + 1),
877 *dcol.get(j)) + 1;
878 }
879
880 current = next;
881 }
882 }
883
884 return *dcol.get(tlen);
885 }
886
887 /// An Iterator over the string in Unicode Normalization Form D
888 /// (canonical decomposition).
889 #[inline]
890 fn nfd_chars<'a>(&'a self) -> Normalizations<'a> {
891 Normalizations {
892 iter: self.as_slice().chars(),
893 buffer: Vec::new(),
894 sorted: false,
895 kind: NFD
896 }
897 }
898
899 /// An Iterator over the string in Unicode Normalization Form KD
900 /// (compatibility decomposition).
901 #[inline]
902 fn nfkd_chars<'a>(&'a self) -> Normalizations<'a> {
903 Normalizations {
904 iter: self.as_slice().chars(),
905 buffer: Vec::new(),
906 sorted: false,
907 kind: NFKD
908 }
909 }
910 }
911
912 impl<'a> StrAllocating for &'a str {
913 #[inline]
914 fn into_owned(self) -> ~str { self.to_owned() }
915 }
916
917 impl<'a> StrAllocating for ~str {
918 #[inline]
919 fn into_owned(self) -> ~str { self }
920 }
921
922 /// Methods for owned strings
923 pub trait OwnedStr {
924 /// Consumes the string, returning the underlying byte buffer.
925 ///
926 /// The buffer does not have a null terminator.
927 fn into_bytes(self) -> ~[u8];
928
929 /// Pushes the given string onto this string, returning the concatenation of the two strings.
930 fn append(self, rhs: &str) -> ~str;
931 }
932
933 impl OwnedStr for ~str {
934 #[inline]
935 fn into_bytes(self) -> ~[u8] {
936 unsafe { cast::transmute(self) }
937 }
938
939 #[inline]
940 fn append(self, rhs: &str) -> ~str {
941 let mut new_str = StrBuf::from_owned_str(self);
942 new_str.push_str(rhs);
943 new_str.into_owned()
944 }
945 }
946
947 #[cfg(test)]
948 mod tests {
949 use iter::AdditiveIterator;
950 use default::Default;
951 use prelude::*;
952 use str::*;
953 use strbuf::StrBuf;
954
955 #[test]
956 fn test_eq() {
957 assert!((eq(&"".to_owned(), &"".to_owned())));
958 assert!((eq(&"foo".to_owned(), &"foo".to_owned())));
959 assert!((!eq(&"foo".to_owned(), &"bar".to_owned())));
960 }
961
962 #[test]
963 fn test_eq_slice() {
964 assert!((eq_slice("foobar".slice(0, 3), "foo")));
965 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
966 assert!((!eq_slice("foo1", "foo2")));
967 }
968
969 #[test]
970 fn test_le() {
971 assert!("" <= "");
972 assert!("" <= "foo");
973 assert!("foo" <= "foo");
974 assert!("foo" != "bar");
975 }
976
977 #[test]
978 fn test_len() {
979 assert_eq!("".len(), 0u);
980 assert_eq!("hello world".len(), 11u);
981 assert_eq!("\x63".len(), 1u);
982 assert_eq!("\xa2".len(), 2u);
983 assert_eq!("\u03c0".len(), 2u);
984 assert_eq!("\u2620".len(), 3u);
985 assert_eq!("\U0001d11e".len(), 4u);
986
987 assert_eq!("".char_len(), 0u);
988 assert_eq!("hello world".char_len(), 11u);
989 assert_eq!("\x63".char_len(), 1u);
990 assert_eq!("\xa2".char_len(), 1u);
991 assert_eq!("\u03c0".char_len(), 1u);
992 assert_eq!("\u2620".char_len(), 1u);
993 assert_eq!("\U0001d11e".char_len(), 1u);
994 assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".char_len(), 19u);
995 }
996
997 #[test]
998 fn test_find() {
999 assert_eq!("hello".find('l'), Some(2u));
1000 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
1001 assert!("hello".find('x').is_none());
1002 assert!("hello".find(|c:char| c == 'x').is_none());
1003 assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".find('å'), Some(30u));
1004 assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".find(|c: char| c == 'å'), Some(30u));
1005 }
1006
1007 #[test]
1008 fn test_rfind() {
1009 assert_eq!("hello".rfind('l'), Some(3u));
1010 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
1011 assert!("hello".rfind('x').is_none());
1012 assert!("hello".rfind(|c:char| c == 'x').is_none());
1013 assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".rfind('å'), Some(30u));
1014 assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".rfind(|c: char| c == 'å'), Some(30u));
1015 }
1016
1017 #[test]
1018 fn test_collect() {
1019 let empty = "".to_owned();
1020 let s: ~str = empty.chars().collect();
1021 assert_eq!(empty, s);
1022 let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸".to_owned();
1023 let s: ~str = data.chars().collect();
1024 assert_eq!(data, s);
1025 }
1026
1027 #[test]
1028 fn test_into_bytes() {
1029 let data = "asdf".to_owned();
1030 let buf = data.into_bytes();
1031 assert_eq!(bytes!("asdf"), buf.as_slice());
1032 }
1033
1034 #[test]
1035 fn test_find_str() {
1036 // byte positions
1037 assert_eq!("".find_str(""), Some(0u));
1038 assert!("banana".find_str("apple pie").is_none());
1039
1040 let data = "abcabc";
1041 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1042 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1043 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1044
1045 let mut data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1046 data = data + data;
1047 assert!(data.find_str("à¹à¸å").is_none());
1048 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1049 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1050
1051 assert_eq!(data.slice(0u, 43u).find_str("à¸à¸£à¸°"), Some( 0u));
1052 assert_eq!(data.slice(0u, 43u).find_str("à¸à¸¨à¹"), Some(12u));
1053 assert_eq!(data.slice(0u, 43u).find_str("ยä¸"), Some(24u));
1054 assert_eq!(data.slice(0u, 43u).find_str("iá»t"), Some(34u));
1055 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1056
1057 assert_eq!(data.slice(43u, 86u).find_str("à¸à¸£à¸°"), Some(43u - 43u));
1058 assert_eq!(data.slice(43u, 86u).find_str("à¸à¸¨à¹"), Some(55u - 43u));
1059 assert_eq!(data.slice(43u, 86u).find_str("ยä¸"), Some(67u - 43u));
1060 assert_eq!(data.slice(43u, 86u).find_str("iá»t"), Some(77u - 43u));
1061 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1062 }
1063
1064 #[test]
1065 fn test_slice_chars() {
1066 fn t(a: &str, b: &str, start: uint) {
1067 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1068 }
1069 t("", "", 0);
1070 t("hello", "llo", 2);
1071 t("hello", "el", 1);
1072 t("αβλ", "β", 1);
1073 t("αβλ", "", 3);
1074 assert_eq!("ะà¹à¸à¸¨à¹à¸", "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".slice_chars(2, 8));
1075 }
1076
1077 #[test]
1078 fn test_concat() {
1079 fn t(v: &[~str], s: &str) {
1080 assert_eq!(v.concat(), s.to_str());
1081 }
1082 t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
1083 "no".to_owned(), "good".to_owned()], "youknowI'mnogood");
1084 let v: &[~str] = [];
1085 t(v, "");
1086 t(["hi".to_owned()], "hi");
1087 }
1088
1089 #[test]
1090 fn test_connect() {
1091 fn t(v: &[~str], sep: &str, s: &str) {
1092 assert_eq!(v.connect(sep), s.to_str());
1093 }
1094 t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
1095 "no".to_owned(), "good".to_owned()],
1096 " ", "you know I'm no good");
1097 let v: &[~str] = [];
1098 t(v, " ", "");
1099 t(["hi".to_owned()], " ", "hi");
1100 }
1101
1102 #[test]
1103 fn test_concat_slices() {
1104 fn t(v: &[&str], s: &str) {
1105 assert_eq!(v.concat(), s.to_str());
1106 }
1107 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
1108 let v: &[&str] = [];
1109 t(v, "");
1110 t(["hi"], "hi");
1111 }
1112
1113 #[test]
1114 fn test_connect_slices() {
1115 fn t(v: &[&str], sep: &str, s: &str) {
1116 assert_eq!(v.connect(sep), s.to_str());
1117 }
1118 t(["you", "know", "I'm", "no", "good"],
1119 " ", "you know I'm no good");
1120 t([], " ", "");
1121 t(["hi"], " ", "hi");
1122 }
1123
1124 #[test]
1125 fn test_repeat() {
1126 assert_eq!("x".repeat(4), "xxxx".to_owned());
1127 assert_eq!("hi".repeat(4), "hihihihi".to_owned());
1128 assert_eq!("à¹à¸å".repeat(3), "à¹à¸åà¹à¸åà¹à¸å".to_owned());
1129 assert_eq!("".repeat(4), "".to_owned());
1130 assert_eq!("hi".repeat(0), "".to_owned());
1131 }
1132
1133 #[test]
1134 fn test_unsafe_slice() {
1135 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1136 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1137 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1138 fn a_million_letter_a() -> ~str {
1139 let mut i = 0;
1140 let mut rs = StrBuf::new();
1141 while i < 100000 {
1142 rs.push_str("aaaaaaaaaa");
1143 i += 1;
1144 }
1145 rs.into_owned()
1146 }
1147 fn half_a_million_letter_a() -> ~str {
1148 let mut i = 0;
1149 let mut rs = StrBuf::new();
1150 while i < 100000 {
1151 rs.push_str("aaaaa");
1152 i += 1;
1153 }
1154 rs.into_owned()
1155 }
1156 let letters = a_million_letter_a();
1157 assert!(half_a_million_letter_a() ==
1158 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
1159 }
1160
// Prefix checks, including the empty prefix, a prefix longer than the
// string, and a multi-byte leading character.
#[test]
fn test_starts_with() {
    // positive cases
    assert!("".starts_with(""));
    assert!("abc".starts_with(""));
    assert!("abc".starts_with("a"));
    // negative cases
    assert!(!"a".starts_with("abc"));
    assert!(!"".starts_with("abc"));
    // non-ASCII
    assert!(!"ödd".starts_with("-"));
    assert!("ödd".starts_with("öd"));
}
1171
// Suffix checks, including the empty suffix, a suffix longer than the
// string, and a multi-byte trailing character.
#[test]
fn test_ends_with() {
    // positive cases
    assert!("".ends_with(""));
    assert!("abc".ends_with(""));
    assert!("abc".ends_with("c"));
    // negative cases
    assert!(!"a".ends_with("abc"));
    assert!(!"".ends_with("abc"));
    // non-ASCII
    assert!(!"ddö".ends_with("-"));
    assert!("ddö".ends_with("dö"));
}
1182
// Only the zero-length string is empty.
#[test]
fn test_is_empty() {
    let (nothing, something) = ("", "a");
    assert!(nothing.is_empty());
    assert!(!something.is_empty());
}
1188
// `replace` substitutes every occurrence of the needle.
#[test]
fn test_replace() {
    let a = "a";
    assert_eq!("".replace(a, "b"), "".to_owned());
    assert_eq!("a".replace(a, "b"), "b".to_owned());
    assert_eq!("ab".replace(a, "b"), "bb".to_owned());
    let test = "test";
    assert!(" test test ".replace(test, "toast") ==
            " toast toast ".to_owned());
    // Deleting both words leaves the three spaces that surrounded them.
    assert_eq!(" test test ".replace(test, ""), "   ".to_owned());
}
1200
// Replacing a multi-byte needle at the start of the string.
// (Literals restored from mis-encoded text.)
#[test]
fn test_replace_2a() {
    let data = "ประเทศไทย中华".to_owned();
    let repl = "دولة الكويت".to_owned();

    let a = "ประเ".to_owned();
    let a2 = "دولة الكويتทศไทย中华".to_owned();
    assert_eq!(data.replace(a, repl), a2);
}
1210
// Replacing a multi-byte needle in the middle of the string.
// (Literals restored from mis-encoded text.)
#[test]
fn test_replace_2b() {
    let data = "ประเทศไทย中华".to_owned();
    let repl = "دولة الكويت".to_owned();

    let b = "ะเ".to_owned();
    let b2 = "ปรدولة الكويتทศไทย中华".to_owned();
    assert_eq!(data.replace(b, repl), b2);
}
1220
// Replacing a multi-byte needle at the end of the string.
// (Literals restored from mis-encoded text.)
#[test]
fn test_replace_2c() {
    let data = "ประเทศไทย中华".to_owned();
    let repl = "دولة الكويت".to_owned();

    let c = "中华".to_owned();
    let c2 = "ประเทศไทยدولة الكويت".to_owned();
    assert_eq!(data.replace(c, repl), c2);
}
1230
// A needle whose characters all occur in the string, but never
// contiguously, must leave the string untouched.
// (Literals restored from mis-encoded text.)
#[test]
fn test_replace_2d() {
    let data = "ประเทศไทย中华".to_owned();
    let repl = "دولة الكويت".to_owned();

    let d = "ไท华".to_owned();
    assert_eq!(data.replace(d, repl), data);
}
1239
1240 #[test]
1241 fn test_slice() {
1242 assert_eq!("ab", "abc".slice(0, 2));
1243 assert_eq!("bc", "abc".slice(1, 3));
1244 assert_eq!("", "abc".slice(1, 1));
1245 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1246
1247 let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸å";
1248 assert_eq!("à¸", data.slice(0, 3));
1249 assert_eq!("ร", data.slice(3, 6));
1250 assert_eq!("", data.slice(3, 3));
1251 assert_eq!("å", data.slice(30, 33));
1252
1253 fn a_million_letter_X() -> ~str {
1254 let mut i = 0;
1255 let mut rs = StrBuf::new();
1256 while i < 100000 {
1257 rs.push_str("åååååååååå");
1258 i += 1;
1259 }
1260 rs.into_owned()
1261 }
1262 fn half_a_million_letter_X() -> ~str {
1263 let mut i = 0;
1264 let mut rs = StrBuf::new();
1265 while i < 100000 {
1266 rs.push_str("ååååå");
1267 i += 1;
1268 }
1269 rs.into_owned()
1270 }
1271 let letters = a_million_letter_X();
1272 assert!(half_a_million_letter_X() ==
1273 letters.slice(0u, 3u * 500000u).to_owned());
1274 }
1275
1276 #[test]
1277 fn test_slice_2() {
1278 let ss = "ä¸åViá»t Nam";
1279
1280 assert_eq!("å", ss.slice(3u, 6u));
1281 assert_eq!("Viá»t Nam", ss.slice(6u, 16u));
1282
1283 assert_eq!("ab", "abc".slice(0u, 2u));
1284 assert_eq!("bc", "abc".slice(1u, 3u));
1285 assert_eq!("", "abc".slice(1u, 1u));
1286
1287 assert_eq!("ä¸", ss.slice(0u, 3u));
1288 assert_eq!("åV", ss.slice(3u, 7u));
1289 assert_eq!("", ss.slice(3u, 3u));
1290 /*0: ä¸
1291 3: å
1292 6: V
1293 7: i
1294 8: á»
1295 11: t
1296 12:
1297 13: N
1298 14: a
1299 15: m */
1300 }
1301
1302 #[test]
1303 #[should_fail]
1304 fn test_slice_fail() {
1305 "ä¸åViá»t Nam".slice(0u, 2u);
1306 }
1307
// `slice_from(n)` keeps everything from byte `n` to the end.
#[test]
fn test_slice_from() {
    let cases = [(0u, "abcd"), (2u, "cd"), (4u, "")];
    for &(start, expected) in cases.iter() {
        assert_eq!("abcd".slice_from(start), expected);
    }
}
// `slice_to(n)` keeps everything before byte `n`.
#[test]
fn test_slice_to() {
    let cases = [(0u, ""), (2u, "ab"), (4u, "abcd")];
    for &(end, expected) in cases.iter() {
        assert_eq!("abcd".slice_to(end), expected);
    }
}
1320
// `trim_left_chars` with each kind of pattern used here: a slice of
// chars, a single char and a closure.
#[test]
fn test_trim_left_chars() {
    let no_chars: &[char] = &[];
    let star_or_space: &[char] = &['*', ' '];

    assert_eq!(" *** foo *** ".trim_left_chars(no_chars), " *** foo *** ");
    assert_eq!(" *** foo *** ".trim_left_chars(star_or_space), "foo *** ");
    assert_eq!(" *** *** ".trim_left_chars(star_or_space), "");
    assert_eq!("foo *** ".trim_left_chars(star_or_space), "foo *** ");

    assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
    assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
    assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
}
1333
// `trim_right_chars` with each kind of pattern used here: a slice of
// chars, a single char and a closure.
#[test]
fn test_trim_right_chars() {
    let no_chars: &[char] = &[];
    let star_or_space: &[char] = &['*', ' '];

    assert_eq!(" *** foo *** ".trim_right_chars(no_chars), " *** foo *** ");
    assert_eq!(" *** foo *** ".trim_right_chars(star_or_space), " *** foo");
    assert_eq!(" *** *** ".trim_right_chars(star_or_space), "");
    assert_eq!(" *** foo".trim_right_chars(star_or_space), " *** foo");

    assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
    assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
    assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
}
1346
// `trim_chars` strips matching characters from both ends but never from
// the interior.
#[test]
fn test_trim_chars() {
    let no_chars: &[char] = &[];
    let star_or_space: &[char] = &['*', ' '];

    assert_eq!(" *** foo *** ".trim_chars(no_chars), " *** foo *** ");
    assert_eq!(" *** foo *** ".trim_chars(star_or_space), "foo");
    assert_eq!(" *** *** ".trim_chars(star_or_space), "");
    assert_eq!("foo".trim_chars(star_or_space), "foo");

    assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
    assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
    assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
}
1359
1360 #[test]
1361 fn test_trim_left() {
1362 assert_eq!("".trim_left(), "");
1363 assert_eq!("a".trim_left(), "a");
1364 assert_eq!(" ".trim_left(), "");
1365 assert_eq!(" blah".trim_left(), "blah");
1366 assert_eq!(" \u3000 wut".trim_left(), "wut");
1367 assert_eq!("hey ".trim_left(), "hey ");
1368 }
1369
1370 #[test]
1371 fn test_trim_right() {
1372 assert_eq!("".trim_right(), "");
1373 assert_eq!("a".trim_right(), "a");
1374 assert_eq!(" ".trim_right(), "");
1375 assert_eq!("blah ".trim_right(), "blah");
1376 assert_eq!("wut \u3000 ".trim_right(), "wut");
1377 assert_eq!(" hey".trim_right(), " hey");
1378 }
1379
1380 #[test]
1381 fn test_trim() {
1382 assert_eq!("".trim(), "");
1383 assert_eq!("a".trim(), "a");
1384 assert_eq!(" ".trim(), "");
1385 assert_eq!(" blah ".trim(), "blah");
1386 assert_eq!("\nwut \u3000 ".trim(), "wut");
1387 assert_eq!(" hey dude ".trim(), "hey dude");
1388 }
1389
1390 #[test]
1391 fn test_is_whitespace() {
1392 assert!("".is_whitespace());
1393 assert!(" ".is_whitespace());
1394 assert!("\u2009".is_whitespace()); // Thin space
1395 assert!(" \n\t ".is_whitespace());
1396 assert!(!" _ ".is_whitespace());
1397 }
1398
// `slice_shift_char` splits off the first character, which is three
// bytes long here. (Literals restored from mis-encoded text.)
#[test]
fn test_slice_shift_char() {
    let data = "ประเทศไทย中";
    assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
}
1404
// Shifting a character off the empty string gives `None` and leaves the
// empty string behind.
#[test]
fn test_slice_shift_char_2() {
    let shifted = "".slice_shift_char();
    assert_eq!(shifted, (None, ""));
}
1410
// Byte sequences `is_utf8` must reject and accept.
#[test]
fn test_is_utf8() {
    macro_rules! rejected ( ($($e:expr),*) => { { $(assert!(!is_utf8($e));)* } });
    macro_rules! accepted ( ($($e:expr),*) => { { $(assert!(is_utf8($e));)* } });

    // deny overlong encodings (the last sequence instead encodes a value
    // just past U+10FFFF)
    rejected!([0xc0, 0x80],
              [0xc0, 0xae],
              [0xe0, 0x80, 0x80],
              [0xe0, 0x80, 0xaf],
              [0xe0, 0x81, 0x81],
              [0xf0, 0x82, 0x82, 0xac],
              [0xf4, 0x90, 0x80, 0x80]);

    // deny surrogates
    rejected!([0xED, 0xA0, 0x80],
              [0xED, 0xBF, 0xBF]);

    // first and last valid sequence of each encoded length
    accepted!([0xC2, 0x80],
              [0xDF, 0xBF],
              [0xE0, 0xA0, 0x80],
              [0xED, 0x9F, 0xBF],
              [0xEE, 0x80, 0x80],
              [0xEF, 0xBF, 0xBF],
              [0xF0, 0x90, 0x80, 0x80],
              [0xF4, 0x8F, 0xBF, 0xBF]);
}
1435
// Code-unit sequences `is_utf16` must accept and reject.
#[test]
fn test_is_utf16() {
    // `well_formed!` asserts every argument is valid UTF-16,
    // `ill_formed!` asserts every argument is not.
    macro_rules! well_formed ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
    macro_rules! ill_formed ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });

    // non-surrogates
    well_formed!([0x0000],
                 [0x0001, 0x0002],
                 [0xD7FF],
                 [0xE000]);

    // surrogate pairs (randomly generated with Python 3's
    // .encode('utf-16be'))
    well_formed!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
                 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
                 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);

    // mixtures (also random)
    well_formed!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
                 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
                 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);

    // negative tests
    ill_formed!(
        // surrogate + regular unit
        [0xdb45, 0x0000],
        // surrogate + lead surrogate
        [0xd900, 0xd900],
        // unterminated surrogate
        [0xd8ff],
        // trail surrogate without a lead
        [0xddb7]);

    // random byte sequences that Python 3's .decode('utf-16be')
    // failed on
    ill_formed!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
                [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
                [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
                [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
                [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
                [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
                [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
                [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
                [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
                [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
                [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
                [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
                [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
                [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
                [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
                [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
                [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
                [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
                [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
                [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
                [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
}
1494
1495 #[test]
1496 fn test_raw_from_c_str() {
1497 unsafe {
1498 let a = box [65, 65, 65, 65, 65, 65, 65, 0];
1499 let b = a.as_ptr();
1500 let c = raw::from_c_str(b);
1501 assert_eq!(c, "AAAAAAA".to_owned());
1502 }
1503 }
1504
1505 #[test]
1506 fn test_as_bytes() {
1507 // no null
1508 let v = [
1509 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1510 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1511 109
1512 ];
1513 assert_eq!("".as_bytes(), &[]);
1514 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1515 assert_eq!("ศà¹à¸à¸¢ä¸åViá»t Nam".as_bytes(), v.as_slice());
1516 }
1517
// Fails on purpose while a byte view of an owned string is still alive.
#[test]
#[should_fail]
fn test_as_bytes_fail() {
    // Don't double free. (I'm not sure if this exercises the
    // original problem code path anymore.)
    let owned = "".to_owned();
    let _view = owned.as_bytes();
    fail!();
}
1527
// The pointer from `as_ptr` addresses the string's bytes in order.
#[test]
fn test_as_ptr() {
    let buf = "hello".as_ptr();
    let expected = ['h', 'e', 'l', 'l', 'o'];
    unsafe {
        for (i, &ch) in expected.iter().enumerate() {
            assert_eq!(*buf.offset(i as int), ch as u8);
        }
    }
}
1539
// `subslice_offset` reports the byte offset at which a slice begins
// within the string it was taken from.
#[test]
fn test_subslice_offset() {
    let whole = "kernelsprite";
    let tail = whole.slice(7, whole.len());
    let head = whole.slice(0, whole.len() - 6);
    assert_eq!(whole.subslice_offset(tail), 7);
    assert_eq!(whole.subslice_offset(head), 0);

    // Slices produced by an iterator over the string work the same way.
    let text = "a\nb\nc";
    let collected: Vec<&str> = text.lines().collect();
    let parts = collected.as_slice();
    assert_eq!(text.subslice_offset(parts[0]), 0);
    assert_eq!(text.subslice_offset(parts[1]), 2);
    assert_eq!(text.subslice_offset(parts[2]), 4);
}
1555
// Asking for the offset of a string that is not a slice of the receiver
// must fail.
#[test]
#[should_fail]
fn test_subslice_offset_2() {
    let receiver = "alchemiter";
    let unrelated = "cruxtruder";
    receiver.subslice_offset(unrelated);
}
1563
1564 #[test]
1565 fn vec_str_conversions() {
1566 let s1: ~str = "All mimsy were the borogoves".to_owned();
1567
1568 let v: ~[u8] = s1.as_bytes().to_owned();
1569 let s2: ~str = from_utf8(v).unwrap().to_owned();
1570 let mut i: uint = 0u;
1571 let n1: uint = s1.len();
1572 let n2: uint = v.len();
1573 assert_eq!(n1, n2);
1574 while i < n1 {
1575 let a: u8 = s1[i];
1576 let b: u8 = s2[i];
1577 debug!("{}", a);
1578 debug!("{}", b);
1579 assert_eq!(a, b);
1580 i += 1u;
1581 }
1582 }
1583
1584 #[test]
1585 fn test_contains() {
1586 assert!("abcde".contains("bcd"));
1587 assert!("abcde".contains("abcd"));
1588 assert!("abcde".contains("bcde"));
1589 assert!("abcde".contains(""));
1590 assert!("".contains(""));
1591 assert!(!"abcde".contains("def"));
1592 assert!(!"".contains("a"));
1593
1594 let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1595 assert!(data.contains("à¸à¸£à¸°à¹"));
1596 assert!(data.contains("ะà¹"));
1597 assert!(data.contains("ä¸å"));
1598 assert!(!data.contains("à¹à¸å"));
1599 }
1600
// Single-character membership, including on the empty string.
#[test]
fn test_contains_char() {
    // present
    assert!("abc".contains_char('b'));
    assert!("a".contains_char('a'));
    // absent
    assert!(!"abc".contains_char('d'));
    assert!(!"".contains_char('a'));
}
1608
// Round-trips strings of non-BMP characters through UTF-16.
//
// The string literals are written with `\U` escapes, each derived from
// the surrogate pair beside it, so that they cannot be damaged by a
// re-encoding of this file.
#[test]
fn test_utf16() {
    let pairs =
        [("\U00010345\U0001033f\U0001033b\U00010346\U00010339\U0001033b\
           \U00010330\n".to_owned(),
          vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
               0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
               0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
               0xd800_u16, 0xdf30_u16, 0x000a_u16]),

         ("\U00010412\U00010449\U0001042e\U00010440\U00010432\U0001044b \
           \U0001040f\U00010432\U0001044d\n".to_owned(),
          vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
               0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
               0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
               0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
               0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
               0x000a_u16]),

         ("\U00010300\U00010316\U0001030b\U00010304\U00010311\U00010309\
           \u00b7\U0001030c\U00010304\U00010315\U00010304\U0001030b\
           \U00010309\U00010311\n".to_owned(),
          vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
               0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
               0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
               0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
               0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
               0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
               0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),

         ("\U0001048b\U00010498\U00010488\U00010491\U0001049b\U00010492 \
           \U00010495\U00010493 \
           \U00010488\U0001049a\U0001048d \
           \U0001048f\U0001049c\U00010492\U00010496\U00010486 \
           \U00010495\U00010486\n".to_owned(),
          vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
               0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
               0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
               0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
               0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
               0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
               0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
               0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
               0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
               0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
               0x000a_u16 ]),
         // Issue #12318, even-numbered non-BMP planes
         ("\U00020000".to_owned(),
          vec![0xD840, 0xDC00])];

    for p in pairs.iter() {
        let (s, u) = (*p).clone();
        assert!(is_utf16(u.as_slice()));
        assert_eq!(s.to_utf16(), u);

        assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
        assert_eq!(from_utf16_lossy(u.as_slice()), s);

        // Both directions composed with each other.
        assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
        assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
    }
}
1663
// `from_utf16` returns `None` for ill-formed input.
#[test]
fn test_utf16_invalid() {
    // completely positive cases tested above.
    let lead_then_eof = [0xD800];
    let lead_then_lead = [0xD800, 0xD800];
    let isolated_trail = [0x0061, 0xDC00];
    // unpaired lead, valid pair, unpaired lead
    let general = [0xD800, 0xd801, 0xdc8b, 0xD800];

    assert_eq!(from_utf16(lead_then_eof), None);
    assert_eq!(from_utf16(lead_then_lead), None);
    assert_eq!(from_utf16(isolated_trail), None);
    assert_eq!(from_utf16(general), None);
}
1678
1679 #[test]
1680 fn test_utf16_lossy() {
1681 // completely positive cases tested above.
1682 // lead + eof
1683 assert_eq!(from_utf16_lossy([0xD800]), "\uFFFD".to_owned());
1684 // lead + lead
1685 assert_eq!(from_utf16_lossy([0xD800, 0xD800]), "\uFFFD\uFFFD".to_owned());
1686
1687 // isolated trail
1688 assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), "a\uFFFD".to_owned());
1689
1690 // general
1691 assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]), "\uFFFDð\uFFFD".to_owned());
1692 }
1693
// `truncate_utf16_at_nul` returns the units before the first 0, or the
// whole input when there is none.
#[test]
fn test_truncate_utf16_at_nul() {
    let empty = [];
    assert_eq!(truncate_utf16_at_nul(empty), &[]);

    let nul_first = [0, 2, 3];
    assert_eq!(truncate_utf16_at_nul(nul_first), &[]);

    let nul_middle = [1, 0, 3];
    assert_eq!(truncate_utf16_at_nul(nul_middle), &[1]);

    let nul_last = [1, 2, 0];
    assert_eq!(truncate_utf16_at_nul(nul_last), &[1, 2]);

    let no_nul = [1, 2, 3];
    assert_eq!(truncate_utf16_at_nul(no_nul), &[1, 2, 3]);
}
1711
1712 #[test]
1713 fn test_char_at() {
1714 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1715 let v = box ['ศ','à¹','à¸','ย','ä¸','å','V','i','á»','t',' ','N','a','m'];
1716 let mut pos = 0;
1717 for ch in v.iter() {
1718 assert!(s.char_at(pos) == *ch);
1719 pos += from_char(*ch).len();
1720 }
1721 }
1722
1723 #[test]
1724 fn test_char_at_reverse() {
1725 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1726 let v = box ['ศ','à¹','à¸','ย','ä¸','å','V','i','á»','t',' ','N','a','m'];
1727 let mut pos = s.len();
1728 for ch in v.iter().rev() {
1729 assert!(s.char_at_reverse(pos) == *ch);
1730 pos -= from_char(*ch).len();
1731 }
1732 }
1733
// `escape_unicode` escapes every character, choosing `\x`, `\u` or `\U`
// by the size of the code point.
#[test]
fn test_escape_unicode() {
    let cases = [
        ("abc", "\\x61\\x62\\x63"),
        ("a c", "\\x61\\x20\\x63"),
        ("\r\n\t", "\\x0d\\x0a\\x09"),
        ("'\"\\", "\\x27\\x22\\x5c"),
        ("\x00\x01\xfe\xff", "\\x00\\x01\\xfe\\xff"),
        ("\u0100\uffff", "\\u0100\\uffff"),
        ("\U00010000\U0010ffff", "\\U00010000\\U0010ffff"),
        ("ab\ufb00", "\\x61\\x62\\ufb00"),
        ("\U0001d4ea\r", "\\U0001d4ea\\x0d")
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.escape_unicode(), expected.to_owned());
    }
}
1746
// `escape_default` leaves printable ASCII alone, uses the short escapes
// for control characters and quotes, and `\u`/`\U` for everything else.
#[test]
fn test_escape_default() {
    let cases = [
        ("abc", "abc"),
        ("a c", "a c"),
        ("\r\n\t", "\\r\\n\\t"),
        ("'\"\\", "\\'\\\"\\\\"),
        ("\u0100\uffff", "\\u0100\\uffff"),
        ("\U00010000\U0010ffff", "\\U00010000\\U0010ffff"),
        ("ab\ufb00", "ab\\ufb00"),
        ("\U0001d4ea\r", "\\U0001d4ea\\r")
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.escape_default(), expected.to_owned());
    }
}
1758
// Total ordering of string slices via `cmp`.
//
// Each comparison is wrapped in `assert!`; previously the results were
// computed and thrown away, so the test could never fail.
#[test]
fn test_total_ord() {
    // A string is greater than any of its proper prefixes.
    assert!("1234".cmp(&("123")) == Greater);
    assert!("123".cmp(&("1234")) == Less);
    assert!("1234".cmp(&("1234")) == Equal);
    // The first differing byte decides, regardless of length.
    assert!("12345555".cmp(&("123456")) == Less);
    assert!("22".cmp(&("1234")) == Greater);
}
1767
// `char_range_at` decodes characters of 1, 2, 3 and 4 bytes at their
// starting byte offsets. (Literals restored from mis-encoded text.)
#[test]
fn test_char_range_at() {
    let data = "b¢€𤭢𤭢€¢b".to_owned();
    assert_eq!('b', data.char_range_at(0).ch);
    assert_eq!('¢', data.char_range_at(1).ch);
    assert_eq!('€', data.char_range_at(3).ch);
    assert_eq!('𤭢', data.char_range_at(6).ch);
    assert_eq!('𤭢', data.char_range_at(10).ch);
    assert_eq!('€', data.char_range_at(14).ch);
    assert_eq!('¢', data.char_range_at(17).ch);
    assert_eq!('b', data.char_range_at(19).ch);
}
1780
// Stepping backwards from offset 0 must report 0 as the next position.
#[test]
fn test_char_range_at_reverse_underflow() {
    let range = "abc".char_range_at_reverse(0);
    assert_eq!(range.next, 0);
}
1785
1786 #[test]
1787 fn test_add() {
1788 #![allow(unnecessary_allocation)]
1789 macro_rules! t (
1790 ($s1:expr, $s2:expr, $e:expr) => { {
1791 let s1 = $s1;
1792 let s2 = $s2;
1793 let e = $e;
1794 assert_eq!(s1 + s2, e.to_owned());
1795 assert_eq!(s1.to_owned() + s2, e.to_owned());
1796 } }
1797 );
1798
1799 t!("foo", "bar", "foobar");
1800 t!("foo", "bar".to_owned(), "foobar");
1801 t!("ศà¹à¸à¸¢ä¸", "åViá»t Nam", "ศà¹à¸à¸¢ä¸åViá»t Nam");
1802 t!("ศà¹à¸à¸¢ä¸", "åViá»t Nam".to_owned(), "ศà¹à¸à¸¢ä¸åViá»t Nam");
1803 }
1804
1805 #[test]
1806 fn test_iterator() {
1807 use iter::*;
1808 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1809 let v = box ['ศ','à¹','à¸','ย','ä¸','å','V','i','á»','t',' ','N','a','m'];
1810
1811 let mut pos = 0;
1812 let mut it = s.chars();
1813
1814 for c in it {
1815 assert_eq!(c, v[pos]);
1816 pos += 1;
1817 }
1818 assert_eq!(pos, v.len());
1819 }
1820
1821 #[test]
1822 fn test_rev_iterator() {
1823 use iter::*;
1824 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1825 let v = box ['m', 'a', 'N', ' ', 't', 'á»','i','V','å','ä¸','ย','à¸','à¹','ศ'];
1826
1827 let mut pos = 0;
1828 let mut it = s.chars().rev();
1829
1830 for c in it {
1831 assert_eq!(c, v[pos]);
1832 pos += 1;
1833 }
1834 assert_eq!(pos, v.len());
1835 }
1836
1837 #[test]
1838 fn test_iterator_clone() {
1839 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam";
1840 let mut it = s.chars();
1841 it.next();
1842 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
1843 }
1844
1845 #[test]
1846 fn test_bytesator() {
1847 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1848 let v = [
1849 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1850 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1851 109
1852 ];
1853 let mut pos = 0;
1854
1855 for b in s.bytes() {
1856 assert_eq!(b, v[pos]);
1857 pos += 1;
1858 }
1859 }
1860
1861 #[test]
1862 fn test_bytes_revator() {
1863 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1864 let v = [
1865 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1866 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1867 109
1868 ];
1869 let mut pos = v.len();
1870
1871 for b in s.bytes().rev() {
1872 pos -= 1;
1873 assert_eq!(b, v[pos]);
1874 }
1875 }
1876
1877 #[test]
1878 fn test_char_indicesator() {
1879 use iter::*;
1880 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam";
1881 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1882 let v = ['ศ','à¹','à¸','ย','ä¸','å','V','i','á»','t',' ','N','a','m'];
1883
1884 let mut pos = 0;
1885 let mut it = s.char_indices();
1886
1887 for c in it {
1888 assert_eq!(c, (p[pos], v[pos]));
1889 pos += 1;
1890 }
1891 assert_eq!(pos, v.len());
1892 assert_eq!(pos, p.len());
1893 }
1894
1895 #[test]
1896 fn test_char_indices_revator() {
1897 use iter::*;
1898 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam";
1899 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1900 let v = ['m', 'a', 'N', ' ', 't', 'á»','i','V','å','ä¸','ย','à¸','à¹','ศ'];
1901
1902 let mut pos = 0;
1903 let mut it = s.char_indices().rev();
1904
1905 for c in it {
1906 assert_eq!(c, (p[pos], v[pos]));
1907 pos += 1;
1908 }
1909 assert_eq!(pos, v.len());
1910 assert_eq!(pos, p.len());
1911 }
1912
// `split` with a `char` and with a closure, forwards and (after
// re-reversing) backwards, on an ASCII and a non-ASCII separator.
#[test]
fn test_split_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let on_space = vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"];

    let fwd: Vec<&str> = data.split(' ').collect();
    assert_eq!( fwd, on_space);

    let mut bwd: Vec<&str> = data.split(' ').rev().collect();
    bwd.reverse();
    assert_eq!(bwd, on_space);

    let fwd: Vec<&str> = data.split(|c: char| c == ' ').collect();
    assert_eq!( fwd, on_space);

    let mut bwd: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
    bwd.reverse();
    assert_eq!(bwd, on_space);

    // Unicode
    let on_umlaut = vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"];

    let fwd: Vec<&str> = data.split('ä').collect();
    assert_eq!( fwd, on_umlaut);

    let mut bwd: Vec<&str> = data.split('ä').rev().collect();
    bwd.reverse();
    assert_eq!(bwd, on_umlaut);

    let fwd: Vec<&str> = data.split(|c: char| c == 'ä').collect();
    assert_eq!( fwd, on_umlaut);

    let mut bwd: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
    bwd.reverse();
    assert_eq!(bwd, on_umlaut);
}
1946
// `splitn(sep, 3)` performs at most three splits from the front and
// leaves the rest of the string as the final piece.
#[test]
fn test_splitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let on_space = vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"];

    let pieces: Vec<&str> = data.splitn(' ', 3).collect();
    assert_eq!(pieces, on_space);

    let pieces: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
    assert_eq!(pieces, on_space);

    // Unicode
    let on_umlaut = vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"];

    let pieces: Vec<&str> = data.splitn('ä', 3).collect();
    assert_eq!(pieces, on_umlaut);

    let pieces: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
    assert_eq!(pieces, on_umlaut);
}
1964
// `rsplitn(sep, 3)` performs at most three splits from the back; the
// pieces are reversed before comparison so they read left to right.
#[test]
fn test_rsplitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let on_space = vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"];

    let mut pieces: Vec<&str> = data.rsplitn(' ', 3).collect();
    pieces.reverse();
    assert_eq!(pieces, on_space);

    let mut pieces: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
    pieces.reverse();
    assert_eq!(pieces, on_space);

    // Unicode
    let on_umlaut = vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"];

    let mut pieces: Vec<&str> = data.rsplitn('ä', 3).collect();
    pieces.reverse();
    assert_eq!(pieces, on_umlaut);

    let mut pieces: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
    pieces.reverse();
    assert_eq!(pieces, on_umlaut);
}
1986
// `split` keeps the empty piece after a trailing separator;
// `split_terminator` drops it.
#[test]
fn test_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let with_trailing: Vec<&str> = data.split('\n').collect();
    assert_eq!(with_trailing, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);

    let without_trailing: Vec<&str> = data.split_terminator('\n').collect();
    assert_eq!(without_trailing, vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
1997
// The same distinction between `split` and `split_terminator`, driven
// from the back and re-reversed before comparison.
#[test]
fn test_rev_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let mut with_trailing: Vec<&str> = data.split('\n').rev().collect();
    with_trailing.reverse();
    assert_eq!(with_trailing, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);

    let mut without_trailing: Vec<&str> = data.split_terminator('\n').rev().collect();
    without_trailing.reverse();
    assert_eq!(without_trailing, vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
2010
// `words` splits on spaces, tabs and newlines alike and yields no empty
// pieces for leading or trailing whitespace.
#[test]
fn test_words() {
    let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
    let expected = vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"];
    let found: Vec<&str> = data.words().collect();
    assert_eq!(found, expected)
}
2017
2018 #[test]
2019 fn test_nfd_chars() {
2020 assert_eq!("abc".nfd_chars().collect::<~str>(), "abc".to_owned());
2021 assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<~str>(), "d\u0307\u01c4".to_owned());
2022 assert_eq!("\u2026".nfd_chars().collect::<~str>(), "\u2026".to_owned());
2023 assert_eq!("\u2126".nfd_chars().collect::<~str>(), "\u03a9".to_owned());
2024 assert_eq!("\u1e0b\u0323".nfd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2025 assert_eq!("\u1e0d\u0307".nfd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2026 assert_eq!("a\u0301".nfd_chars().collect::<~str>(), "a\u0301".to_owned());
2027 assert_eq!("\u0301a".nfd_chars().collect::<~str>(), "\u0301a".to_owned());
2028 assert_eq!("\ud4db".nfd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
2029 assert_eq!("\uac1c".nfd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
2030 }
2031
2032 #[test]
2033 fn test_nfkd_chars() {
2034 assert_eq!("abc".nfkd_chars().collect::<~str>(), "abc".to_owned());
2035 assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<~str>(), "d\u0307DZ\u030c".to_owned());
2036 assert_eq!("\u2026".nfkd_chars().collect::<~str>(), "...".to_owned());
2037 assert_eq!("\u2126".nfkd_chars().collect::<~str>(), "\u03a9".to_owned());
2038 assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2039 assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2040 assert_eq!("a\u0301".nfkd_chars().collect::<~str>(), "a\u0301".to_owned());
2041 assert_eq!("\u0301a".nfkd_chars().collect::<~str>(), "\u0301a".to_owned());
2042 assert_eq!("\ud4db".nfkd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
2043 assert_eq!("\uac1c".nfkd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
2044 }
2045
// `lines` yields the same pieces whether or not the text ends with a
// newline.
#[test]
fn test_lines() {
    let expected = vec!["", "Märy häd ä little lämb", "", "Little lämb"];

    let terminated = "\nMäry häd ä little lämb\n\nLittle lämb\n";
    let found: Vec<&str> = terminated.lines().collect();
    assert_eq!(found, expected);

    let unterminated = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
    let found: Vec<&str> = unterminated.lines().collect();
    assert_eq!(found, expected);
}
2056
2057 #[test]
2058 fn test_split_strator() {
2059 fn t(s: &str, sep: &str, u: &[&str]) {
2060 let v: Vec<&str> = s.split_str(sep).collect();
2061 assert_eq!(v.as_slice(), u.as_slice());
2062 }
2063 t("--1233345--", "12345", ["--1233345--"]);
2064 t("abc::hello::there", "::", ["abc", "hello", "there"]);
2065 t("::hello::there", "::", ["", "hello", "there"]);
2066 t("hello::there::", "::", ["hello", "there", ""]);
2067 t("::hello::there::", "::", ["", "hello", "there", ""]);
2068 t("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam", "ä¸å", ["à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢", "Viá»t Nam"]);
2069 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2070 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2071 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
2072 t("", ".", [""]);
2073 t("zz", "zz", ["",""]);
2074 t("ok", "z", ["ok"]);
2075 t("zzz", "zz", ["","z"]);
2076 t("zzzzz", "zz", ["","","z"]);
2077 }
2078
2079 #[test]
2080 fn test_str_default() {
2081 use default::Default;
2082 fn t<S: Default + Str>() {
2083 let s: S = Default::default();
2084 assert_eq!(s.as_slice(), "");
2085 }
2086
2087 t::<&str>();
2088 t::<~str>();
2089 }
2090
// Both string types can be used through the `Container` trait; `len`
// is summed over slices of each.
#[test]
fn test_str_container() {
    fn total_len<S: Container>(items: &[S]) -> uint {
        items.iter().map(|item| item.len()).sum()
    }

    let digits = "01234".to_owned();
    assert_eq!(5, total_len(["012", "", "34"]));
    assert_eq!(5, total_len(["01".to_owned(), "2".to_owned(), "34".to_owned(), "".to_owned()]));
    assert_eq!(5, total_len([digits.as_slice()]));
}
2102
2103 #[test]
2104 fn test_str_from_utf8() {
2105 let xs = bytes!("hello");
2106 assert_eq!(from_utf8(xs), Some("hello"));
2107
2108 let xs = bytes!("ศà¹à¸à¸¢ä¸åViá»t Nam");
2109 assert_eq!(from_utf8(xs), Some("ศà¹à¸à¸¢ä¸åViá»t Nam"));
2110
2111 let xs = bytes!("hello", 0xff);
2112 assert_eq!(from_utf8(xs), None);
2113 }
2114
2115 #[test]
2116 fn test_str_from_utf8_owned() {
2117 let xs = bytes!("hello").to_owned();
2118 assert_eq!(from_utf8_owned(xs), Some("hello".to_owned()));
2119
2120 let xs = bytes!("ศà¹à¸à¸¢ä¸åViá»t Nam").to_owned();
2121 assert_eq!(from_utf8_owned(xs), Some("ศà¹à¸à¸¢ä¸åViá»t Nam".to_owned()));
2122
2123 let xs = bytes!("hello", 0xff).to_owned();
2124 assert_eq!(from_utf8_owned(xs), None);
2125 }
2126
2127 #[test]
2128 fn test_str_from_utf8_lossy() {
2129 let xs = bytes!("hello");
2130 assert_eq!(from_utf8_lossy(xs), Slice("hello"));
2131
2132 let xs = bytes!("ศà¹à¸à¸¢ä¸åViá»t Nam");
2133 assert_eq!(from_utf8_lossy(xs), Slice("ศà¹à¸à¸¢ä¸åViá»t Nam"));
2134
2135 let xs = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
2136 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_owned()));
2137
2138 let xs = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2139 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_owned()));
2140
2141 let xs = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
2142 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_owned()));
2143
2144 let xs = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
2145 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_owned()));
2146
2147 let xs = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
2148 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_owned()));
2149
2150 let xs = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
2151 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
2152 foo\U00010000bar".to_owned()));
2153
2154 // surrogates
2155 let xs = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
2156 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
2157 \uFFFD\uFFFD\uFFFDbar".to_owned()));
2158 }
2159
2160 #[test]
2161 fn test_from_str() {
2162 let owned: Option<~str> = from_str("string");
2163 assert_eq!(owned, Some("string".to_owned()));
2164 }
2165
#[test]
fn test_maybe_owned_traits() {
    // Borrowed variant: length, slicing, string conversion, formatting,
    // ordering against the other variant, and the default value.
    let borrowed = Slice("abcde");
    assert_eq!(borrowed.len(), 5);
    assert_eq!(borrowed.as_slice(), "abcde");
    assert_eq!(borrowed.to_str(), "abcde".to_owned());
    assert_eq!(format!("{}", borrowed), "abcde".to_owned());
    assert!(borrowed.lt(&Owned("bcdef".to_owned())));
    assert_eq!(Slice(""), Default::default());

    // Owned variant: the same set of checks.
    let owned = Owned("abcde".to_owned());
    assert_eq!(owned.len(), 5);
    assert_eq!(owned.as_slice(), "abcde");
    assert_eq!(owned.to_str(), "abcde".to_owned());
    assert_eq!(format!("{}", owned), "abcde".to_owned());
    assert!(owned.lt(&Slice("bcdef")));
    assert_eq!(Owned("".to_owned()), Default::default());

    // Comparison and equivalence across variants, in both directions.
    assert!(borrowed.cmp(&owned) == Equal);
    assert!(borrowed.equiv(&owned));

    assert!(owned.cmp(&borrowed) == Equal);
    assert!(owned.equiv(&borrowed));
}
2190
#[test]
fn test_maybe_owned_methods() {
    // A `Slice` reports itself as a slice and not as owned ...
    let borrowed = Slice("abcde");
    assert!(borrowed.is_slice());
    assert!(!borrowed.is_owned());

    // ... and an `Owned` reports the opposite.
    let owned = Owned("abcde".to_owned());
    assert!(!owned.is_slice());
    assert!(owned.is_owned());
}
2201
#[test]
fn test_maybe_owned_clone() {
    let borrowed = Slice("abcde");
    let owned = Owned("abcde".to_owned());

    // A clone compares equal to either variant holding the same text,
    // whichever variant it was cloned from.
    assert_eq!(owned, borrowed.clone());
    assert_eq!(owned, owned.clone());
    assert_eq!(borrowed, borrowed.clone());
    assert_eq!(borrowed, owned.clone());
}
2209
#[test]
fn test_maybe_owned_into_owned() {
    // Both variants convert into the same owned string.
    let expected = "abcde".to_owned();
    assert_eq!(Slice("abcde").into_owned(), expected);
    assert_eq!(Owned("abcde".to_owned()).into_owned(), expected);
}
2215
#[test]
fn test_into_maybe_owned() {
    let text = "abcde";

    // Converting a borrowed or an owned string gives a value that compares
    // equal to a `Slice` of the same text ...
    assert_eq!(text.into_maybe_owned(), Slice(text));
    assert_eq!(text.to_owned().into_maybe_owned(), Slice(text));

    // ... and equal to an `Owned` of the same text.
    assert_eq!(text.into_maybe_owned(), Owned(text.to_owned()));
    assert_eq!(text.to_owned().into_maybe_owned(), Owned(text.to_owned()));
}
2223 }
2224
2225 #[cfg(test)]
2226 mod bench {
2227 extern crate test;
2228 use self::test::Bencher;
2229 use super::*;
2230 use prelude::*;
2231
2232 #[bench]
2233 fn char_iterator(b: &mut Bencher) {
2234 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2235 let len = s.char_len();
2236
2237 b.iter(|| assert_eq!(s.chars().len(), len));
2238 }
2239
2240 #[bench]
2241 fn char_iterator_ascii(b: &mut Bencher) {
2242 let s = "Mary had a little lamb, Little lamb
2243 Mary had a little lamb, Little lamb
2244 Mary had a little lamb, Little lamb
2245 Mary had a little lamb, Little lamb
2246 Mary had a little lamb, Little lamb
2247 Mary had a little lamb, Little lamb";
2248 let len = s.char_len();
2249
2250 b.iter(|| assert_eq!(s.chars().len(), len));
2251 }
2252
2253 #[bench]
2254 fn char_iterator_rev(b: &mut Bencher) {
2255 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2256 let len = s.char_len();
2257
2258 b.iter(|| assert_eq!(s.chars().rev().len(), len));
2259 }
2260
2261 #[bench]
2262 fn char_indicesator(b: &mut Bencher) {
2263 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2264 let len = s.char_len();
2265
2266 b.iter(|| assert_eq!(s.char_indices().len(), len));
2267 }
2268
2269 #[bench]
2270 fn char_indicesator_rev(b: &mut Bencher) {
2271 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2272 let len = s.char_len();
2273
2274 b.iter(|| assert_eq!(s.char_indices().rev().len(), len));
2275 }
2276
2277 #[bench]
2278 fn split_unicode_ascii(b: &mut Bencher) {
2279 let s = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Namà¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam";
2280
2281 b.iter(|| assert_eq!(s.split('V').len(), 3));
2282 }
2283
2284 #[bench]
2285 fn split_unicode_not_ascii(b: &mut Bencher) {
2286 struct NotAscii(char);
2287 impl CharEq for NotAscii {
2288 fn matches(&mut self, c: char) -> bool {
2289 let NotAscii(cc) = *self;
2290 cc == c
2291 }
2292 fn only_ascii(&self) -> bool { false }
2293 }
2294 let s = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Namà¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam";
2295
2296 b.iter(|| assert_eq!(s.split(NotAscii('V')).len(), 3));
2297 }
2298
2299
2300 #[bench]
2301 fn split_ascii(b: &mut Bencher) {
2302 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2303 let len = s.split(' ').len();
2304
2305 b.iter(|| assert_eq!(s.split(' ').len(), len));
2306 }
2307
2308 #[bench]
2309 fn split_not_ascii(b: &mut Bencher) {
2310 struct NotAscii(char);
2311 impl CharEq for NotAscii {
2312 #[inline]
2313 fn matches(&mut self, c: char) -> bool {
2314 let NotAscii(cc) = *self;
2315 cc == c
2316 }
2317 fn only_ascii(&self) -> bool { false }
2318 }
2319 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2320 let len = s.split(' ').len();
2321
2322 b.iter(|| assert_eq!(s.split(NotAscii(' ')).len(), len));
2323 }
2324
2325 #[bench]
2326 fn split_extern_fn(b: &mut Bencher) {
2327 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2328 let len = s.split(' ').len();
2329 fn pred(c: char) -> bool { c == ' ' }
2330
2331 b.iter(|| assert_eq!(s.split(pred).len(), len));
2332 }
2333
2334 #[bench]
2335 fn split_closure(b: &mut Bencher) {
2336 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2337 let len = s.split(' ').len();
2338
2339 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').len(), len));
2340 }
2341
2342 #[bench]
2343 fn split_slice(b: &mut Bencher) {
2344 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2345 let len = s.split(' ').len();
2346
2347 b.iter(|| assert_eq!(s.split(&[' ']).len(), len));
2348 }
2349
2350 #[bench]
2351 fn is_utf8_100_ascii(b: &mut Bencher) {
2352
2353 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2354 Lorem ipsum dolor sit amet, consectetur. ");
2355
2356 assert_eq!(100, s.len());
2357 b.iter(|| {
2358 is_utf8(s)
2359 });
2360 }
2361
2362 #[bench]
2363 fn is_utf8_100_multibyte(b: &mut Bencher) {
2364 let s = bytes!("ððððððà¸à¸£Ø¯ÙÙØ© اÙÙÙÙتà¸à¸¨à¹à¸à¸¢ä¸åð
ð¿ð»ðð¹ð»ð°");
2365 assert_eq!(100, s.len());
2366 b.iter(|| {
2367 is_utf8(s)
2368 });
2369 }
2370
2371 #[bench]
2372 fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
2373 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2374 Lorem ipsum dolor sit amet, consectetur. ");
2375
2376 assert_eq!(100, s.len());
2377 b.iter(|| {
2378 let _ = from_utf8_lossy(s);
2379 });
2380 }
2381
2382 #[bench]
2383 fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
2384 let s = bytes!("ððððððà¸à¸£Ø¯ÙÙØ© اÙÙÙÙتà¸à¸¨à¹à¸à¸¢ä¸åð
ð¿ð»ðð¹ð»ð°");
2385 assert_eq!(100, s.len());
2386 b.iter(|| {
2387 let _ = from_utf8_lossy(s);
2388 });
2389 }
2390
2391 #[bench]
2392 fn from_utf8_lossy_invalid(b: &mut Bencher) {
2393 let s = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2394 b.iter(|| {
2395 let _ = from_utf8_lossy(s);
2396 });
2397 }
2398
2399 #[bench]
2400 fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
2401 let s = Vec::from_elem(100, 0xF5u8);
2402 b.iter(|| {
2403 let _ = from_utf8_lossy(s.as_slice());
2404 });
2405 }
2406
2407 #[bench]
2408 fn bench_connect(b: &mut Bencher) {
2409 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2410 let sep = "â";
2411 let v = [s, s, s, s, s, s, s, s, s, s];
2412 b.iter(|| {
2413 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2414 })
2415 }
2416 }
libstd/str.rs:415:4-415:4 -fn- definition:
fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
if i >= total {
0
references:- 6464: 3 => {
465: match (byte, safe_get(v, i, total)) {
466: (0xE0 , 0xA0 .. 0xBF) => (),
--
482: 4 => {
483: match (byte, safe_get(v, i, total)) {
484: (0xF0 , 0x90 .. 0xBF) => (),
--
497: i += 1;
498: if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
499: error!();
libstd/str.rs:230:19-230:19 -enum- definition:
enum NormalizationForm {
NFD,
NFKD
references:- 3231: enum NormalizationForm {
--
239: pub struct Normalizations<'a> {
240: kind: NormalizationForm,
241: iter: Chars<'a>,
libstd/str.rs:144:35-144:35 -trait- definition:
/// Methods for vectors of strings
pub trait StrVector {
/// Concatenate a vector of strings.
references:- 2153: impl<'a, S: Str> StrVector for &'a [S] {
154: fn concat(&self) -> ~str {
--
194: impl<'a, S: Str> StrVector for Vec<S> {
195: #[inline]
libstd/str.rs:709:4-709:4 -fn- definition:
pub unsafe fn from_utf8_owned(v: ~[u8]) -> ~str {
cast::transmute(v)
}
references:- 3713: /// Converts a byte to a string.
714: pub unsafe fn from_byte(u: u8) -> ~str { from_utf8_owned(box [u]) }
libstd/ascii.rs:
417: str::raw::from_utf8_owned(bytes)
418: }
libstd/str.rs:
110: if is_utf8(vv) {
111: Some(unsafe { raw::from_utf8_owned(vv) })
112: } else {
libstd/str.rs:412:4-412:4 -fn- definition:
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
unsafe { *xs.unsafe_ref(i) }
}
references:- 2437: let i_ = i;
438: let byte = unsafe_get(v, i);
439: i += 1;
libstd/str.rs:525:27-525:27 -enum- definition:
/// needed but not always.
pub enum MaybeOwned<'a> {
/// A borrowed string
references:- 28568: #[inline]
569: fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
570: }
--
643: #[inline]
644: fn default() -> MaybeOwned<'a> { Slice("") }
645: }
--
657: impl<'a> fmt::Show for MaybeOwned<'a> {
658: #[inline]
libstd/path/mod.rs:
496: #[inline]
497: pub fn as_maybe_owned(&self) -> MaybeOwned<'a> {
498: from_utf8_lossy(if self.filename {
--
583: impl<'a> BytesContainer for str::MaybeOwned<'a> {
584: #[inline]
--
592: #[inline]
593: fn is_str(_: Option<str::MaybeOwned>) -> bool { true }
594: }
libstd/str.rs:
642: impl<'a> Default for MaybeOwned<'a> {
643: #[inline]
libstd/str.rs:746:50-746:50 -trait- definition:
/// Any string that can be represented as a slice
pub trait StrAllocating: Str {
/// Convert `self` into a ~str, not making a copy if possible.
references:- 5917: impl<'a> StrAllocating for ~str {
918: #[inline]
libstd/strbuf.rs:
273: impl StrAllocating for StrBuf {
274: #[inline]
libstd/path/windows.rs:
687: fn normalize_<S: StrAllocating>(s: S) -> (Option<PathPrefix>, StrBuf) {
688: // make borrowck happy
libstd/str.rs:
617: impl<'a> StrAllocating for MaybeOwned<'a> {
618: #[inline]
libstd/str.rs:210:51-210:51 -fn- definition:
// Helper functions used for Unicode normalization
fn canonical_sort(comb: &mut [(char, u8)]) {
use iter::range;
references:- 2275: if class == 0 && !*sorted {
276: canonical_sort(buffer.as_mut_slice());
277: *sorted = true;
--
285: if !self.sorted {
286: canonical_sort(self.buffer.as_mut_slice());
287: self.sorted = true;
libstd/str.rs:533:63-533:63 -NK_AS_STR_TODO- definition:
/// SendStr is a specialization of `MaybeOwned` to be sendable
pub type SendStr = MaybeOwned<'static>;
impl<'a> MaybeOwned<'a> {
references:- 2libstd/task.rs:
67: /// A name for the task-to-be, for identification in failure messages
68: pub name: Option<SendStr>,
69: /// The size of the stack for the spawned task
libstd/rt/task.rs:
52: pub destroyed: bool,
53: pub name: Option<SendStr>,
libstd/str.rs:696:4-696:4 -fn- definition:
pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
let mut curr = buf;
let mut i = 0;
references:- 2libstd/os.rs:
708: str::raw::from_c_str(p as *c_char)
709: }
libstd/unstable/dynamic_lib.rs:
189: } else {
190: Err(str::raw::from_c_str(last_error))
191: };
libstd/str.rs:403:8-403:8 -fn- definition:
/// ```
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
if is_utf8(v) {
references:- 5libstd/os.rs:
174: let k = str::from_utf8_lossy(k).into_owned();
175: let v = str::from_utf8_lossy(v).into_owned();
176: (k,v)
--
827: fn real_args() -> Vec<~str> {
828: real_args_as_bytes().move_iter().map(|v| str::from_utf8_lossy(v).into_owned()).collect()
829: }
libstd/path/mod.rs:
497: pub fn as_maybe_owned(&self) -> MaybeOwned<'a> {
498: from_utf8_lossy(if self.filename {
499: match self.path.filename() {
libstd/os.rs:
173: env_as_bytes().move_iter().map(|(k,v)| {
174: let k = str::from_utf8_lossy(k).into_owned();
175: let v = str::from_utf8_lossy(v).into_owned();
libstd/str.rs:238:19-238:19 -struct- definition:
pub struct Normalizations<'a> {
kind: NormalizationForm,
iter: Chars<'a>,
references:- 9902: fn nfkd_chars<'a>(&'a self) -> Normalizations<'a> {
903: Normalizations {
904: iter: self.as_slice().chars(),
libstd/str.rs:556:41-556:41 -trait- definition:
/// Trait for moving into a `MaybeOwned`
pub trait IntoMaybeOwned<'a> {
/// Moves self into a `MaybeOwned`
references:- 4567: impl<'a> IntoMaybeOwned<'a> for &'a str {
568: #[inline]
--
572: impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
573: #[inline]
libstd/task.rs:
136: /// only in failure messages.
137: pub fn named<S: IntoMaybeOwned<'static>>(mut self, name: S) -> TaskBuilder {
138: self.opts.name = Some(name.into_maybe_owned());
libstd/str.rs:680:4-680:4 -fn- definition:
pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
let v = Slice { data: buf, len: len };
let bytes: &[u8] = ::cast::transmute(v);
references:- 2691: unsafe fn strdup_uniq(ptr: *u8, len: uint) -> ~str {
692: from_buf_len(ptr, len)
693: }
--
702: }
703: from_buf_len(buf as *u8, i as uint)
704: }