(index<- )        ./libstd/str.rs

    git branch:    * master           5200215 auto merge of #14035 : alexcrichton/rust/experimental, r=huonw
    modified:    Fri May  9 13:02:28 2014

    1  // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
    2  // file at the top-level directory of this distribution and at
    3  // http://rust-lang.org/COPYRIGHT.
    4  //
    5  // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
    6  // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
    7  // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
    8  // option. This file may not be copied, modified, or distributed
    9  // except according to those terms.
   10
   11  /*!
   12
   13  Unicode string manipulation (`str` type)
   14
   15  # Basic Usage
   16
   17  Rust's string type is one of the core primitive types of the language. While
   18  represented by the name `str`, the name `str` is not actually a valid type in
   19  Rust. Each string must also be decorated with its ownership. This means that
   20  there are two common kinds of strings in Rust:
   21
   22  * `~str` - This is an owned string. This type obeys all of the normal semantics
   23             of the `Box<T>` types, meaning that it has one, and only one,
   24             owner. This type cannot be implicitly copied, and is moved out of
   25             when passed to other functions.
   26
   27  * `&str` - This is the borrowed string type. This type of string can only be
   28             created from the other kind of string. As the name "borrowed"
   29             implies, this type of string is owned elsewhere, and this string
   30             cannot be moved out of.
   31
   32  As an example, here are a few different kinds of strings.
   33
   34  ```rust
   35  fn main() {
   36      let owned_string = "I am an owned string".to_owned();
   37      let borrowed_string1 = "This string is borrowed with the 'static lifetime";
   38      let borrowed_string2: &str = owned_string;   // owned strings can be borrowed
   39  }
   40  ```
   41
   42  From the example above, you can see that Rust has 2 different kinds of string
   43  literals. The owned literals correspond to the owned string types, but the
   44  "borrowed literal" is actually more akin to C's concept of a static string.
   45
   46  When a string is declared without a `~` sigil, then the string is allocated
   47  statically in the rodata of the executable/library. The string then has the
   48  type `&'static str` meaning that the string is valid for the `'static`
   49  lifetime, otherwise known as the lifetime of the entire program. As can be
   50  inferred from the type, these static strings are not mutable.
   51
   52  # Mutability
   53
   54  Many languages have immutable strings by default, and Rust has a particular
   55  flavor on this idea. As with the rest of Rust types, strings are immutable by
   56  default. If a string is declared as `mut`, however, it may be mutated. This
   57  works the same way as the rest of Rust's type system in the sense that if
   58  there's a mutable reference to a string, there may only be one mutable reference
   59  to that string. With these guarantees, strings can easily transition between
   60  being mutable/immutable with the same benefits of having mutable strings in
   61  other languages.
   62
   63  # Representation
   64
   65  Rust's string type, `str`, is a sequence of unicode codepoints encoded as a
   66  stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
   67  encoded UTF-8 sequences. Additionally, strings are not null-terminated
   68  and can contain null codepoints.
   69
   70  The actual representation of strings has a direct mapping to vectors:
   71
   72  * `~str` is the same as `~[u8]`
   73  * `&str` is the same as `&[u8]`
   74
   75  */
   76
   77  use cast;
   78  use cast::transmute;
   79  use char;
   80  use char::Char;
   81  use clone::Clone;
   82  use cmp::{Eq, TotalEq, Ord, TotalOrd, Equiv, Ordering};
   83  use container::Container;
   84  use fmt;
   85  use io::Writer;
   86  use iter::{Iterator, range, AdditiveIterator};
   87  use option::{None, Option, Some};
   88  use from_str::FromStr;
   89  use slice::{ImmutableVector, MutableVector, CloneableVector};
   90  use slice::Vector;
   91  use vec::Vec;
   92  use default::Default;
   93  use strbuf::StrBuf;
   94
   95  pub use core::str::{from_utf8, CharEq, Chars, CharOffsets, RevChars};
   96  pub use core::str::{RevCharOffsets, Bytes, RevBytes, CharSplits, RevCharSplits};
   97  pub use core::str::{CharSplitsN, Words, AnyLines, MatchIndices, StrSplits};
   98  pub use core::str::{eq_slice, eq, is_utf8, is_utf16, UTF16Items};
   99  pub use core::str::{UTF16Item, ScalarValue, LoneSurrogate, utf16_items};
  100  pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
  101  pub use core::str::{Str, StrSlice};
  102
  103  /*
  104  Section: Creating a string
  105  */
  106
  107  /// Takes ownership of a byte vector and turns it into a utf-8 string.
  108  /// Yields `None` when the bytes are not valid UTF-8.
  109  pub fn from_utf8_owned(vv: ~[u8]) -> Option<~str> {
  110      if !is_utf8(vv) {
  111          return None;
  112      }
  113      // The bytes were validated just above, so the unchecked conversion is fine.
  114      Some(unsafe { raw::from_utf8_owned(vv) })
  115  }
  116
  117  impl FromStr for ~str {
  118      #[inline]
  119      fn from_str(s: &str) -> Option<~str> {
               // Any `&str` can be copied into an owned string, so this is always `Some`.
               let copy = s.to_owned();
               Some(copy)
           }
  120  }
  121
  122  /// Convert a single ASCII byte to a one-character string.
  123  ///
  124  /// # Failure
  125  ///
  126  /// Fails if `b` is not an ASCII byte (that is, if `b >= 128`): such a byte
       /// is not valid UTF-8 when it stands alone.
  127  pub fn from_byte(b: u8) -> ~str {
  128      assert!(b < 128u8);
           // The assertion guarantees a lone ASCII byte, so the unchecked
           // conversion in `raw` cannot produce an invalid string.
  129      unsafe { raw::from_byte(b) }
  130  }
  131
  132  /// Build a string holding exactly the character `ch`.
  133  pub fn from_char(ch: char) -> ~str {
  134      let mut single = StrBuf::new();
  135      single.push_char(ch);
  136      single.into_owned()
  137  }
  138
  139  /// Build a string out of the characters in `chs`, keeping their order.
  140  pub fn from_chars(chs: &[char]) -> ~str {
  141      let mut out = StrBuf::with_capacity(chs.len());
           for ch in chs.iter() {
               out.push_char(*ch);
           }
           out.into_owned()
  142  }
  143
  144  /// Methods for vectors of strings: joining the elements into a single `~str`.
  145  pub trait StrVector {
  146      /// Concatenate a vector of strings, with nothing between the elements.
  147      fn concat(&self) -> ~str;
  148
  149      /// Concatenate a vector of strings, placing a given separator between each.
  150      fn connect(&self, sep: &str) -> ~str;
  151  }
  152
  153  impl<'a, S: Str> StrVector for &'a [S] {
           // Joins every element back to back; an empty slice gives "".
  154      fn concat(&self) -> ~str {
  155          if self.is_empty() {
                   return "".to_owned();
               }
  156
  157          // The length sum may overflow, but it is only a capacity hint:
               // `push_str` checks boundaries on its own.
  158          let total = self.iter().map(|piece| piece.as_slice().len()).sum();
  160          let mut joined = StrBuf::with_capacity(total);
  162          for piece in self.iter() {
  163              joined.push_str(piece.as_slice())
  164          }
  166          joined.into_owned()
  167      }
  168
           // Joins every element with `sep` between neighbours; an empty slice gives "".
  169      fn connect(&self, sep: &str) -> ~str {
  170          if self.is_empty() {
                   return "".to_owned();
               }
  171
  172          // Without a separator this is plain concatenation, which is faster.
  173          if sep.is_empty() {
                   return self.concat();
               }
  174
  175          // `self.len() - 1` is only correct because `self` is known to be non-empty.
  176          // The total may overflow, but it is only a capacity hint: `push_str`
               // checks boundaries on its own.
  177          let total = sep.len() * (self.len() - 1)
  178              + self.iter().map(|piece| piece.as_slice().len()).sum();
  179          let mut joined = StrBuf::with_capacity(total);
  181
  182          for (idx, piece) in self.iter().enumerate() {
                   // A separator goes in front of every element except the first.
  183              if idx != 0 {
  186                  joined.push_str(sep);
  187              }
  188              joined.push_str(piece.as_slice());
  189          }
  190          joined.into_owned()
  191      }
  192  }
  193
  194  impl<'a, S: Str> StrVector for Vec<S> {
           // Both methods borrow the vector as a slice and forward to the
           // `&[S]` implementation of `StrVector`.
  195      #[inline]
  196      fn concat(&self) -> ~str {
  197          self.as_slice().concat()
  198      }
  199
  200      #[inline]
  201      fn connect(&self, sep: &str) -> ~str {
  202          self.as_slice().connect(sep)
  203      }
  204  }
  205
  206  /*
  207  Section: Iterators
  208  */
  209
  210  // Helper functions used for Unicode normalization
       //
       // `canonical_sort` reorders `comb` in place with a bubble sort keyed on the
       // `u8` of each pair (the callers in this file store the canonical combining
       // class there). Two neighbours are only swapped when both classes are
       // non-zero and out of order, so entries with class 0 never move and equal
       // classes keep their relative order.
  211  fn canonical_sort(comb: &mut [(char, u8)]) {
  212      use iter::range;
  213      use tuple::Tuple2;
  214
  215      let len = comb.len();
  216      for i in range(0, len) {
  217          let mut swapped = false;
               // Each outer pass shortens the scanned prefix by one element.
  218          for j in range(1, len-i) {
                   // presumably `ref1()` borrows the second tuple field — confirm in `tuple::Tuple2`
  219              let class_a = *comb[j-1].ref1();
  220              let class_b = *comb[j].ref1();
  221              if class_a != 0 && class_b != 0 && class_a > class_b {
  222                  comb.swap(j-1, j);
  223                  swapped = true;
  224              }
  225          }
               // A pass without swaps means the slice is already ordered.
  226          if !swapped { break; }
  227      }
  228  }
  229
  230  #[deriving(Clone)]
       // Selects which decomposition `Normalizations` applies to each character.
  231  enum NormalizationForm {
           // Uses `char::decompose_canonical`.
  232      NFD,
           // Uses `char::decompose_compatible`.
  233      NFKD
  234  }
  235
  236  /// External iterator for a string's normalization's characters.
  237  /// Use with the `std::iter` module.
       ///
       /// Created by `nfd_chars` and `nfkd_chars` on `StrAllocating`.
  238  #[deriving(Clone)]
  239  pub struct Normalizations<'a> {
           // Which decomposition to apply (canonical or compatibility).
  240      kind: NormalizationForm,
           // The not-yet-decomposed characters of the source string.
  241      iter: Chars<'a>,
           // Decomposed characters waiting to be yielded, each paired with its
           // canonical combining class.
  242      buffer: Vec<(char, u8)>,
           // Whether `buffer` has been passed through `canonical_sort` since it
           // was last refilled.
  243      sorted: bool
  244  }
  245
  246  impl<'a> Iterator<char> for Normalizations<'a> {
           // Yields the next character of the decomposed, canonically ordered
           // string, or `None` once both the buffer and the source are exhausted.
  247      #[inline]
  248      fn next(&mut self) -> Option<char> {
  249          use unicode::decompose::canonical_combining_class;
  250
               // Fast path: the front of the buffer can be handed out right away
               // when it has class 0, or when the buffer is already sorted.
  251          match self.buffer.as_slice().head() {
  252              Some(&(c, 0)) => {
  253                  self.sorted = false;
  254                  self.buffer.shift();
  255                  return Some(c);
  256              }
  257              Some(&(c, _)) if self.sorted => {
  258                  self.buffer.shift();
  259                  return Some(c);
  260              }
                   // Empty buffer, or an unsorted non-zero class at the front:
                   // more input has to be decomposed first.
  261              _ => self.sorted = false
  262          }
  263
  264          let decomposer = match self.kind {
  265              NFD => char::decompose_canonical,
  266              NFKD => char::decompose_compatible
  267          };
  268
  269          if !self.sorted {
  270              for ch in self.iter {
  271                  let buffer = &mut self.buffer;
  272                  let sorted = &mut self.sorted;
  273                  decomposer(ch, |d| {
  274                      let class = canonical_combining_class(d);
                           // The first class-0 character seen closes the current
                           // run: sort what was collected before it, then append.
  275                      if class == 0 && !*sorted {
  276                          canonical_sort(buffer.as_mut_slice());
  277                          *sorted = true;
  278                      }
  279                      buffer.push((d, class));
  280                  });
                       // Stop pulling source characters once a run has been sorted.
  281                  if *sorted { break }
  282              }
  283          }
  284
               // The source ran out without a class-0 character: sort the remainder.
  285          if !self.sorted {
  286              canonical_sort(self.buffer.as_mut_slice());
  287              self.sorted = true;
  288          }
  289
  290          match self.buffer.shift() {
  291              Some((c, 0)) => {
                       // Handing out a class-0 character clears the sorted flag.
  292                  self.sorted = false;
  293                  Some(c)
  294              }
  295              Some((c, _)) => Some(c),
  296              None => None
  297          }
  298      }
  299
           // Reuses the lower bound of the underlying `Chars` iterator; no upper
           // bound is reported.
  300      fn size_hint(&self) -> (uint, Option<uint>) {
  301          let (lower, _) = self.iter.size_hint();
  302          (lower, None)
  303      }
  304  }
  305
  306  /// Replace all occurrences of one string with another
  307  ///
  308  /// # Arguments
  309  ///
  310  /// * s - The string containing substrings to replace
  311  /// * from - The string to replace
  312  /// * to - The replacement string
  313  ///
  314  /// # Return value
  315  ///
  316  /// The original string with all occurrences of `from` replaced with `to`
  317  pub fn replace(s: &str, from: &str, to: &str) -> ~str {
           // `StrAllocating::replace` runs the very same match/copy loop on
           // `self.as_slice()`, so forward to it rather than repeating the loop.
  318      s.replace(from, to)
  327  }
  328
  329  /*
  330  Section: Misc
  331  */
  332
  333  /// Decode a UTF-16 encoded vector `v` into a string, returning `None`
  334  /// if `v` contains any invalid data (a lone surrogate).
  335  ///
  336  /// # Example
  337  ///
  338  /// ```rust
  339  /// use std::str;
  340  ///
  341  /// // 𝄞music
  342  /// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
  343  ///              0x0073, 0x0069, 0x0063];
  344  /// assert_eq!(str::from_utf16(v), Some("𝄞music".to_owned()));
  345  ///
  346  /// // 𝄞mu<invalid>ic
  347  /// v[4] = 0xD800;
  348  /// assert_eq!(str::from_utf16(v), None);
  349  /// ```
  350  pub fn from_utf16(v: &[u16]) -> Option<~str> {
  351      let mut decoded = StrBuf::with_capacity(v.len() / 2);
  352      for item in utf16_items(v) {
  353          match item {
                   // One unpaired surrogate makes the whole input invalid.
  355              LoneSurrogate(_) => return None,
  354              ScalarValue(ch) => decoded.push_char(ch)
  356          }
  357      }
  358      Some(decoded.into_owned())
  359  }
  360
  361  /// Decode a UTF-16 encoded vector `v` into a string, substituting the
  362  /// replacement character (U+FFFD) for every piece of invalid data.
  363  ///
  364  /// # Example
  365  /// ```rust
  366  /// use std::str;
  367  ///
  368  /// // 𝄞mus<invalid>ic<invalid>
  369  /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
  370  ///          0x0073, 0xDD1E, 0x0069, 0x0063,
  371  ///          0xD834];
  372  ///
  373  /// assert_eq!(str::from_utf16_lossy(v),
  374  ///            "𝄞mus\uFFFDic\uFFFD".to_owned());
  375  /// ```
  376  pub fn from_utf16_lossy(v: &[u16]) -> ~str {
  377      utf16_items(v)
               .map(|item| item.to_char_lossy())
               .collect()
  378  }
  379
  380  // Return the initial codepoint accumulator for the first byte.
  381  // The first byte is special, only want bottom 5 bits for width 2, 4 bits
  382  // for width 3, and 3 bits for width 4
       // (`0x7F >> $width` builds exactly that mask; the result is widened to u32).
  383  macro_rules! utf8_first_byte(
  384      ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
  385  )
  386
  387  // return the value of $ch updated with continuation byte $byte
       // (shifts the accumulator left by 6 bits and ORs in the low 6 bits of $byte)
  388  macro_rules! utf8_acc_cont_byte(
  389      ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
  390  )
  391
       // Bit pattern `10xxxxxx`: `from_utf8_lossy` masks a byte with 192 (the top
       // two bits) and compares against this tag to recognise a continuation byte.
  392  static TAG_CONT_U8: u8 = 128u8;
  393
  394  /// Converts a vector of bytes to a new utf-8 string.
  395  /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
       ///
       /// When `v` is already valid UTF-8 it is returned as a borrowed `Slice`
       /// without allocating; otherwise a repaired copy is returned as `Owned`.
  396  ///
  397  /// # Example
  398  ///
  399  /// ```rust
  400  /// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
  401  /// let output = std::str::from_utf8_lossy(input);
  402  /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
  403  /// ```
  404  pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
  405      if is_utf8(v) {
  406          return Slice(unsafe { cast::transmute(v) })
  407      }
  408
  409      static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
  410      let mut i = 0;
  411      let total = v.len();
           // Unchecked read; callers must guarantee `i` is in bounds.
  412      fn unsafe_get(xs: &[u8], i: uint) -> u8 {
  413          unsafe { *xs.unsafe_ref(i) }
  414      }
           // Bounds-checked read that yields 0 past the end. 0 never passes any of
           // the continuation-byte checks below, so a truncated sequence is
           // reported as an error.
  415      fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
  416          if i >= total {
  417              0
  418          } else {
  419              unsafe_get(xs, i)
  420          }
  421      }
  422
  423      let mut res = StrBuf::with_capacity(total);
  424
  431      // subseqidx is the index of the first byte of the subsequence we're looking at.
  432      // It's used to copy a bunch of contiguous good codepoints at once instead of copying
  433      // them one by one.
  434      let mut subseqidx = 0;
  435
  436      while i < total {
               // `i_` remembers where the current sequence starts; `i` moves on.
  437          let i_ = i;
  438          let byte = unsafe_get(v, i);
  439          i += 1;
  440
               // Flush the good bytes seen so far, emit one replacement character
               // for the bad sequence, and resume scanning at the current `i`.
  441          macro_rules! error(() => ({
  442              unsafe {
  443                  if subseqidx != i_ {
  444                      res.push_bytes(v.slice(subseqidx, i_));
  445                  }
  446                  subseqidx = i;
  447                  res.push_bytes(REPLACEMENT);
  448              }
  449          }))
  450
  451          if byte < 128u8 {
  452              // subseqidx handles this
  453          } else {
  454              let w = utf8_char_width(byte);
  455
                   // For widths 3 and 4 the second byte is range-checked against
                   // the lead byte; the remaining bytes only need the continuation tag.
  456              match w {
  457                  2 => {
  458                      if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
  459                          error!();
  460                          continue;
  461                      }
  462                      i += 1;
  463                  }
  464                  3 => {
  465                      match (byte, safe_get(v, i, total)) {
  466                          (0xE0        , 0xA0 .. 0xBF) => (),
  467                          (0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
  468                          (0xED        , 0x80 .. 0x9F) => (),
  469                          (0xEE .. 0xEF, 0x80 .. 0xBF) => (),
  470                          _ => {
  471                              error!();
  472                              continue;
  473                          }
  474                      }
  475                      i += 1;
  476                      if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
  477                          error!();
  478                          continue;
  479                      }
  480                      i += 1;
  481                  }
  482                  4 => {
  483                      match (byte, safe_get(v, i, total)) {
  484                          (0xF0        , 0x90 .. 0xBF) => (),
  485                          (0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
  486                          (0xF4        , 0x80 .. 0x8F) => (),
  487                          _ => {
  488                              error!();
  489                              continue;
  490                          }
  491                      }
  492                      i += 1;
  493                      if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
  494                          error!();
  495                          continue;
  496                      }
  497                      i += 1;
  498                      if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
  499                          error!();
  500                          continue;
  501                      }
  502                      i += 1;
  503                  }
  504                  _ => {
  505                      error!();
  506                      continue;
  507                  }
  508              }
  509          }
  510      }
           // Copy whatever good bytes follow the last replacement.
  511      if subseqidx < total {
  512          unsafe {
  513              res.push_bytes(v.slice(subseqidx, total))
  514          };
  515      }
  516      Owned(res.into_owned())
  517  }
  518
  519  /*
  520  Section: MaybeOwned
  521  */
  522
  523  /// A MaybeOwned is a string that can hold either a ~str or a &str.
  524  /// This can be useful as an optimization when an allocation is sometimes
  525  /// needed but not always.
       ///
       /// `from_utf8_lossy` is one producer: it returns `Slice` for input that is
       /// already valid and `Owned` when it had to build a repaired copy.
  526  pub enum MaybeOwned<'a> {
  527      /// A borrowed string
  528      Slice(&'a str),
  529      /// An owned string
  530      Owned(~str)
  531  }
  532
  533  /// SendStr is a specialization of `MaybeOwned` to be sendable: its borrowed
       /// variant can only hold a `&'static str`.
  534  pub type SendStr = MaybeOwned<'static>;
  535
  536  impl<'a> MaybeOwned<'a> {
  537      /// Tells whether this `MaybeOwned` holds the `Owned` variant.
  538      #[inline]
  539      pub fn is_owned(&self) -> bool {
  540          match *self {
  542              Owned(_) => true,
  541              Slice(_) => false
  543          }
  544      }
  545
  546      /// Tells whether this `MaybeOwned` holds the `Slice` (borrowed) variant.
  547      #[inline]
  548      pub fn is_slice(&self) -> bool {
               // There are only two variants, so "slice" is exactly "not owned".
  549          !self.is_owned()
  553      }
  554  }
  555
  556  /// Trait for moving into a `MaybeOwned`
       ///
       /// Implemented in this file for `~str`, `&'a str` and `MaybeOwned<'a>` itself.
  557  pub trait IntoMaybeOwned<'a> {
  558      /// Moves self into a `MaybeOwned`
  559      fn into_maybe_owned(self) -> MaybeOwned<'a>;
  560  }
  561
  562  impl<'a> IntoMaybeOwned<'a> for ~str {
           // An owned string is wrapped as-is in `Owned`.
  563      #[inline]
  564      fn into_maybe_owned(self) -> MaybeOwned<'a> { Owned(self) }
  565  }
  566
  567  impl<'a> IntoMaybeOwned<'a> for &'a str {
           // A borrowed string is wrapped as-is in `Slice`.
  568      #[inline]
  569      fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
  570  }
  571
  572  impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
           // Identity conversion: the value is returned unchanged.
  573      #[inline]
  574      fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
  575  }
  576
  577  impl<'a> Eq for MaybeOwned<'a> {
           // Compares the string contents only; `Slice` and `Owned` holding the
           // same text are equal.
  578      #[inline]
  579      fn eq(&self, other: &MaybeOwned) -> bool {
  580          self.as_slice() == other.as_slice()
  581      }
  582  }
  583
  584  impl<'a> TotalEq for MaybeOwned<'a> {}
  585
  586  impl<'a> Ord for MaybeOwned<'a> {
           // Ordering is delegated to the underlying string slices.
  587      #[inline]
  588      fn lt(&self, other: &MaybeOwned) -> bool {
  589          self.as_slice().lt(&other.as_slice())
  590      }
  591  }
  592
  593  impl<'a> TotalOrd for MaybeOwned<'a> {
           // Total ordering is likewise that of the underlying string slices.
  594      #[inline]
  595      fn cmp(&self, other: &MaybeOwned) -> Ordering {
  596          self.as_slice().cmp(&other.as_slice())
  597      }
  598  }
  599
  600  impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
           // Lets a `MaybeOwned` be compared against any other `Str` type by content.
  601      #[inline]
  602      fn equiv(&self, other: &S) -> bool {
  603          self.as_slice() == other.as_slice()
  604      }
  605  }
  606
  607  impl<'a> Str for MaybeOwned<'a> {
           // Borrows the contents as a `&str`, whichever variant is held.
  608      #[inline]
  609      fn as_slice<'b>(&'b self) -> &'b str {
  610          match *self {
  611              Slice(s) => s,
  612              Owned(ref s) => s.as_slice()
  613          }
  614      }
  615  }
  616
  617  impl<'a> StrAllocating for MaybeOwned<'a> {
           // Only the `Slice` variant has to copy; `Owned` hands over its string.
  618      #[inline]
  619      fn into_owned(self) -> ~str {
  620          match self {
  621              Slice(s) => s.to_owned(),
  622              Owned(s) => s
  623          }
  624      }
  625  }
  626
  627  impl<'a> Container for MaybeOwned<'a> {
           // Length of the underlying string slice.
  628      #[inline]
  629      fn len(&self) -> uint { self.as_slice().len() }
  630  }
  631
  632  impl<'a> Clone for MaybeOwned<'a> {
           // Cloning keeps the variant: a `Slice` copies the reference, an
           // `Owned` copies the string into a new `~str`.
  633      #[inline]
  634      fn clone(&self) -> MaybeOwned<'a> {
  635          match *self {
  636              Slice(s) => Slice(s),
  637              Owned(ref s) => Owned(s.to_owned())
  638          }
  639      }
  640  }
  641
  642  impl<'a> Default for MaybeOwned<'a> {
           // The default value is the empty borrowed string.
  643      #[inline]
  644      fn default() -> MaybeOwned<'a> { Slice("") }
  645  }
  646
  647  impl<'a, H: Writer> ::hash::Hash<H> for MaybeOwned<'a> {
           // Hashes the contained string, forwarding to the `hash` of whichever
           // variant is held.
  648      #[inline]
  649      fn hash(&self, hasher: &mut H) {
  650          match *self {
  651              Slice(s) => s.hash(hasher),
  652              Owned(ref s) => s.hash(hasher),
  653          }
  654      }
  655  }
  656
  657  impl<'a> fmt::Show for MaybeOwned<'a> {
           // Formats the contained string, forwarding to the `fmt` of whichever
           // variant is held.
  658      #[inline]
  659      fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  660          match *self {
  661              Slice(ref s) => s.fmt(f),
  662              Owned(ref s) => s.fmt(f)
  663          }
  664      }
  665  }
  666
  667  /// Unsafe operations
  668  pub mod raw {
  669      use cast;
  670      use libc;
  671      use ptr::RawPtr;
  672      use raw::Slice;
  673      use slice::CloneableVector;
  674      use str::{is_utf8, StrAllocating};
  675
  676      pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
  677      pub use core::str::raw::{slice_unchecked};
  678
  679      /// Create a Rust string from a *u8 buffer of the given length
           ///
           /// The bytes are copied into a new `~str`. The caller must ensure that
           /// `buf` points to at least `len` readable bytes.
           ///
           /// # Failure
           ///
           /// Fails if the `len` bytes are not valid UTF-8.
  680      pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
               // View the raw buffer as a byte slice without copying ...
  681          let v = Slice { data: buf, len: len };
  682          let bytes: &[u8] = ::cast::transmute(v);
  683          assert!(is_utf8(bytes));
               // ... and copy it only once it is known to be valid UTF-8.
  684          let s: &str = ::cast::transmute(bytes);
  685          s.to_owned()
  686      }
  687
           // Language item; it forwards to `from_buf_len`, so the bytes are
           // validated and copied.
  688      #[lang="strdup_uniq"]
  689      #[cfg(not(test))]
  690      #[inline]
  691      unsafe fn strdup_uniq(ptr: *u8, len: uint) -> ~str {
  692          from_buf_len(ptr, len)
  693      }
  694
  695      /// Create a Rust string from a null-terminated C string
           ///
           /// The caller must ensure that `buf` points to a NUL-terminated buffer;
           /// the terminator itself is not included in the result.
  696      pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
  697          let mut curr = buf;
  698          let mut i = 0;
               // Count the bytes in front of the terminating NUL.
  699          while *curr != 0 {
  700              i += 1;
  701              curr = buf.offset(i);
  702          }
  703          from_buf_len(buf as *u8, i as uint)
  704      }
  705
  706      /// Converts an owned vector of bytes to a new owned string. This assumes
  707      /// that the utf-8-ness of the vector has already been validated
  708      #[inline]
  709      pub unsafe fn from_utf8_owned(v: ~[u8]) -> ~str {
  710          cast::transmute(v)
  711      }
  712
  713      /// Converts a byte to a string.
           ///
           /// No validation is performed: the caller must ensure that `u` alone
           /// is valid UTF-8.
  714      pub unsafe fn from_byte(u: u8) -> ~str { from_utf8_owned(box [u]) }
  715
  716      /// Access the str in its vector representation.
  717      /// The caller must preserve the valid UTF-8 property when modifying.
  718      #[inline]
  719      pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
  720          cast::transmute(s)
  721      }
  722
  723      /// Checks that `from_buf_len` copies exactly the requested number of bytes.
  724      ///
  725      /// The source buffer is longer than the requested length (and ends in a
  726      /// zero byte), so only the first three `A` bytes should appear in the
  727      /// resulting string.
  728      #[test]
  729      fn test_from_buf_len() {
  730          use slice::ImmutableVector;
  731          use str::StrAllocating;
  732
  733          unsafe {
  734              let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
  735              let b = a.as_ptr();
  736              let c = from_buf_len(b, 3u);
  737              assert_eq!(c, "AAA".to_owned());
  738          }
  739      }
  740  }
  741
  742  /*
  743  Section: Trait implementations
  744  */
  745
  746  /// Any string that can be represented as a slice
       ///
       /// Implementors only supply `into_owned`; every other method has a default
       /// implementation built on `as_slice()` and returns freshly allocated data.
  747  pub trait StrAllocating: Str {
  748      /// Convert `self` into a ~str, not making a copy if possible.
  749      fn into_owned(self) -> ~str;
  750
  751      /// Convert `self` into a `StrBuf`.
  752      #[inline]
  753      fn to_strbuf(&self) -> StrBuf {
  754          StrBuf::from_str(self.as_slice())
  755      }
  756
  757      /// Convert `self` into a `StrBuf`, not making a copy if possible.
  758      #[inline]
  759      fn into_strbuf(self) -> StrBuf {
  760          StrBuf::from_owned_str(self.into_owned())
  761      }
  762
  763      /// Escape each char in `s` with `char::escape_default`.
  764      fn escape_default(&self) -> ~str {
  765          let me = self.as_slice();
               // The byte length is only a starting capacity; escapes may grow it.
  766          let mut out = StrBuf::with_capacity(me.len());
  767          for c in me.chars() {
  768              c.escape_default(|c| out.push_char(c));
  769          }
  770          out.into_owned()
  771      }
  772
  773      /// Escape each char in `s` with `char::escape_unicode`.
  774      fn escape_unicode(&self) -> ~str {
  775          let me = self.as_slice();
               // The byte length is only a starting capacity; escapes may grow it.
  776          let mut out = StrBuf::with_capacity(me.len());
  777          for c in me.chars() {
  778              c.escape_unicode(|c| out.push_char(c));
  779          }
  780          out.into_owned()
  781      }
  782
  783      /// Replace all occurrences of one string with another.
  784      ///
  785      /// # Arguments
  786      ///
  787      /// * `from` - The string to replace
  788      /// * `to` - The replacement string
  789      ///
  790      /// # Return value
  791      ///
  792      /// The original string with all occurrences of `from` replaced with `to`.
  793      ///
  794      /// # Example
  795      ///
  796      /// ```rust
  797      /// let s = "Do you know the muffin man,
  798      /// The muffin man, the muffin man, ...".to_owned();
  799      ///
  800      /// assert_eq!(s.replace("muffin man", "little lamb"),
  801      ///            "Do you know the little lamb,
  802      /// The little lamb, the little lamb, ...".to_owned());
  803      ///
  804      /// // not found, so no change.
  805      /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
  806      /// ```
  807      fn replace(&self, from: &str, to: &str) -> ~str {
  808          let me = self.as_slice();
  809          let mut result = StrBuf::new();
               // Byte offset just past the previous match.
  810          let mut last_end = 0;
  811          for (start, end) in me.match_indices(from) {
                   // Copy the text between matches, then the replacement.
  812              result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
  813              result.push_str(to);
  814              last_end = end;
  815          }
               // Copy the tail that follows the last match.
  816          result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
  817          result.into_owned()
  818      }
  819
  820      /// Copy a slice into a new owned str.
  821      #[inline]
  822      fn to_owned(&self) -> ~str {
  823          use slice::Vector;
  824
               // The bytes come from an existing string, so they are valid UTF-8.
  825          unsafe {
  826              ::cast::transmute(self.as_slice().as_bytes().to_owned())
  827          }
  828      }
  829
  830      /// Converts to a vector of `u16` encoded as UTF-16.
  831      fn to_utf16(&self) -> Vec<u16> {
  832          let me = self.as_slice();
  833          let mut u = Vec::new();
  834          for ch in me.chars() {
                   // A char encodes to at most two UTF-16 code units.
  835              let mut buf = [0u16, ..2];
  836              let n = ch.encode_utf16(buf /* as mut slice! */);
  837              u.push_all(buf.slice_to(n));
  838          }
  839          u
  840      }
  841
  842      /// Given a string, make a new string with repeated copies of it.
  843      fn repeat(&self, nn: uint) -> ~str {
  844          let me = self.as_slice();
  845          let mut ret = StrBuf::with_capacity(nn * me.len());
  846          for _ in range(0, nn) {
  847              ret.push_str(me);
  848          }
  849          ret.into_owned()
  850      }
  851
  852      /// Levenshtein Distance between two strings.
           ///
           /// The distance is counted in characters (not bytes): it is the number
           /// of single-character insertions, deletions and substitutions needed
           /// to turn `self` into `t`.
  853      fn lev_distance(&self, t: &str) -> uint {
  854          let me = self.as_slice();
               // Lengths must be measured in characters, because the loops below
               // walk `chars()`. Using byte lengths here would size the table by
               // bytes and read an entry that is never updated whenever `t`
               // contains multi-byte characters.
  855          let slen = me.char_len();
  856          let tlen = t.char_len();
  857
  858          if slen == 0 { return tlen; }
  859          if tlen == 0 { return slen; }
  860
               // One row of the edit-distance table, reused for every character of
               // `me`; entry `j` starts as the cost of building the first `j`
               // characters of `t` from nothing.
  861          let mut dcol = Vec::from_fn(tlen + 1, |x| x);
  862
  863          for (i, sc) in me.chars().enumerate() {
  864
                   // `current` holds the diagonal (previous row, previous column).
  865              let mut current = i;
  866              *dcol.get_mut(0) = current + 1;
  867
  868              for (j, tc) in t.chars().enumerate() {
  869
  870                  let next = *dcol.get(j + 1);
  871
  872                  if sc == tc {
  873                      *dcol.get_mut(j + 1) = current;
  874                  } else {
                           // Cheapest of substitution, deletion and insertion, plus one.
  875                      *dcol.get_mut(j + 1) = ::cmp::min(current, next);
  876                      *dcol.get_mut(j + 1) = ::cmp::min(*dcol.get(j + 1),
  877                                                        *dcol.get(j)) + 1;
  878                  }
  879
  880                  current = next;
  881              }
  882          }
  883
  884          return *dcol.get(tlen);
  885      }
  886
  887      /// An Iterator over the string in Unicode Normalization Form D
  888      /// (canonical decomposition).
  889      #[inline]
  890      fn nfd_chars<'a>(&'a self) -> Normalizations<'a> {
  891          Normalizations {
  892              iter: self.as_slice().chars(),
  893              buffer: Vec::new(),
  894              sorted: false,
  895              kind: NFD
  896          }
  897      }
  898
  899      /// An Iterator over the string in Unicode Normalization Form KD
  900      /// (compatibility decomposition).
  901      #[inline]
  902      fn nfkd_chars<'a>(&'a self) -> Normalizations<'a> {
  903          Normalizations {
  904              iter: self.as_slice().chars(),
  905              buffer: Vec::new(),
  906              sorted: false,
  907              kind: NFKD
  908          }
  909      }
  910  }
  911
  912  impl<'a> StrAllocating for &'a str {
           // A borrowed slice becomes an owned string via `to_owned`.
  913      #[inline]
  914      fn into_owned(self) -> ~str { self.to_owned() }
  915  }
  916
  917  impl<'a> StrAllocating for ~str {
           // Already owned: the string is returned as-is.
  918      #[inline]
  919      fn into_owned(self) -> ~str { self }
  920  }
  921
  922  /// Methods for owned strings (`~str`); both methods take `self` by value.
  923  pub trait OwnedStr {
  924      /// Consumes the string, returning the underlying byte buffer.
  925      ///
  926      /// The buffer does not have a null terminator.
  927      fn into_bytes(self) -> ~[u8];
  928
  929      /// Consumes this string, pushes `rhs` onto its end, and returns the concatenation.
  930      fn append(self, rhs: &str) -> ~str;
  931  }
  932
  933  impl OwnedStr for ~str {
  934      #[inline]
  935      fn into_bytes(self) -> ~[u8] {
               // Reinterprets the owned string as an owned byte vector.
               // NOTE(review): presumably relies on `~str` and `~[u8]` sharing a
               // representation -- confirm before changing either type.
  936          unsafe { cast::transmute(self) }
  937      }
  938
  939      #[inline]
  940      fn append(self, rhs: &str) -> ~str {
               // Move the string into a growable buffer, append, and convert back.
  941          let mut new_str = StrBuf::from_owned_str(self);
  942          new_str.push_str(rhs);
  943          new_str.into_owned()
  944      }
  945  }
  946
  947  #[cfg(test)]
  948  mod tests {
  949      use iter::AdditiveIterator;
  950      use default::Default;
  951      use prelude::*;
  952      use str::*;
  953      use strbuf::StrBuf;
  954
  955      #[test]
  956      fn test_eq() {
               // `eq` compares two owned strings by content.
  957          let foo = "foo".to_owned();
  958          assert!(eq(&"".to_owned(), &"".to_owned()));
  959          assert!(eq(&foo, &"foo".to_owned()));
  960          assert!(!eq(&foo, &"bar".to_owned()));
           }
  961
  962      #[test]
  963      fn test_eq_slice() {
               // `eq_slice` compares borrowed slices, including sub-slices.
  964          let head = "foobar".slice(0, 3);
  965          let tail = "barfoo".slice(3, 6);
  966          assert!(eq_slice(head, "foo"));
  967          assert!(eq_slice(tail, "foo"));
               assert!(!eq_slice("foo1", "foo2"));
           }
  968
  969      #[test]
  970      fn test_le() {
               // `<=` and `!=` on string slices.
  971          assert!("" <= "");
  972          assert!("" <= "foo");
  973          assert!("foo" <= "foo");
  974          assert!("foo" != "bar");
  975      }
  976
  977      #[test]
  978      fn test_len() {
  979          assert_eq!("".len(), 0u);
  980          assert_eq!("hello world".len(), 11u);
  981          assert_eq!("\x63".len(), 1u);
  982          assert_eq!("\xa2".len(), 2u);
  983          assert_eq!("\u03c0".len(), 2u);
  984          assert_eq!("\u2620".len(), 3u);
  985          assert_eq!("\U0001d11e".len(), 4u);
  986
  987          assert_eq!("".char_len(), 0u);
  988          assert_eq!("hello world".char_len(), 11u);
  989          assert_eq!("\x63".char_len(), 1u);
  990          assert_eq!("\xa2".char_len(), 1u);
  991          assert_eq!("\u03c0".char_len(), 1u);
  992          assert_eq!("\u2620".char_len(), 1u);
  993          assert_eq!("\U0001d11e".char_len(), 1u);
  994          assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".char_len(), 19u);
  995      }
  996
  997      #[test]
  998      fn test_find() {
  999          assert_eq!("hello".find('l'), Some(2u));
1000          assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
1001          assert!("hello".find('x').is_none());
1002          assert!("hello".find(|c:char| c == 'x').is_none());
1003          assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".find('å'), Some(30u));
1004          assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".find(|c: char| c == 'å'), Some(30u));
1005      }
1006
1007      #[test]
1008      fn test_rfind() {
1009          assert_eq!("hello".rfind('l'), Some(3u));
1010          assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
1011          assert!("hello".rfind('x').is_none());
1012          assert!("hello".rfind(|c:char| c == 'x').is_none());
1013          assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".rfind('å'), Some(30u));
1014          assert_eq!("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".rfind(|c: char| c == 'å'), Some(30u));
1015      }
1016
1017      #[test]
1018      fn test_collect() {
1019          let empty = "".to_owned();
1020          let s: ~str = empty.chars().collect();
1021          assert_eq!(empty, s);
1022          let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸".to_owned();
1023          let s: ~str = data.chars().collect();
1024          assert_eq!(data, s);
1025      }
1026
 1027      #[test]
 1028      fn test_into_bytes() {
               // `into_bytes` yields exactly the string's bytes.
 1029          let data = "asdf".to_owned();
 1030          let buf = data.into_bytes();
 1031          assert_eq!(bytes!("asdf"), buf.as_slice());
 1032      }
1033
1034      #[test]
1035      fn test_find_str() {
1036          // byte positions
1037          assert_eq!("".find_str(""), Some(0u));
1038          assert!("banana".find_str("apple pie").is_none());
1039
1040          let data = "abcabc";
1041          assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1042          assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1043          assert!(data.slice(2u, 4u).find_str("ab").is_none());
1044
1045          let mut data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1046          data = data + data;
1047          assert!(data.find_str("à¹à¸å").is_none());
1048          assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1049          assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1050
1051          assert_eq!(data.slice(0u, 43u).find_str("à¸à¸£à¸°"), Some( 0u));
1052          assert_eq!(data.slice(0u, 43u).find_str("à¸à¸¨à¹"), Some(12u));
1053          assert_eq!(data.slice(0u, 43u).find_str("à¸¢ä¸"), Some(24u));
1054          assert_eq!(data.slice(0u, 43u).find_str("iá»t"), Some(34u));
1055          assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1056
1057          assert_eq!(data.slice(43u, 86u).find_str("à¸à¸£à¸°"), Some(43u - 43u));
1058          assert_eq!(data.slice(43u, 86u).find_str("à¸à¸¨à¹"), Some(55u - 43u));
1059          assert_eq!(data.slice(43u, 86u).find_str("à¸¢ä¸"), Some(67u - 43u));
1060          assert_eq!(data.slice(43u, 86u).find_str("iá»t"), Some(77u - 43u));
1061          assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1062      }
1063
1064      #[test]
1065      fn test_slice_chars() {
1066          fn t(a: &str, b: &str, start: uint) {
1067              assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1068          }
1069          t("", "", 0);
1070          t("hello", "llo", 2);
1071          t("hello", "el", 1);
1072          t("Î±Î²Î»", "Î²", 1);
1073          t("Î±Î²Î»", "", 3);
1074          assert_eq!("à¸°à¹à¸à¸¨à¹à¸", "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".slice_chars(2, 8));
1075      }
1076
1077      #[test]
1078      fn test_concat() {
1079          fn t(v: &[~str], s: &str) {
1080              assert_eq!(v.concat(), s.to_str());
1081          }
1082          t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
1083            "no".to_owned(), "good".to_owned()], "youknowI'mnogood");
1084          let v: &[~str] = [];
1085          t(v, "");
1086          t(["hi".to_owned()], "hi");
1087      }
1088
1089      #[test]
1090      fn test_connect() {
1091          fn t(v: &[~str], sep: &str, s: &str) {
1092              assert_eq!(v.connect(sep), s.to_str());
1093          }
1094          t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
1095             "no".to_owned(), "good".to_owned()],
1096            " ", "you know I'm no good");
1097          let v: &[~str] = [];
1098          t(v, " ", "");
1099          t(["hi".to_owned()], " ", "hi");
1100      }
1101
1102      #[test]
1103      fn test_concat_slices() {
1104          fn t(v: &[&str], s: &str) {
1105              assert_eq!(v.concat(), s.to_str());
1106          }
1107          t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
1108          let v: &[&str] = [];
1109          t(v, "");
1110          t(["hi"], "hi");
1111      }
1112
1113      #[test]
1114      fn test_connect_slices() {
1115          fn t(v: &[&str], sep: &str, s: &str) {
1116              assert_eq!(v.connect(sep), s.to_str());
1117          }
1118          t(["you", "know", "I'm", "no", "good"],
1119            " ", "you know I'm no good");
1120          t([], " ", "");
1121          t(["hi"], " ", "hi");
1122      }
1123
1124      #[test]
1125      fn test_repeat() {
1126          assert_eq!("x".repeat(4), "xxxx".to_owned());
1127          assert_eq!("hi".repeat(4), "hihihihi".to_owned());
1128          assert_eq!("à¹à¸å".repeat(3), "à¹à¸åà¹à¸åà¹à¸å".to_owned());
1129          assert_eq!("".repeat(4), "".to_owned());
1130          assert_eq!("hi".repeat(0), "".to_owned());
1131      }
1132
1133      #[test]
1134      fn test_unsafe_slice() {
1135          assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1136          assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1137          assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1138          fn a_million_letter_a() -> ~str {
1139              let mut i = 0;
1140              let mut rs = StrBuf::new();
1141              while i < 100000 {
1142                  rs.push_str("aaaaaaaaaa");
1143                  i += 1;
1144              }
1145              rs.into_owned()
1146          }
1147          fn half_a_million_letter_a() -> ~str {
1148              let mut i = 0;
1149              let mut rs = StrBuf::new();
1150              while i < 100000 {
1151                  rs.push_str("aaaaa");
1152                  i += 1;
1153              }
1154              rs.into_owned()
1155          }
1156          let letters = a_million_letter_a();
1157          assert!(half_a_million_letter_a() ==
1158              unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
1159      }
1160
1161      #[test]
1162      fn test_starts_with() {
1163          assert!(("".starts_with("")));
1164          assert!(("abc".starts_with("")));
1165          assert!(("abc".starts_with("a")));
1166          assert!((!"a".starts_with("abc")));
1167          assert!((!"".starts_with("abc")));
1168          assert!((!"Ã¶dd".starts_with("-")));
1169          assert!(("Ã¶dd".starts_with("Ã¶d")));
1170      }
1171
1172      #[test]
1173      fn test_ends_with() {
1174          assert!(("".ends_with("")));
1175          assert!(("abc".ends_with("")));
1176          assert!(("abc".ends_with("c")));
1177          assert!((!"a".ends_with("abc")));
1178          assert!((!"".ends_with("abc")));
1179          assert!((!"ddÃ¶".ends_with("-")));
1180          assert!(("ddÃ¶".ends_with("dÃ¶")));
1181      }
1182
 1183      #[test]
 1184      fn test_is_empty() {
               // Only the zero-length string is empty.
 1185          assert!("".is_empty());
 1186          assert!(!"a".is_empty());
 1187      }
1188
1189      #[test]
1190      fn test_replace() {
1191          let a = "a";
1192          assert_eq!("".replace(a, "b"), "".to_owned());
1193          assert_eq!("a".replace(a, "b"), "b".to_owned());
1194          assert_eq!("ab".replace(a, "b"), "bb".to_owned());
1195          let test = "test";
1196          assert!(" test test ".replace(test, "toast") ==
1197              " toast toast ".to_owned());
1198          assert_eq!(" test test ".replace(test, ""), "   ".to_owned());
1199      }
1200
1201      #[test]
1202      fn test_replace_2a() {
1203          let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸å".to_owned();
1204          let repl = "Ø¯ÙÙØ© Ø§ÙÙÙÙØª".to_owned();
1205
1206          let a = "à¸à¸£à¸°à¹".to_owned();
1207          let a2 = "Ø¯ÙÙØ© Ø§ÙÙÙÙØªà¸à¸¨à¹à¸à¸¢ä¸å".to_owned();
1208          assert_eq!(data.replace(a, repl), a2);
1209      }
1210
1211      #[test]
1212      fn test_replace_2b() {
1213          let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸å".to_owned();
1214          let repl = "Ø¯ÙÙØ© Ø§ÙÙÙÙØª".to_owned();
1215
1216          let b = "à¸°à¹".to_owned();
1217          let b2 = "à¸à¸£Ø¯ÙÙØ© Ø§ÙÙÙÙØªà¸à¸¨à¹à¸à¸¢ä¸å".to_owned();
1218          assert_eq!(data.replace(b, repl), b2);
1219      }
1220
1221      #[test]
1222      fn test_replace_2c() {
1223          let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸å".to_owned();
1224          let repl = "Ø¯ÙÙØ© Ø§ÙÙÙÙØª".to_owned();
1225
1226          let c = "ä¸å".to_owned();
1227          let c2 = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢Ø¯ÙÙØ© Ø§ÙÙÙÙØª".to_owned();
1228          assert_eq!(data.replace(c, repl), c2);
1229      }
1230
1231      #[test]
1232      fn test_replace_2d() {
1233          let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸å".to_owned();
1234          let repl = "Ø¯ÙÙØ© Ø§ÙÙÙÙØª".to_owned();
1235
1236          let d = "à¹à¸å".to_owned();
1237          assert_eq!(data.replace(d, repl), data);
1238      }
1239
1240      #[test]
1241      fn test_slice() {
1242          assert_eq!("ab", "abc".slice(0, 2));
1243          assert_eq!("bc", "abc".slice(1, 3));
1244          assert_eq!("", "abc".slice(1, 1));
1245          assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1246
1247          let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸å";
1248          assert_eq!("à¸", data.slice(0, 3));
1249          assert_eq!("à¸£", data.slice(3, 6));
1250          assert_eq!("", data.slice(3, 3));
1251          assert_eq!("å", data.slice(30, 33));
1252
1253          fn a_million_letter_X() -> ~str {
1254              let mut i = 0;
1255              let mut rs = StrBuf::new();
1256              while i < 100000 {
1257                  rs.push_str("åååååååååå");
1258                  i += 1;
1259              }
1260              rs.into_owned()
1261          }
1262          fn half_a_million_letter_X() -> ~str {
1263              let mut i = 0;
1264              let mut rs = StrBuf::new();
1265              while i < 100000 {
1266                  rs.push_str("ååååå");
1267                  i += 1;
1268              }
1269              rs.into_owned()
1270          }
1271          let letters = a_million_letter_X();
1272          assert!(half_a_million_letter_X() ==
1273              letters.slice(0u, 3u * 500000u).to_owned());
1274      }
1275
1276      #[test]
1277      fn test_slice_2() {
1278          let ss = "ä¸åViá»t Nam";
1279
1280          assert_eq!("å", ss.slice(3u, 6u));
1281          assert_eq!("Viá»t Nam", ss.slice(6u, 16u));
1282
1283          assert_eq!("ab", "abc".slice(0u, 2u));
1284          assert_eq!("bc", "abc".slice(1u, 3u));
1285          assert_eq!("", "abc".slice(1u, 1u));
1286
1287          assert_eq!("ä¸", ss.slice(0u, 3u));
1288          assert_eq!("åV", ss.slice(3u, 7u));
1289          assert_eq!("", ss.slice(3u, 3u));
1290          /*0: ä¸
1291            3: å
1292            6: V
1293            7: i
1294            8: á»
1295           11: t
1296           12:
1297           13: N
1298           14: a
1299           15: m */
1300      }
1301
1302      #[test]
1303      #[should_fail]
1304      fn test_slice_fail() {
1305          "ä¸åViá»t Nam".slice(0u, 2u);
1306      }
1307
 1308      #[test]
 1309      fn test_slice_from() {
               // `slice_from(n)` keeps everything from byte `n` to the end.
 1310          assert_eq!("abcd".slice_from(0), "abcd");
 1311          assert_eq!("abcd".slice_from(2), "cd");
 1312          assert_eq!("abcd".slice_from(4), "");
 1313      }
 1314      #[test]
 1315      fn test_slice_to() {
               // `slice_to(n)` keeps everything before byte `n`.
 1316          assert_eq!("abcd".slice_to(0), "");
 1317          assert_eq!("abcd".slice_to(2), "ab");
 1318          assert_eq!("abcd".slice_to(4), "abcd");
 1319      }
1320
 1321      #[test]
 1322      fn test_trim_left_chars() {
               // Leading characters are stripped when they match a char slice,
               // a single char, or a predicate closure.
 1323          let v: &[char] = &[];
 1324          assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
 1325          assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
 1326          assert_eq!(" ***  *** ".trim_left_chars(&['*', ' ']), "");
 1327          assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
 1328
 1329          assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
 1330          assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
 1331          assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
 1332      }
1333
 1334      #[test]
 1335      fn test_trim_right_chars() {
               // Trailing characters are stripped when they match a char slice,
               // a single char, or a predicate closure.
 1336          let v: &[char] = &[];
 1337          assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
 1338          assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
 1339          assert_eq!(" ***  *** ".trim_right_chars(&['*', ' ']), "");
 1340          assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
 1341
 1342          assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
 1343          assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
 1344          assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
 1345      }
1346
 1347      #[test]
 1348      fn test_trim_chars() {
               // Matching characters are stripped from both ends; interior
               // matches are left alone.
 1349          let v: &[char] = &[];
 1350          assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
 1351          assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
 1352          assert_eq!(" ***  *** ".trim_chars(&['*', ' ']), "");
 1353          assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
 1354
 1355          assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
 1356          assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
 1357          assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
 1358      }
1359
 1360      #[test]
 1361      fn test_trim_left() {
               // Leading whitespace, including U+3000, is removed; trailing
               // whitespace is kept.
 1362          assert_eq!("".trim_left(), "");
 1363          assert_eq!("a".trim_left(), "a");
 1364          assert_eq!("    ".trim_left(), "");
 1365          assert_eq!("     blah".trim_left(), "blah");
 1366          assert_eq!("   \u3000  wut".trim_left(), "wut");
 1367          assert_eq!("hey ".trim_left(), "hey ");
 1368      }
1369
 1370      #[test]
 1371      fn test_trim_right() {
               // Trailing whitespace, including U+3000, is removed; leading
               // whitespace is kept.
 1372          assert_eq!("".trim_right(), "");
 1373          assert_eq!("a".trim_right(), "a");
 1374          assert_eq!("    ".trim_right(), "");
 1375          assert_eq!("blah     ".trim_right(), "blah");
 1376          assert_eq!("wut   \u3000  ".trim_right(), "wut");
 1377          assert_eq!(" hey".trim_right(), " hey");
 1378      }
1379
 1380      #[test]
 1381      fn test_trim() {
               // Whitespace is removed from both ends; interior spaces are kept.
 1382          assert_eq!("".trim(), "");
 1383          assert_eq!("a".trim(), "a");
 1384          assert_eq!("    ".trim(), "");
 1385          assert_eq!("    blah     ".trim(), "blah");
 1386          assert_eq!("\nwut   \u3000  ".trim(), "wut");
 1387          assert_eq!(" hey dude ".trim(), "hey dude");
 1388      }
1389
 1390      #[test]
 1391      fn test_is_whitespace() {
               // The empty string counts as whitespace; any non-whitespace
               // character makes the whole string fail the check.
 1392          assert!("".is_whitespace());
 1393          assert!(" ".is_whitespace());
 1394          assert!("\u2009".is_whitespace()); // Thin space
 1395          assert!("  \n\t   ".is_whitespace());
 1396          assert!(!"   _   ".is_whitespace());
 1397      }
1398
1399      #[test]
1400      fn test_slice_shift_char() {
1401          let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸";
1402          assert_eq!(data.slice_shift_char(), (Some('à¸'), "à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸"));
1403      }
1404
 1405      #[test]
 1406      fn test_slice_shift_char_2() {
               // An empty string yields no character and an empty remainder.
 1407          let empty = "";
 1408          assert_eq!(empty.slice_shift_char(), (None, ""));
 1409      }
1410
 1411      #[test]
 1412      fn test_is_utf8() {
 1413          // deny overlong encodings
 1414          assert!(!is_utf8([0xc0, 0x80]));
 1415          assert!(!is_utf8([0xc0, 0xae]));
 1416          assert!(!is_utf8([0xe0, 0x80, 0x80]));
 1417          assert!(!is_utf8([0xe0, 0x80, 0xaf]));
 1418          assert!(!is_utf8([0xe0, 0x81, 0x81]));
 1419          assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
               // NOTE(review): this sequence looks like a code point above U+10FFFF
               // rather than an overlong form -- confirm the grouping is intended.
 1420          assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
 1421
 1422          // deny surrogates
 1423          assert!(!is_utf8([0xED, 0xA0, 0x80]));
 1424          assert!(!is_utf8([0xED, 0xBF, 0xBF]));
 1425
               // accept well-formed 2-, 3- and 4-byte sequences
 1426          assert!(is_utf8([0xC2, 0x80]));
 1427          assert!(is_utf8([0xDF, 0xBF]));
 1428          assert!(is_utf8([0xE0, 0xA0, 0x80]));
 1429          assert!(is_utf8([0xED, 0x9F, 0xBF]));
 1430          assert!(is_utf8([0xEE, 0x80, 0x80]));
 1431          assert!(is_utf8([0xEF, 0xBF, 0xBF]));
 1432          assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
 1433          assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
 1434      }
1435
 1436      #[test]
 1437      fn test_is_utf16() {
               // `pos!` asserts that every listed code-unit sequence is accepted.
 1438          macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
 1439
 1440          // non-surrogates
 1441          pos!([0x0000],
 1442               [0x0001, 0x0002],
 1443               [0xD7FF],
 1444               [0xE000]);
 1445
 1446          // surrogate pairs (randomly generated with Python 3's
 1447          // .encode('utf-16be'))
 1448          pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
 1449               [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
 1450               [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
 1451
 1452          // mixtures (also random)
 1453          pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
 1454               [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
 1455               [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
 1456
 1457          // negative tests
               // `neg!` asserts that every listed code-unit sequence is rejected.
 1458          macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
 1459
 1460          neg!(
 1461              // surrogate + regular unit
 1462              [0xdb45, 0x0000],
 1463              // surrogate + lead surrogate
 1464              [0xd900, 0xd900],
 1465              // unterminated surrogate
 1466              [0xd8ff],
 1467              // trail surrogate without a lead
 1468              [0xddb7]);
 1469
 1470          // random byte sequences that Python 3's .decode('utf-16be')
 1471          // failed on
 1472          neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
 1473               [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
 1474               [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
 1475               [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
 1476               [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
 1477               [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
 1478               [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
 1479               [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
 1480               [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
 1481               [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
 1482               [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
 1483               [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
 1484               [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
 1485               [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
 1486               [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
 1487               [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
 1488               [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
 1489               [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
 1490               [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
 1491               [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
 1492               [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
 1493      }
1494
 1495      #[test]
 1496      fn test_raw_from_c_str() {
               // Seven 'A' bytes (65) followed by a 0 terminator are read back
               // through `raw::from_c_str`.
 1497          unsafe {
 1498              let a = box [65, 65, 65, 65, 65, 65, 65, 0];
 1499              let b = a.as_ptr();
 1500              let c = raw::from_c_str(b);
 1501              assert_eq!(c, "AAAAAAA".to_owned());
 1502          }
 1503      }
1504
1505      #[test]
1506      fn test_as_bytes() {
1507          // no null
1508          let v = [
1509              224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1510              184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1511              109
1512          ];
1513          assert_eq!("".as_bytes(), &[]);
1514          assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1515          assert_eq!("à¸¨à¹à¸à¸¢ä¸åViá»t Nam".as_bytes(), v.as_slice());
1516      }
1517
 1518      #[test]
 1519      #[should_fail]
 1520      fn test_as_bytes_fail() {
 1521          // Don't double free. (I'm not sure if this exercises the
 1522          // original problem code path anymore.)
               // The task fails while `_bytes` still borrows `s`.
 1523          let s = "".to_owned();
 1524          let _bytes = s.as_bytes();
 1525          fail!();
 1526      }
1527
 1528      #[test]
 1529      fn test_as_ptr() {
               // Reads each byte of "hello" back through the raw pointer
               // returned by `as_ptr`.
 1530          let buf = "hello".as_ptr();
 1531          unsafe {
 1532              assert_eq!(*buf.offset(0), 'h' as u8);
 1533              assert_eq!(*buf.offset(1), 'e' as u8);
 1534              assert_eq!(*buf.offset(2), 'l' as u8);
 1535              assert_eq!(*buf.offset(3), 'l' as u8);
 1536              assert_eq!(*buf.offset(4), 'o' as u8);
 1537          }
 1538      }
1539
 1540      #[test]
 1541      fn test_subslice_offset() {
               // `subslice_offset` reports the byte offset at which an inner slice
               // starts within the outer string.
 1542          let a = "kernelsprite";
 1543          let b = a.slice(7, a.len());
 1544          let c = a.slice(0, a.len() - 6);
 1545          assert_eq!(a.subslice_offset(b), 7);
 1546          assert_eq!(a.subslice_offset(c), 0);
 1547
               // Slices produced by `lines()` point back into the original string.
 1548          let string = "a\nb\nc";
 1549          let lines: Vec<&str> = string.lines().collect();
 1550          let lines = lines.as_slice();
 1551          assert_eq!(string.subslice_offset(lines[0]), 0);
 1552          assert_eq!(string.subslice_offset(lines[1]), 2);
 1553          assert_eq!(string.subslice_offset(lines[2]), 4);
 1554      }
1555
 1556      #[test]
 1557      #[should_fail]
 1558      fn test_subslice_offset_2() {
               // `b` is a separate literal, not a slice of `a`, so the call is
               // expected to fail.
 1559          let a = "alchemiter";
 1560          let b = "cruxtruder";
 1561          a.subslice_offset(b);
 1562      }
1563
1564      #[test]
1565      fn vec_str_conversions() {
1566          let s1: ~str = "All mimsy were the borogoves".to_owned();
1567
1568          let v: ~[u8] = s1.as_bytes().to_owned();
1569          let s2: ~str = from_utf8(v).unwrap().to_owned();
1570          let mut i: uint = 0u;
1571          let n1: uint = s1.len();
1572          let n2: uint = v.len();
1573          assert_eq!(n1, n2);
1574          while i < n1 {
1575              let a: u8 = s1[i];
1576              let b: u8 = s2[i];
1577              debug!("{}", a);
1578              debug!("{}", b);
1579              assert_eq!(a, b);
1580              i += 1u;
1581          }
1582      }
1583
1584      #[test]
1585      fn test_contains() {
1586          assert!("abcde".contains("bcd"));
1587          assert!("abcde".contains("abcd"));
1588          assert!("abcde".contains("bcde"));
1589          assert!("abcde".contains(""));
1590          assert!("".contains(""));
1591          assert!(!"abcde".contains("def"));
1592          assert!(!"".contains("a"));
1593
1594          let data = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1595          assert!(data.contains("à¸à¸£à¸°à¹"));
1596          assert!(data.contains("à¸°à¹"));
1597          assert!(data.contains("ä¸å"));
1598          assert!(!data.contains("à¹à¸å"));
1599      }
1600
 1601      #[test]
 1602      fn test_contains_char() {
               // Single-character membership, including the empty string.
 1603          assert!("abc".contains_char('b'));
 1604          assert!("a".contains_char('a'));
 1605          assert!(!"abc".contains_char('d'));
 1606          assert!(!"".contains_char('a'));
 1607      }
1608
1609      #[test]
1610      fn test_utf16() {
1611          let pairs =
1612              [("ðð¿ð»ðð¹ð»ð°\n".to_owned(),
1613                vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
1614                  0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
1615                  0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
1616                  0xd800_u16, 0xdf30_u16, 0x000a_u16]),
1617
1618               ("ððð®ðð²ð ðð²ð\n".to_owned(),
1619                vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
1620                  0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
1621                  0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
1622                  0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
1623                  0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
1624                  0x000a_u16]),
1625
1626               ("ððððððÂ·ððððððð\n".to_owned(),
1627                vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
1628                  0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
1629                  0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
1630                  0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
1631                  0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
1632                  0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
1633                  0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
1634
1635               ("ðððððð ðð ððð ððððð ðð\n".to_owned(),
1636                vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
1637                  0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
1638                  0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
1639                  0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
1640                  0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
1641                  0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
1642                  0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
1643                  0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
1644                  0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
1645                  0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
1646                  0x000a_u16 ]),
1647               // Issue #12318, even-numbered non-BMP planes
1648               ("\U00020000".to_owned(),
1649                vec![0xD840, 0xDC00])];
1650
1651          for p in pairs.iter() {
1652              let (s, u) = (*p).clone();
1653              assert!(is_utf16(u.as_slice()));
1654              assert_eq!(s.to_utf16(), u);
1655
1656              assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
1657              assert_eq!(from_utf16_lossy(u.as_slice()), s);
1658
1659              assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
1660              assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
1661          }
1662      }
1663
 1664      #[test]
 1665      fn test_utf16_invalid() {
               // `from_utf16` returns `None` for any input containing an
               // unpaired surrogate.
 1666          // completely positive cases tested above.
 1667          // lead + eof
 1668          assert_eq!(from_utf16([0xD800]), None);
 1669          // lead + lead
 1670          assert_eq!(from_utf16([0xD800, 0xD800]), None);
 1671
 1672          // isolated trail
 1673          assert_eq!(from_utf16([0x0061, 0xDC00]), None);
 1674
 1675          // general
 1676          assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None);
 1677      }
1678
1679      #[test]
1680      fn test_utf16_lossy() {
1681          // completely positive cases tested above.
1682          // lead + eof
1683          assert_eq!(from_utf16_lossy([0xD800]), "\uFFFD".to_owned());
1684          // lead + lead
1685          assert_eq!(from_utf16_lossy([0xD800, 0xD800]), "\uFFFD\uFFFD".to_owned());
1686
1687          // isolated trail
1688          assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), "a\uFFFD".to_owned());
1689
1690          // general
1691          assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]), "\uFFFDð\uFFFD".to_owned());
1692      }
1693
 1694      #[test]
 1695      fn test_truncate_utf16_at_nul() {
               // The result is the prefix before the first 0 unit, or the whole
               // input when there is none.
 1696          let v = [];
 1697          assert_eq!(truncate_utf16_at_nul(v), &[]);
 1698
 1699          let v = [0, 2, 3];
 1700          assert_eq!(truncate_utf16_at_nul(v), &[]);
 1701
 1702          let v = [1, 0, 3];
 1703          assert_eq!(truncate_utf16_at_nul(v), &[1]);
 1704
 1705          let v = [1, 2, 0];
 1706          assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
 1707
 1708          let v = [1, 2, 3];
 1709          assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
 1710      }
1711
1712      #[test]
1713      fn test_char_at() {
1714          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1715          let v = box ['à¸¨','à¹','à¸','à¸¢','ä¸','å','V','i','á»','t',' ','N','a','m'];
1716          let mut pos = 0;
1717          for ch in v.iter() {
1718              assert!(s.char_at(pos) == *ch);
1719              pos += from_char(*ch).len();
1720          }
1721      }
1722
1723      #[test]
1724      fn test_char_at_reverse() {
1725          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1726          let v = box ['à¸¨','à¹','à¸','à¸¢','ä¸','å','V','i','á»','t',' ','N','a','m'];
1727          let mut pos = s.len();
1728          for ch in v.iter().rev() {
1729              assert!(s.char_at_reverse(pos) == *ch);
1730              pos -= from_char(*ch).len();
1731          }
1732      }
1733
1734      #[test]
          // `escape_unicode` is expected to escape every char, ASCII included: `\xNN` for
          // values up to 0xff, `\uNNNN` up to 0xffff and `\UNNNNNNNN` above that.
1735      fn test_escape_unicode() {
1736          assert_eq!("abc".escape_unicode(), "\\x61\\x62\\x63".to_owned());
1737          assert_eq!("a c".escape_unicode(), "\\x61\\x20\\x63".to_owned());
1738          assert_eq!("\r\n\t".escape_unicode(), "\\x0d\\x0a\\x09".to_owned());
1739          assert_eq!("'\"\\".escape_unicode(), "\\x27\\x22\\x5c".to_owned());
1740          assert_eq!("\x00\x01\xfe\xff".escape_unicode(), "\\x00\\x01\\xfe\\xff".to_owned());
1741          assert_eq!("\u0100\uffff".escape_unicode(), "\\u0100\\uffff".to_owned());
1742          assert_eq!("\U00010000\U0010ffff".escape_unicode(), "\\U00010000\\U0010ffff".to_owned());
              // mixed widths within one string
1743          assert_eq!("ab\ufb00".escape_unicode(), "\\x61\\x62\\ufb00".to_owned());
1744          assert_eq!("\U0001d4ea\r".escape_unicode(), "\\U0001d4ea\\x0d".to_owned());
1745      }
1746
1747      #[test]
          // `escape_default` is expected to leave printable ASCII alone, use the short
          // backslash escapes for \r, \n, \t, quotes and backslash, and fall back to
          // `\uNNNN` / `\UNNNNNNNN` for non-ASCII chars.
1748      fn test_escape_default() {
1749          assert_eq!("abc".escape_default(), "abc".to_owned());
1750          assert_eq!("a c".escape_default(), "a c".to_owned());
1751          assert_eq!("\r\n\t".escape_default(), "\\r\\n\\t".to_owned());
1752          assert_eq!("'\"\\".escape_default(), "\\'\\\"\\\\".to_owned());
1753          assert_eq!("\u0100\uffff".escape_default(), "\\u0100\\uffff".to_owned());
1754          assert_eq!("\U00010000\U0010ffff".escape_default(), "\\U00010000\\U0010ffff".to_owned());
              // mixed plain and escaped chars within one string
1755          assert_eq!("ab\ufb00".escape_default(), "ab\\ufb00".to_owned());
1756          assert_eq!("\U0001d4ea\r".escape_default(), "\\U0001d4ea\\r".to_owned());
1757      }
1758
1759      #[test]
          // Checks the total ordering of string slices: a strict prefix sorts first,
          // otherwise the first differing byte decides.
1760      fn test_total_ord() {
              // Every comparison is wrapped in `assert!`: a bare `a == b;` statement
              // throws its result away and could never fail the test.
1761          assert!("1234".cmp(&("123")) == Greater);
1762          assert!("123".cmp(&("1234")) == Less);
1763          assert!("1234".cmp(&("1234")) == Equal);
1764          assert!("12345555".cmp(&("123456")) == Less);
1765          assert!("22".cmp(&("1234")) == Greater);
1766      }
1767
1768      #[test]
1769      fn test_char_range_at() {
1770          let data = "bÂ¢â¬ð¤¢ð¤¢â¬Â¢b".to_owned();
1771          assert_eq!('b', data.char_range_at(0).ch);
1772          assert_eq!('Â¢', data.char_range_at(1).ch);
1773          assert_eq!('â¬', data.char_range_at(3).ch);
1774          assert_eq!('ð¤¢', data.char_range_at(6).ch);
1775          assert_eq!('ð¤¢', data.char_range_at(10).ch);
1776          assert_eq!('â¬', data.char_range_at(14).ch);
1777          assert_eq!('Â¢', data.char_range_at(17).ch);
1778          assert_eq!('b', data.char_range_at(19).ch);
1779      }
1780
1781      #[test]
          // Asking for the char that ends at offset 0 must report `next == 0`; going by
          // the test name this guards against the offset wrapping below zero.
1782      fn test_char_range_at_reverse_underflow() {
1783          assert_eq!("abc".char_range_at_reverse(0).next, 0);
1784      }
1785
1786      #[test]
1787      fn test_add() {
1788          #![allow(unnecessary_allocation)]
1789          macro_rules! t (
1790              ($s1:expr, $s2:expr, $e:expr) => { {
1791                  let s1 = $s1;
1792                  let s2 = $s2;
1793                  let e = $e;
1794                  assert_eq!(s1 + s2, e.to_owned());
1795                  assert_eq!(s1.to_owned() + s2, e.to_owned());
1796              } }
1797          );
1798
1799          t!("foo",  "bar", "foobar");
1800          t!("foo", "bar".to_owned(), "foobar");
1801          t!("à¸¨à¹à¸à¸¢ä¸",  "åViá»t Nam", "à¸¨à¹à¸à¸¢ä¸åViá»t Nam");
1802          t!("à¸¨à¹à¸à¸¢ä¸", "åViá»t Nam".to_owned(), "à¸¨à¹à¸à¸¢ä¸åViá»t Nam");
1803      }
1804
1805      #[test]
1806      fn test_iterator() {
1807          use iter::*;
1808          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1809          let v = box ['à¸¨','à¹','à¸','à¸¢','ä¸','å','V','i','á»','t',' ','N','a','m'];
1810
1811          let mut pos = 0;
1812          let mut it = s.chars();
1813
1814          for c in it {
1815              assert_eq!(c, v[pos]);
1816              pos += 1;
1817          }
1818          assert_eq!(pos, v.len());
1819      }
1820
1821      #[test]
1822      fn test_rev_iterator() {
1823          use iter::*;
1824          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1825          let v = box ['m', 'a', 'N', ' ', 't', 'á»','i','V','å','ä¸','à¸¢','à¸','à¹','à¸¨'];
1826
1827          let mut pos = 0;
1828          let mut it = s.chars().rev();
1829
1830          for c in it {
1831              assert_eq!(c, v[pos]);
1832              pos += 1;
1833          }
1834          assert_eq!(pos, v.len());
1835      }
1836
1837      #[test]
1838      fn test_iterator_clone() {
1839          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam";
1840          let mut it = s.chars();
1841          it.next();
1842          assert!(it.zip(it.clone()).all(|(x,y)| x == y));
1843      }
1844
1845      #[test]
1846      fn test_bytesator() {
1847          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1848          let v = [
1849              224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1850              184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1851              109
1852          ];
1853          let mut pos = 0;
1854
1855          for b in s.bytes() {
1856              assert_eq!(b, v[pos]);
1857              pos += 1;
1858          }
1859      }
1860
1861      #[test]
1862      fn test_bytes_revator() {
1863          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned();
1864          let v = [
1865              224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1866              184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1867              109
1868          ];
1869          let mut pos = v.len();
1870
1871          for b in s.bytes().rev() {
1872              pos -= 1;
1873              assert_eq!(b, v[pos]);
1874          }
1875      }
1876
1877      #[test]
1878      fn test_char_indicesator() {
1879          use iter::*;
1880          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam";
1881          let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1882          let v = ['à¸¨','à¹','à¸','à¸¢','ä¸','å','V','i','á»','t',' ','N','a','m'];
1883
1884          let mut pos = 0;
1885          let mut it = s.char_indices();
1886
1887          for c in it {
1888              assert_eq!(c, (p[pos], v[pos]));
1889              pos += 1;
1890          }
1891          assert_eq!(pos, v.len());
1892          assert_eq!(pos, p.len());
1893      }
1894
1895      #[test]
1896      fn test_char_indices_revator() {
1897          use iter::*;
1898          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam";
1899          let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1900          let v = ['m', 'a', 'N', ' ', 't', 'á»','i','V','å','ä¸','à¸¢','à¸','à¹','à¸¨'];
1901
1902          let mut pos = 0;
1903          let mut it = s.char_indices().rev();
1904
1905          for c in it {
1906              assert_eq!(c, (p[pos], v[pos]));
1907              pos += 1;
1908          }
1909          assert_eq!(pos, v.len());
1910          assert_eq!(pos, p.len());
1911      }
1912
1913      #[test]
1914      fn test_split_char_iterator() {
1915          let data = "\nMÃ¤ry hÃ¤d Ã¤ little lÃ¤mb\nLittle lÃ¤mb\n";
1916
1917          let split: Vec<&str> = data.split(' ').collect();
1918          assert_eq!( split, vec!["\nMÃ¤ry", "hÃ¤d", "Ã¤", "little", "lÃ¤mb\nLittle", "lÃ¤mb\n"]);
1919
1920          let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1921          rsplit.reverse();
1922          assert_eq!(rsplit, vec!["\nMÃ¤ry", "hÃ¤d", "Ã¤", "little", "lÃ¤mb\nLittle", "lÃ¤mb\n"]);
1923
1924          let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1925          assert_eq!( split, vec!["\nMÃ¤ry", "hÃ¤d", "Ã¤", "little", "lÃ¤mb\nLittle", "lÃ¤mb\n"]);
1926
1927          let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1928          rsplit.reverse();
1929          assert_eq!(rsplit, vec!["\nMÃ¤ry", "hÃ¤d", "Ã¤", "little", "lÃ¤mb\nLittle", "lÃ¤mb\n"]);
1930
1931          // Unicode
1932          let split: Vec<&str> = data.split('Ã¤').collect();
1933          assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1934
1935          let mut rsplit: Vec<&str> = data.split('Ã¤').rev().collect();
1936          rsplit.reverse();
1937          assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1938
1939          let split: Vec<&str> = data.split(|c: char| c == 'Ã¤').collect();
1940          assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1941
1942          let mut rsplit: Vec<&str> = data.split(|c: char| c == 'Ã¤').rev().collect();
1943          rsplit.reverse();
1944          assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1945      }
1946
1947      #[test]
1948      fn test_splitn_char_iterator() {
1949          let data = "\nMÃ¤ry hÃ¤d Ã¤ little lÃ¤mb\nLittle lÃ¤mb\n";
1950
1951          let split: Vec<&str> = data.splitn(' ', 3).collect();
1952          assert_eq!(split, vec!["\nMÃ¤ry", "hÃ¤d", "Ã¤", "little lÃ¤mb\nLittle lÃ¤mb\n"]);
1953
1954          let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1955          assert_eq!(split, vec!["\nMÃ¤ry", "hÃ¤d", "Ã¤", "little lÃ¤mb\nLittle lÃ¤mb\n"]);
1956
1957          // Unicode
1958          let split: Vec<&str> = data.splitn('Ã¤', 3).collect();
1959          assert_eq!(split, vec!["\nM", "ry h", "d ", " little lÃ¤mb\nLittle lÃ¤mb\n"]);
1960
1961          let split: Vec<&str> = data.splitn(|c: char| c == 'Ã¤', 3).collect();
1962          assert_eq!(split, vec!["\nM", "ry h", "d ", " little lÃ¤mb\nLittle lÃ¤mb\n"]);
1963      }
1964
1965      #[test]
1966      fn test_rsplitn_char_iterator() {
1967          let data = "\nMÃ¤ry hÃ¤d Ã¤ little lÃ¤mb\nLittle lÃ¤mb\n";
1968
1969          let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1970          split.reverse();
1971          assert_eq!(split, vec!["\nMÃ¤ry hÃ¤d Ã¤", "little", "lÃ¤mb\nLittle", "lÃ¤mb\n"]);
1972
1973          let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1974          split.reverse();
1975          assert_eq!(split, vec!["\nMÃ¤ry hÃ¤d Ã¤", "little", "lÃ¤mb\nLittle", "lÃ¤mb\n"]);
1976
1977          // Unicode
1978          let mut split: Vec<&str> = data.rsplitn('Ã¤', 3).collect();
1979          split.reverse();
1980          assert_eq!(split, vec!["\nMÃ¤ry hÃ¤d ", " little l", "mb\nLittle l", "mb\n"]);
1981
1982          let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'Ã¤', 3).collect();
1983          split.reverse();
1984          assert_eq!(split, vec!["\nMÃ¤ry hÃ¤d ", " little l", "mb\nLittle l", "mb\n"]);
1985      }
1986
1987      #[test]
          // `split` keeps the empty piece after a trailing separator, whereas
          // `split_terminator` drops it.
1988      fn test_split_char_iterator_no_trailing() {
1989          let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1990
1991          let split: Vec<&str> = data.split('\n').collect();
1992          assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1993
1994          let split: Vec<&str> = data.split_terminator('\n').collect();
1995          assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
1996      }
1997
1998      #[test]
          // Same contrast between `split` and `split_terminator` as the forward test,
          // but iterating in reverse and flipping the result back before comparing.
1999      fn test_rev_split_char_iterator_no_trailing() {
2000          let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2001
2002          let mut split: Vec<&str> = data.split('\n').rev().collect();
2003          split.reverse();
2004          assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2005
2006          let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
2007          split.reverse();
2008          assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2009      }
2010
2011      #[test]
          // `words()` must split on runs of spaces, tabs and newlines and yield no
          // empty pieces, even with leading and trailing whitespace.
2012      fn test_words() {
2013          let data = "\n \tMäry   häd\tä  little lämb\nLittle lämb\n";
2014          let words: Vec<&str> = data.words().collect();
2015          assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2016      }
2017
2018      #[test]
          // Checks the output of `nfd_chars()` against expected decomposed sequences.
2019      fn test_nfd_chars() {
              // plain ASCII passes through unchanged
2020          assert_eq!("abc".nfd_chars().collect::<~str>(), "abc".to_owned());
              // U+1E0B is split into base + combining mark; U+01C4 and U+2026 are
              // expected to stay as they are here
2021          assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<~str>(), "d\u0307\u01c4".to_owned());
2022          assert_eq!("\u2026".nfd_chars().collect::<~str>(), "\u2026".to_owned());
2023          assert_eq!("\u2126".nfd_chars().collect::<~str>(), "\u03a9".to_owned());
              // two different inputs must end up with the same mark order (U+0323 before U+0307)
2024          assert_eq!("\u1e0b\u0323".nfd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2025          assert_eq!("\u1e0d\u0307".nfd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
              // a combining mark keeps its place relative to the surrounding base chars
2026          assert_eq!("a\u0301".nfd_chars().collect::<~str>(), "a\u0301".to_owned());
2027          assert_eq!("\u0301a".nfd_chars().collect::<~str>(), "\u0301a".to_owned());
              // single code points expanding to sequences in the U+11xx range
2028          assert_eq!("\ud4db".nfd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
2029          assert_eq!("\uac1c".nfd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
2030      }
2031
2032      #[test]
          // Checks the output of `nfkd_chars()` against expected decomposed sequences.
          // Compared with the `nfd_chars` expectations, U+01C4 and U+2026 are expanded
          // here as well.
2033      fn test_nfkd_chars() {
2034          assert_eq!("abc".nfkd_chars().collect::<~str>(), "abc".to_owned());
              // U+01C4 becomes "DZ" followed by U+030C
2035          assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<~str>(), "d\u0307DZ\u030c".to_owned());
              // U+2026 becomes three ASCII dots
2036          assert_eq!("\u2026".nfkd_chars().collect::<~str>(), "...".to_owned());
2037          assert_eq!("\u2126".nfkd_chars().collect::<~str>(), "\u03a9".to_owned());
              // two different inputs must end up with the same mark order (U+0323 before U+0307)
2038          assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2039          assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2040          assert_eq!("a\u0301".nfkd_chars().collect::<~str>(), "a\u0301".to_owned());
2041          assert_eq!("\u0301a".nfkd_chars().collect::<~str>(), "\u0301a".to_owned());
              // single code points expanding to sequences in the U+11xx range
2042          assert_eq!("\ud4db".nfkd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
2043          assert_eq!("\uac1c".nfkd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
2044      }
2045
2046      #[test]
          // `lines()` must yield the same lines whether or not the input ends with a
          // newline, keeping empty lines in the middle.
2047      fn test_lines() {
2048          let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2049          let lines: Vec<&str> = data.lines().collect();
2050          assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2051
2052          let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2053          let lines: Vec<&str> = data.lines().collect();
2054          assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2055      }
2056
2057      #[test]
2058      fn test_split_strator() {
2059          fn t(s: &str, sep: &str, u: &[&str]) {
2060              let v: Vec<&str> = s.split_str(sep).collect();
2061              assert_eq!(v.as_slice(), u.as_slice());
2062          }
2063          t("--1233345--", "12345", ["--1233345--"]);
2064          t("abc::hello::there", "::", ["abc", "hello", "there"]);
2065          t("::hello::there", "::", ["", "hello", "there"]);
2066          t("hello::there::", "::", ["hello", "there", ""]);
2067          t("::hello::there::", "::", ["", "hello", "there", ""]);
2068          t("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam", "ä¸å", ["à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢", "Viá»t Nam"]);
2069          t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2070          t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2071          t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
2072          t("", ".", [""]);
2073          t("zz", "zz", ["",""]);
2074          t("ok", "z", ["ok"]);
2075          t("zzz", "zz", ["","z"]);
2076          t("zzzzz", "zz", ["","","z"]);
2077      }
2078
2079      #[test]
          // The `Default` value of both the borrowed and the owned string type must be
          // the empty string.
2080      fn test_str_default() {
2081          use default::Default;
2082          fn assert_default_is_empty<T: Default + Str>() {
2083              let value: T = Default::default();
2084              assert_eq!(value.as_slice(), "");
2085          }
2086
2087          assert_default_is_empty::<&str>();
2088          assert_default_is_empty::<~str>();
2089      }
2090
2091      #[test]
          // Borrowed and owned strings must both be usable through `Container`, with
          // `len()` reporting their byte length.
2092      fn test_str_container() {
              // adds up `len()` over a slice of any `Container` type
2093          fn total_len<C: Container>(items: &[C]) -> uint {
2094              items.iter().map(|item| item.len()).sum()
2095          }
2096
2097          let whole = "01234".to_owned();
2098          assert_eq!(5, total_len(["012", "", "34"]));
2099          assert_eq!(5, total_len(["01".to_owned(), "2".to_owned(), "34".to_owned(), "".to_owned()]));
2100          assert_eq!(5, total_len([whole.as_slice()]));
2101      }
2102
2103      #[test]
2104      fn test_str_from_utf8() {
2105          let xs = bytes!("hello");
2106          assert_eq!(from_utf8(xs), Some("hello"));
2107
2108          let xs = bytes!("à¸¨à¹à¸à¸¢ä¸åViá»t Nam");
2109          assert_eq!(from_utf8(xs), Some("à¸¨à¹à¸à¸¢ä¸åViá»t Nam"));
2110
2111          let xs = bytes!("hello", 0xff);
2112          assert_eq!(from_utf8(xs), None);
2113      }
2114
2115      #[test]
2116      fn test_str_from_utf8_owned() {
2117          let xs = bytes!("hello").to_owned();
2118          assert_eq!(from_utf8_owned(xs), Some("hello".to_owned()));
2119
2120          let xs = bytes!("à¸¨à¹à¸à¸¢ä¸åViá»t Nam").to_owned();
2121          assert_eq!(from_utf8_owned(xs), Some("à¸¨à¹à¸à¸¢ä¸åViá»t Nam".to_owned()));
2122
2123          let xs = bytes!("hello", 0xff).to_owned();
2124          assert_eq!(from_utf8_owned(xs), None);
2125      }
2126
2127      #[test]
2128      fn test_str_from_utf8_lossy() {
2129          let xs = bytes!("hello");
2130          assert_eq!(from_utf8_lossy(xs), Slice("hello"));
2131
2132          let xs = bytes!("à¸¨à¹à¸à¸¢ä¸åViá»t Nam");
2133          assert_eq!(from_utf8_lossy(xs), Slice("à¸¨à¹à¸à¸¢ä¸åViá»t Nam"));
2134
2135          let xs = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
2136          assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_owned()));
2137
2138          let xs = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2139          assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_owned()));
2140
2141          let xs = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
2142          assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_owned()));
2143
2144          let xs = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
2145          assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_owned()));
2146
2147          let xs = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
2148          assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_owned()));
2149
2150          let xs = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
2151          assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
2152                                                 foo\U00010000bar".to_owned()));
2153
2154          // surrogates
2155          let xs = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
2156          assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
2157                                                 \uFFFD\uFFFD\uFFFDbar".to_owned()));
2158      }
2159
2160      #[test]
          // Parsing a string slice into an owned string must succeed and copy the text.
2161      fn test_from_str() {
2162          let parsed: Option<~str> = from_str("string");
2163          assert_eq!(parsed, Some("string".to_owned()));
2164      }
2165
2166      #[test]
          // Exercises the trait surface of `MaybeOwned` for both variants: length,
          // `as_slice`, `to_str`, formatting, ordering, `Default`, and comparison
          // across variants.
2167      fn test_maybe_owned_traits() {
              // borrowed variant
2168          let s = Slice("abcde");
2169          assert_eq!(s.len(), 5);
2170          assert_eq!(s.as_slice(), "abcde");
2171          assert_eq!(s.to_str(), "abcde".to_owned());
2172          assert_eq!(format!("{}", s), "abcde".to_owned());
2173          assert!(s.lt(&Owned("bcdef".to_owned())));
2174          assert_eq!(Slice(""), Default::default());
2175
              // owned variant holding the same text
2176          let o = Owned("abcde".to_owned());
2177          assert_eq!(o.len(), 5);
2178          assert_eq!(o.as_slice(), "abcde");
2179          assert_eq!(o.to_str(), "abcde".to_owned());
2180          assert_eq!(format!("{}", o), "abcde".to_owned());
2181          assert!(o.lt(&Slice("bcdef")));
2182          assert_eq!(Owned("".to_owned()), Default::default());
2183
              // the two variants must compare as equal in both directions
2184          assert!(s.cmp(&o) == Equal);
2185          assert!(s.equiv(&o));
2186
2187          assert!(o.cmp(&s) == Equal);
2188          assert!(o.equiv(&s));
2189      }
2190
2191      #[test]
          // `is_slice` and `is_owned` must report the variant and be mutually exclusive.
2192      fn test_maybe_owned_methods() {
2193          let borrowed = Slice("abcde");
2194          assert!(borrowed.is_slice());
2195          assert!(!borrowed.is_owned());
2196
2197          let owned = Owned("abcde".to_owned());
2198          assert!(!owned.is_slice());
2199          assert!(owned.is_owned());
2200      }
2201
2202      #[test]
          // Cloning either variant must preserve the text. Each clone is compared with
          // both an `Owned` and a `Slice`, so these assertions pin the content only,
          // not which variant the clone ends up as.
2203      fn test_maybe_owned_clone() {
2204          assert_eq!(Owned("abcde".to_owned()), Slice("abcde").clone());
2205          assert_eq!(Owned("abcde".to_owned()), Owned("abcde".to_owned()).clone());
2206          assert_eq!(Slice("abcde"), Slice("abcde").clone());
2207          assert_eq!(Slice("abcde"), Owned("abcde".to_owned()).clone());
2208      }
2209
2210      #[test]
          // `into_owned` must produce an owned string with the same text from either
          // variant.
2211      fn test_maybe_owned_into_owned() {
2212          assert_eq!(Slice("abcde").into_owned(), "abcde".to_owned());
2213          assert_eq!(Owned("abcde".to_owned()).into_owned(), "abcde".to_owned());
2214      }
2215
2216      #[test]
          // Converts a borrowed and an owned string with `into_maybe_owned` and compares
          // each result with both a `Slice` and an `Owned` holding the same text. All
          // four assertions can only hold together if equality looks at the text and
          // not at the variant, so the variant chosen by the conversion is not pinned.
2217      fn test_into_maybe_owned() {
2218          assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2219          assert_eq!(("abcde".to_owned()).into_maybe_owned(), Slice("abcde"));
2220          assert_eq!("abcde".into_maybe_owned(), Owned("abcde".to_owned()));
2221          assert_eq!(("abcde".to_owned()).into_maybe_owned(), Owned("abcde".to_owned()));
2222      }
2223  }
2224
2225  #[cfg(test)]
2226  mod bench {
2227      extern crate test;
2228      use self::test::Bencher;
2229      use super::*;
2230      use prelude::*;
2231
2232      #[bench]
2233      fn char_iterator(b: &mut Bencher) {
2234          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2235          let len = s.char_len();
2236
2237          b.iter(|| assert_eq!(s.chars().len(), len));
2238      }
2239
2240      #[bench]
2241      fn char_iterator_ascii(b: &mut Bencher) {
2242          let s = "Mary had a little lamb, Little lamb
2243          Mary had a little lamb, Little lamb
2244          Mary had a little lamb, Little lamb
2245          Mary had a little lamb, Little lamb
2246          Mary had a little lamb, Little lamb
2247          Mary had a little lamb, Little lamb";
2248          let len = s.char_len();
2249
2250          b.iter(|| assert_eq!(s.chars().len(), len));
2251      }
2252
2253      #[bench]
2254      fn char_iterator_rev(b: &mut Bencher) {
2255          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2256          let len = s.char_len();
2257
2258          b.iter(|| assert_eq!(s.chars().rev().len(), len));
2259      }
2260
2261      #[bench]
2262      fn char_indicesator(b: &mut Bencher) {
2263          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2264          let len = s.char_len();
2265
2266          b.iter(|| assert_eq!(s.char_indices().len(), len));
2267      }
2268
2269      #[bench]
2270      fn char_indicesator_rev(b: &mut Bencher) {
2271          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2272          let len = s.char_len();
2273
2274          b.iter(|| assert_eq!(s.char_indices().rev().len(), len));
2275      }
2276
2277      #[bench]
2278      fn split_unicode_ascii(b: &mut Bencher) {
2279          let s = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Namà¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam";
2280
2281          b.iter(|| assert_eq!(s.split('V').len(), 3));
2282      }
2283
2284      #[bench]
2285      fn split_unicode_not_ascii(b: &mut Bencher) {
2286          struct NotAscii(char);
2287          impl CharEq for NotAscii {
2288              fn matches(&mut self, c: char) -> bool {
2289                  let NotAscii(cc) = *self;
2290                  cc == c
2291              }
2292              fn only_ascii(&self) -> bool { false }
2293          }
2294          let s = "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Namà¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam";
2295
2296          b.iter(|| assert_eq!(s.split(NotAscii('V')).len(), 3));
2297      }
2298
2299
2300      #[bench]
2301      fn split_ascii(b: &mut Bencher) {
2302          let s = "Mary had a little lamb, Little lamb, little-lamb.";
2303          let len = s.split(' ').len();
2304
2305          b.iter(|| assert_eq!(s.split(' ').len(), len));
2306      }
2307
2308      #[bench]
2309      fn split_not_ascii(b: &mut Bencher) {
2310          struct NotAscii(char);
2311          impl CharEq for NotAscii {
2312              #[inline]
2313              fn matches(&mut self, c: char) -> bool {
2314                  let NotAscii(cc) = *self;
2315                  cc == c
2316              }
2317              fn only_ascii(&self) -> bool { false }
2318          }
2319          let s = "Mary had a little lamb, Little lamb, little-lamb.";
2320          let len = s.split(' ').len();
2321
2322          b.iter(|| assert_eq!(s.split(NotAscii(' ')).len(), len));
2323      }
2324
2325      #[bench]
2326      fn split_extern_fn(b: &mut Bencher) {
2327          let s = "Mary had a little lamb, Little lamb, little-lamb.";
2328          let len = s.split(' ').len();
2329          fn pred(c: char) -> bool { c == ' ' }
2330
2331          b.iter(|| assert_eq!(s.split(pred).len(), len));
2332      }
2333
2334      #[bench]
2335      fn split_closure(b: &mut Bencher) {
2336          let s = "Mary had a little lamb, Little lamb, little-lamb.";
2337          let len = s.split(' ').len();
2338
2339          b.iter(|| assert_eq!(s.split(|c: char| c == ' ').len(), len));
2340      }
2341
2342      #[bench]
2343      fn split_slice(b: &mut Bencher) {
2344          let s = "Mary had a little lamb, Little lamb, little-lamb.";
2345          let len = s.split(' ').len();
2346
2347          b.iter(|| assert_eq!(s.split(&[' ']).len(), len));
2348      }
2349
2350      #[bench]
2351      fn is_utf8_100_ascii(b: &mut Bencher) {
2352
2353          let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2354                          Lorem ipsum dolor sit amet, consectetur. ");
2355
2356          assert_eq!(100, s.len());
2357          b.iter(|| {
2358              is_utf8(s)
2359          });
2360      }
2361
2362      #[bench]
2363      fn is_utf8_100_multibyte(b: &mut Bencher) {
2364          let s = bytes!("ððððððà¸à¸£Ø¯ÙÙØ© Ø§ÙÙÙÙØªà¸à¸¨à¹à¸à¸¢ä¸åðð¿ð»ðð¹ð»ð°");
2365          assert_eq!(100, s.len());
2366          b.iter(|| {
2367              is_utf8(s)
2368          });
2369      }
2370
2371      #[bench]
2372      fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
2373          let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2374                          Lorem ipsum dolor sit amet, consectetur. ");
2375
2376          assert_eq!(100, s.len());
2377          b.iter(|| {
2378              let _ = from_utf8_lossy(s);
2379          });
2380      }
2381
2382      #[bench]
2383      fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
2384          let s = bytes!("ððððððà¸à¸£Ø¯ÙÙØ© Ø§ÙÙÙÙØªà¸à¸¨à¹à¸à¸¢ä¸åðð¿ð»ðð¹ð»ð°");
2385          assert_eq!(100, s.len());
2386          b.iter(|| {
2387              let _ = from_utf8_lossy(s);
2388          });
2389      }
2390
2391      #[bench]
2392      fn from_utf8_lossy_invalid(b: &mut Bencher) {
2393          let s = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2394          b.iter(|| {
2395              let _ = from_utf8_lossy(s);
2396          });
2397      }
2398
2399      #[bench]
2400      fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
2401          let s = Vec::from_elem(100, 0xF5u8);
2402          b.iter(|| {
2403              let _ = from_utf8_lossy(s.as_slice());
2404          });
2405      }
2406
2407      #[bench]
2408      fn bench_connect(b: &mut Bencher) {
2409          let s = "à¸¨à¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
2410          let sep = "â";
2411          let v = [s, s, s, s, s, s, s, s, s, s];
2412          b.iter(|| {
2413              assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2414          })
2415      }
2416  }

libstd/str.rs:415:4-415:4 -fn- definition:
    fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
        if i >= total {
            0
references:- 6
464:                 3 => {
465:                     match (byte, safe_get(v, i, total)) {
466:                         (0xE0        , 0xA0 .. 0xBF) => (),
--
482:                 4 => {
483:                     match (byte, safe_get(v, i, total)) {
484:                         (0xF0        , 0x90 .. 0xBF) => (),
--
497:                     i += 1;
498:                     if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
499:                         error!();

libstd/str.rs:230:19-230:19 -enum- definition:
enum NormalizationForm {
    NFD,
    NFKD
references:- 3
231: enum NormalizationForm {
--
239: pub struct Normalizations<'a> {
240:     kind: NormalizationForm,
241:     iter: Chars<'a>,

libstd/str.rs:144:35-144:35 -trait- definition:
/// Methods for vectors of strings
pub trait StrVector {
    /// Concatenate a vector of strings.
references:- 2
153: impl<'a, S: Str> StrVector for &'a [S] {
154:     fn concat(&self) -> ~str {
--
194: impl<'a, S: Str> StrVector for Vec<S> {
195:     #[inline]

libstd/str.rs:709:4-709:4 -fn- definition:
    pub unsafe fn from_utf8_owned(v: ~[u8]) -> ~str {
        cast::transmute(v)
    }
references:- 3
713:     /// Converts a byte to a string.
714:     pub unsafe fn from_byte(u: u8) -> ~str { from_utf8_owned(box [u]) }
libstd/ascii.rs:
417:     str::raw::from_utf8_owned(bytes)
418: }
libstd/str.rs:
110:     if is_utf8(vv) {
111:         Some(unsafe { raw::from_utf8_owned(vv) })
112:     } else {

libstd/str.rs:412:4-412:4 -fn- definition:
    fn unsafe_get(xs: &[u8], i: uint) -> u8 {
        unsafe { *xs.unsafe_ref(i) }
    }
references:- 2
437:         let i_ = i;
438:         let byte = unsafe_get(v, i);
439:         i += 1;

libstd/str.rs:525:27-525:27 -enum- definition:
/// needed but not always.
pub enum MaybeOwned<'a> {
    /// A borrowed string
references:- 28
568:     #[inline]
569:     fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
570: }
--
643:     #[inline]
644:     fn default() -> MaybeOwned<'a> { Slice("") }
645: }
--
657: impl<'a> fmt::Show for MaybeOwned<'a> {
658:     #[inline]
libstd/path/mod.rs:
496:     #[inline]
497:     pub fn as_maybe_owned(&self) -> MaybeOwned<'a> {
498:         from_utf8_lossy(if self.filename {
--
583: impl<'a> BytesContainer for str::MaybeOwned<'a> {
584:     #[inline]
--
592:     #[inline]
593:     fn is_str(_: Option<str::MaybeOwned>) -> bool { true }
594: }
libstd/str.rs:
642: impl<'a> Default for MaybeOwned<'a> {
643:     #[inline]

libstd/str.rs:746:50-746:50 -trait- definition:
/// Any string that can be represented as a slice
pub trait StrAllocating: Str {
    /// Convert `self` into a ~str, not making a copy if possible.
references:- 5
917: impl<'a> StrAllocating for ~str {
918:     #[inline]
libstd/strbuf.rs:
273: impl StrAllocating for StrBuf {
274:     #[inline]
libstd/path/windows.rs:
687:     fn normalize_<S: StrAllocating>(s: S) -> (Option<PathPrefix>, StrBuf) {
688:         // make borrowck happy
libstd/str.rs:
617: impl<'a> StrAllocating for MaybeOwned<'a> {
618:     #[inline]

libstd/str.rs:210:51-210:51 -fn- definition:
// Helper functions used for Unicode normalization
fn canonical_sort(comb: &mut [(char, u8)]) {
    use iter::range;
references:- 2
275:                     if class == 0 && !*sorted {
276:                         canonical_sort(buffer.as_mut_slice());
277:                         *sorted = true;
--
285:         if !self.sorted {
286:             canonical_sort(self.buffer.as_mut_slice());
287:             self.sorted = true;

libstd/str.rs:533:63-533:63 -NK_AS_STR_TODO- definition:
/// SendStr is a specialization of `MaybeOwned` to be sendable
pub type SendStr = MaybeOwned<'static>;
impl<'a> MaybeOwned<'a> {
references:- 2
libstd/task.rs:
67:     /// A name for the task-to-be, for identification in failure messages
68:     pub name: Option<SendStr>,
69:     /// The size of the stack for the spawned task
libstd/rt/task.rs:
52:     pub destroyed: bool,
53:     pub name: Option<SendStr>,

libstd/str.rs:696:4-696:4 -fn- definition:
    pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
        let mut curr = buf;
        let mut i = 0;
references:- 2
libstd/os.rs:
708:             str::raw::from_c_str(p as *c_char)
709:         }
libstd/unstable/dynamic_lib.rs:
189:             } else {
190:                 Err(str::raw::from_c_str(last_error))
191:             };

libstd/str.rs:403:8-403:8 -fn- definition:
/// ```
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
    if is_utf8(v) {
references:- 5
libstd/os.rs:
174:         let k = str::from_utf8_lossy(k).into_owned();
175:         let v = str::from_utf8_lossy(v).into_owned();
176:         (k,v)
--
827: fn real_args() -> Vec<~str> {
828:     real_args_as_bytes().move_iter().map(|v| str::from_utf8_lossy(v).into_owned()).collect()
829: }
libstd/path/mod.rs:
497:     pub fn as_maybe_owned(&self) -> MaybeOwned<'a> {
498:         from_utf8_lossy(if self.filename {
499:             match self.path.filename() {
libstd/os.rs:
173:     env_as_bytes().move_iter().map(|(k,v)| {
174:         let k = str::from_utf8_lossy(k).into_owned();
175:         let v = str::from_utf8_lossy(v).into_owned();

libstd/str.rs:238:19-238:19 -struct- definition:
pub struct Normalizations<'a> {
    kind: NormalizationForm,
    iter: Chars<'a>,
references:- 9
902:     fn nfkd_chars<'a>(&'a self) -> Normalizations<'a> {
903:         Normalizations {
904:             iter: self.as_slice().chars(),

libstd/str.rs:556:41-556:41 -trait- definition:
/// Trait for moving into a `MaybeOwned`
pub trait IntoMaybeOwned<'a> {
    /// Moves self into a `MaybeOwned`
references:- 4
567: impl<'a> IntoMaybeOwned<'a> for &'a str {
568:     #[inline]
--
572: impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
573:     #[inline]
libstd/task.rs:
136:     /// only in failure messages.
137:     pub fn named<S: IntoMaybeOwned<'static>>(mut self, name: S) -> TaskBuilder {
138:         self.opts.name = Some(name.into_maybe_owned());

libstd/str.rs:680:4-680:4 -fn- definition:
    pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
        let v = Slice { data: buf, len: len };
        let bytes: &[u8] = ::cast::transmute(v);
references:- 2
691:     unsafe fn strdup_uniq(ptr: *u8, len: uint) -> ~str {
692:         from_buf_len(ptr, len)
693:     }
--
702:         }
703:         from_buf_len(buf as *u8, i as uint)
704:     }