(index<- ) ./libstd/strbuf.rs
git branch: * master 5200215 auto merge of #14035 : alexcrichton/rust/experimental, r=huonw
modified: Fri May 9 13:02:28 2014
1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! An owned, growable string that enforces that its contents are valid UTF-8.
12
13 use c_vec::CVec;
14 use cast;
15 use char::Char;
16 use container::Container;
17 use fmt;
18 use io::Writer;
19 use iter::{Extendable, FromIterator, Iterator, range};
20 use option::{None, Option, Some};
21 use ptr::RawPtr;
22 use slice::{OwnedVector, Vector, CloneableVector};
23 use str::{OwnedStr, Str, StrSlice, StrAllocating};
24 use str;
25 use vec::Vec;
26
27 /// A growable string stored as a UTF-8 encoded buffer.
28 #[deriving(Clone, Eq, Ord, TotalEq, TotalOrd)]
29 pub struct StrBuf {
30 vec: Vec<u8>,
31 }
32
33 impl StrBuf {
34 /// Creates a new string buffer initialized with the empty string.
35 #[inline]
36 pub fn new() -> StrBuf {
37 StrBuf {
38 vec: Vec::new(),
39 }
40 }
41
42 /// Creates a new string buffer with the given capacity.
43 #[inline]
44 pub fn with_capacity(capacity: uint) -> StrBuf {
45 StrBuf {
46 vec: Vec::with_capacity(capacity),
47 }
48 }
49
50 /// Creates a new string buffer from length, capacity, and a pointer.
51 #[inline]
52 pub unsafe fn from_raw_parts(length: uint, capacity: uint, ptr: *mut u8) -> StrBuf {
53 StrBuf {
54 vec: Vec::from_raw_parts(length, capacity, ptr),
55 }
56 }
57
58 /// Creates a new string buffer from the given string.
59 #[inline]
60 pub fn from_str(string: &str) -> StrBuf {
61 StrBuf {
62 vec: Vec::from_slice(string.as_bytes())
63 }
64 }
65
66 /// Creates a new string buffer from the given owned string, taking care not to copy it.
67 #[inline]
68 pub fn from_owned_str(string: ~str) -> StrBuf {
69 StrBuf {
70 vec: string.into_bytes().move_iter().collect(),
71 }
72 }
73
74 /// Tries to create a new string buffer from the given byte
75 /// vector, validating that the vector is UTF-8 encoded.
76 #[inline]
77 pub fn from_utf8(vec: Vec<u8>) -> Option<StrBuf> {
78 if str::is_utf8(vec.as_slice()) {
79 Some(StrBuf { vec: vec })
80 } else {
81 None
82 }
83 }
84
85 /// Return the underlying byte buffer, encoded as UTF-8.
86 #[inline]
87 pub fn into_bytes(self) -> Vec<u8> {
88 self.vec
89 }
90
91 /// Pushes the given string onto this buffer; then, returns `self` so that it can be used
92 /// again.
93 #[inline]
94 pub fn append(mut self, second: &str) -> StrBuf {
95 self.push_str(second);
96 self
97 }
98
99 /// Creates a string buffer by repeating a character `length` times.
100 #[inline]
101 pub fn from_char(length: uint, ch: char) -> StrBuf {
102 if length == 0 {
103 return StrBuf::new()
104 }
105
106 let mut buf = StrBuf::new();
107 buf.push_char(ch);
108 let size = buf.len() * length;
109 buf.reserve(size);
110 for _ in range(1, length) {
111 buf.push_char(ch)
112 }
113 buf
114 }
115
116 /// Pushes the given string onto this string buffer.
117 #[inline]
118 pub fn push_str(&mut self, string: &str) {
119 self.vec.push_all(string.as_bytes())
120 }
121
122 /// Push `ch` onto the given string `count` times.
123 #[inline]
124 pub fn grow(&mut self, count: uint, ch: char) {
125 for _ in range(0, count) {
126 self.push_char(ch)
127 }
128 }
129
130 /// Returns the number of bytes that this string buffer can hold without reallocating.
131 #[inline]
132 pub fn byte_capacity(&self) -> uint {
133 self.vec.capacity()
134 }
135
136 /// Reserves capacity for at least `extra` additional bytes in this string buffer.
137 #[inline]
138 pub fn reserve_additional(&mut self, extra: uint) {
139 self.vec.reserve_additional(extra)
140 }
141
142 /// Reserves capacity for at least `capacity` bytes in this string buffer.
143 #[inline]
144 pub fn reserve(&mut self, capacity: uint) {
145 self.vec.reserve(capacity)
146 }
147
148 /// Reserves capacity for exactly `capacity` bytes in this string buffer.
149 #[inline]
150 pub fn reserve_exact(&mut self, capacity: uint) {
151 self.vec.reserve_exact(capacity)
152 }
153
154 /// Shrinks the capacity of this string buffer to match its length.
155 #[inline]
156 pub fn shrink_to_fit(&mut self) {
157 self.vec.shrink_to_fit()
158 }
159
160 /// Adds the given character to the end of the string.
161 #[inline]
162 pub fn push_char(&mut self, ch: char) {
163 let cur_len = self.len();
164 unsafe {
165 // This may use up to 4 bytes.
166 self.vec.reserve_additional(4);
167
168 // Attempt to not use an intermediate buffer by just pushing bytes
169 // directly onto this string.
170 let mut c_vector = CVec::new(self.vec.as_mut_ptr().offset(cur_len as int), 4);
171 let used = ch.encode_utf8(c_vector.as_mut_slice());
172 self.vec.set_len(cur_len + used);
173 }
174 }
175
176 /// Pushes the given bytes onto this string buffer. This is unsafe because it does not check
177 /// to ensure that the resulting string will be valid UTF-8.
178 #[inline]
179 pub unsafe fn push_bytes(&mut self, bytes: &[u8]) {
180 self.vec.push_all(bytes)
181 }
182
183 /// Works with the underlying buffer as a byte slice.
184 #[inline]
185 pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
186 self.vec.as_slice()
187 }
188
189 /// Shorten a string to the specified length (which must be <= the current length)
190 #[inline]
191 pub fn truncate(&mut self, len: uint) {
192 assert!(self.as_slice().is_char_boundary(len));
193 self.vec.truncate(len)
194 }
195
196 /// Appends a byte to this string buffer. The caller must preserve the valid UTF-8 property.
197 #[inline]
198 pub unsafe fn push_byte(&mut self, byte: u8) {
199 self.push_bytes([byte])
200 }
201
202 /// Removes the last byte from the string buffer and returns it. Returns `None` if this string
203 /// buffer is empty.
204 ///
205 /// The caller must preserve the valid UTF-8 property.
206 #[inline]
207 pub unsafe fn pop_byte(&mut self) -> Option<u8> {
208 let len = self.len();
209 if len == 0 {
210 return None
211 }
212
213 let byte = self.as_slice()[len - 1];
214 self.vec.set_len(len - 1);
215 Some(byte)
216 }
217
218 /// Removes the first byte from the string buffer and returns it. Returns `None` if this string
219 /// buffer is empty.
220 ///
221 /// The caller must preserve the valid UTF-8 property.
222 pub unsafe fn shift_byte(&mut self) -> Option<u8> {
223 let len = self.len();
224 if len == 0 {
225 return None
226 }
227
228 let byte = self.as_slice()[0];
229 *self = self.as_slice().slice(1, len).into_strbuf();
230 Some(byte)
231 }
232
233 /// Views the string buffer as a mutable sequence of bytes.
234 ///
235 /// Callers must preserve the valid UTF-8 property.
236 pub unsafe fn as_mut_vec<'a>(&'a mut self) -> &'a mut Vec<u8> {
237 &mut self.vec
238 }
239 }
240
241 impl Container for StrBuf {
242 #[inline]
243 fn len(&self) -> uint {
244 self.vec.len()
245 }
246 }
247
248 impl FromIterator<char> for StrBuf {
249 fn from_iter<I:Iterator<char>>(iterator: I) -> StrBuf {
250 let mut buf = StrBuf::new();
251 buf.extend(iterator);
252 buf
253 }
254 }
255
256 impl Extendable<char> for StrBuf {
257 fn extend<I:Iterator<char>>(&mut self, mut iterator: I) {
258 for ch in iterator {
259 self.push_char(ch)
260 }
261 }
262 }
263
264 impl Str for StrBuf {
265 #[inline]
266 fn as_slice<'a>(&'a self) -> &'a str {
267 unsafe {
268 cast::transmute(self.vec.as_slice())
269 }
270 }
271 }
272
273 impl StrAllocating for StrBuf {
274 #[inline]
275 fn into_owned(self) -> ~str {
276 unsafe {
277 cast::transmute(self.vec.as_slice().to_owned())
278 }
279 }
280
281 #[inline]
282 fn into_strbuf(self) -> StrBuf { self }
283 }
284
285 impl fmt::Show for StrBuf {
286 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
287 self.as_slice().fmt(f)
288 }
289 }
290
291 impl<H:Writer> ::hash::Hash<H> for StrBuf {
292 #[inline]
293 fn hash(&self, hasher: &mut H) {
294 self.as_slice().hash(hasher)
295 }
296 }
297
298 #[cfg(test)]
299 mod tests {
300 extern crate test;
301 use self::test::Bencher;
302 use str::{Str, StrSlice};
303 use super::StrBuf;
304
305 #[bench]
306 fn bench_with_capacity(b: &mut Bencher) {
307 b.iter(|| {
308 StrBuf::with_capacity(100)
309 });
310 }
311
312 #[bench]
313 fn bench_push_str(b: &mut Bencher) {
314 let s = "ศà¹à¸à¸¢ä¸åViá»t Nam; Mary had a little lamb, Little lamb";
315 b.iter(|| {
316 let mut r = StrBuf::new();
317 r.push_str(s);
318 });
319 }
320
321 #[test]
322 fn test_push_bytes() {
323 let mut s = StrBuf::from_str("ABC");
324 unsafe {
325 s.push_bytes([ 'D' as u8 ]);
326 }
327 assert_eq!(s.as_slice(), "ABCD");
328 }
329
330 #[test]
331 fn test_push_str() {
332 let mut s = StrBuf::new();
333 s.push_str("");
334 assert_eq!(s.as_slice().slice_from(0), "");
335 s.push_str("abc");
336 assert_eq!(s.as_slice().slice_from(0), "abc");
337 s.push_str("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam");
338 assert_eq!(s.as_slice().slice_from(0), "abcà¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åViá»t Nam");
339 }
340
341 #[test]
342 fn test_push_char() {
343 let mut data = StrBuf::from_str("à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸");
344 data.push_char('å');
345 data.push_char('b'); // 1 byte
346 data.push_char('¢'); // 2 byte
347 data.push_char('â¬'); // 3 byte
348 data.push_char('ð¤¢'); // 4 byte
349 assert_eq!(data.as_slice(), "à¸à¸£à¸°à¹à¸à¸¨à¹à¸à¸¢ä¸åb¢â¬ð¤¢");
350 }
351
352 #[test]
353 fn test_str_truncate() {
354 let mut s = StrBuf::from_str("12345");
355 s.truncate(5);
356 assert_eq!(s.as_slice(), "12345");
357 s.truncate(3);
358 assert_eq!(s.as_slice(), "123");
359 s.truncate(0);
360 assert_eq!(s.as_slice(), "");
361
362 let mut s = StrBuf::from_str("12345");
363 let p = s.as_slice().as_ptr();
364 s.truncate(3);
365 s.push_str("6");
366 let p_ = s.as_slice().as_ptr();
367 assert_eq!(p_, p);
368 }
369
370 #[test]
371 #[should_fail]
372 fn test_str_truncate_invalid_len() {
373 let mut s = StrBuf::from_str("12345");
374 s.truncate(6);
375 }
376
377 #[test]
378 #[should_fail]
379 fn test_str_truncate_split_codepoint() {
380 let mut s = StrBuf::from_str("\u00FC"); // ü
381 s.truncate(1);
382 }
383 }