1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 // An "interner" is a data structure that associates values with uint tags and
12 // allows bidirectional lookup; i.e. given a value, one can easily find the
13 // type, and vice versa.
14
15 use ast::Name;
16
17 use collections::HashMap;
18 use std::cast;
19 use std::cell::RefCell;
20 use std::cmp::Equiv;
21 use std::fmt;
22 use std::hash::Hash;
23 use std::rc::Rc;
24
25 pub struct Interner<T> {
26 map: RefCell<HashMap<T, Name>>,
27 vect: RefCell<Vec<T> >,
28 }
29
30 // when traits can extend traits, we should extend index<Name,T> to get []
31 impl<T: TotalEq + Hash + Clone + 'static> Interner<T> {
32 pub fn new() -> Interner<T> {
33 Interner {
34 map: RefCell::new(HashMap::new()),
35 vect: RefCell::new(Vec::new()),
36 }
37 }
38
39 pub fn prefill(init: &[T]) -> Interner<T> {
40 let rv = Interner::new();
41 for v in init.iter() {
42 rv.intern((*v).clone());
43 }
44 rv
45 }
46
47 pub fn intern(&self, val: T) -> Name {
48 let mut map = self.map.borrow_mut();
49 match (*map).find(&val) {
50 Some(&idx) => return idx,
51 None => (),
52 }
53
54 let mut vect = self.vect.borrow_mut();
55 let new_idx = (*vect).len() as Name;
56 (*map).insert(val.clone(), new_idx);
57 (*vect).push(val);
58 new_idx
59 }
60
61 pub fn gensym(&self, val: T) -> Name {
62 let mut vect = self.vect.borrow_mut();
63 let new_idx = (*vect).len() as Name;
64 // leave out of .map to avoid colliding
65 (*vect).push(val);
66 new_idx
67 }
68
69 pub fn get(&self, idx: Name) -> T {
70 let vect = self.vect.borrow();
71 (*(*vect).get(idx as uint)).clone()
72 }
73
74 pub fn len(&self) -> uint {
75 let vect = self.vect.borrow();
76 (*vect).len()
77 }
78
79 pub fn find_equiv<Q:Hash + Equiv<T>>(&self, val: &Q) -> Option<Name> {
80 let map = self.map.borrow();
81 match (*map).find_equiv(val) {
82 Some(v) => Some(*v),
83 None => None,
84 }
85 }
86
87 pub fn clear(&self) {
88 *self.map.borrow_mut() = HashMap::new();
89 *self.vect.borrow_mut() = Vec::new();
90 }
91 }
92
93 #[deriving(Clone, Eq, Hash, Ord)]
94 pub struct RcStr {
95 string: Rc<StrBuf>,
96 }
97
98 impl TotalEq for RcStr {}
99
100 impl TotalOrd for RcStr {
101 fn cmp(&self, other: &RcStr) -> Ordering {
102 self.as_slice().cmp(&other.as_slice())
103 }
104 }
105
106 impl Str for RcStr {
107 #[inline]
108 fn as_slice<'a>(&'a self) -> &'a str {
109 let s: &'a str = self.string.as_slice();
110 s
111 }
112 }
113
114 impl fmt::Show for RcStr {
115 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
116 use std::fmt::Show;
117 self.as_slice().fmt(f)
118 }
119 }
120
121 impl RcStr {
122 pub fn new(string: &str) -> RcStr {
123 RcStr {
124 string: Rc::new(string.to_strbuf()),
125 }
126 }
127 }
128
129 // A StrInterner differs from Interner<String> in that it accepts
130 // &str rather than RcStr, resulting in less allocation.
131 pub struct StrInterner {
132 map: RefCell<HashMap<RcStr, Name>>,
133 vect: RefCell<Vec<RcStr> >,
134 }
135
136 // when traits can extend traits, we should extend index<Name,T> to get []
137 impl StrInterner {
138 pub fn new() -> StrInterner {
139 StrInterner {
140 map: RefCell::new(HashMap::new()),
141 vect: RefCell::new(Vec::new()),
142 }
143 }
144
145 pub fn prefill(init: &[&str]) -> StrInterner {
146 let rv = StrInterner::new();
147 for &v in init.iter() { rv.intern(v); }
148 rv
149 }
150
151 pub fn intern(&self, val: &str) -> Name {
152 let mut map = self.map.borrow_mut();
153 match map.find_equiv(&val) {
154 Some(&idx) => return idx,
155 None => (),
156 }
157
158 let new_idx = self.len() as Name;
159 let val = RcStr::new(val);
160 map.insert(val.clone(), new_idx);
161 self.vect.borrow_mut().push(val);
162 new_idx
163 }
164
165 pub fn gensym(&self, val: &str) -> Name {
166 let new_idx = self.len() as Name;
167 // leave out of .map to avoid colliding
168 self.vect.borrow_mut().push(RcStr::new(val));
169 new_idx
170 }
171
172 // I want these gensyms to share name pointers
173 // with existing entries. This would be automatic,
174 // except that the existing gensym creates its
175 // own managed ptr using to_managed. I think that
176 // adding this utility function is the most
177 // lightweight way to get what I want, though not
178 // necessarily the cleanest.
179
180 // create a gensym with the same name as an existing
181 // entry.
182 pub fn gensym_copy(&self, idx : Name) -> Name {
183 let new_idx = self.len() as Name;
184 // leave out of map to avoid colliding
185 let mut vect = self.vect.borrow_mut();
186 let existing = (*vect.get(idx as uint)).clone();
187 vect.push(existing);
188 new_idx
189 }
190
191 pub fn get(&self, idx: Name) -> RcStr {
192 (*self.vect.borrow().get(idx as uint)).clone()
193 }
194
195 /// Returns this string with lifetime tied to the interner. Since
196 /// strings may never be removed from the interner, this is safe.
197 pub fn get_ref<'a>(&'a self, idx: Name) -> &'a str {
198 let vect = self.vect.borrow();
199 let s: &str = vect.get(idx as uint).as_slice();
200 unsafe {
201 cast::transmute(s)
202 }
203 }
204
205 pub fn len(&self) -> uint {
206 self.vect.borrow().len()
207 }
208
209 pub fn find_equiv<Q:Hash + Equiv<RcStr>>(&self, val: &Q) -> Option<Name> {
210 match (*self.map.borrow()).find_equiv(val) {
211 Some(v) => Some(*v),
212 None => None,
213 }
214 }
215
216 pub fn clear(&self) {
217 *self.map.borrow_mut() = HashMap::new();
218 *self.vect.borrow_mut() = Vec::new();
219 }
220 }
221
222 #[cfg(test)]
223 mod tests {
224 use super::*;
225 #[test]
226 #[should_fail]
227 fn i1 () {
228 let i : Interner<RcStr> = Interner::new();
229 i.get(13);
230 }
231
232 #[test]
233 fn interner_tests () {
234 let i : Interner<RcStr> = Interner::new();
235 // first one is zero:
236 assert_eq!(i.intern(RcStr::new("dog")), 0);
237 // re-use gets the same entry:
238 assert_eq!(i.intern(RcStr::new("dog")), 0);
239 // different string gets a different #:
240 assert_eq!(i.intern(RcStr::new("cat")), 1);
241 assert_eq!(i.intern(RcStr::new("cat")), 1);
242 // dog is still at zero
243 assert_eq!(i.intern(RcStr::new("dog")), 0);
244 // gensym gets 3
245 assert_eq!(i.gensym(RcStr::new("zebra") ), 2);
246 // gensym of same string gets new number :
247 assert_eq!(i.gensym (RcStr::new("zebra") ), 3);
248 // gensym of *existing* string gets new number:
249 assert_eq!(i.gensym(RcStr::new("dog")), 4);
250 assert_eq!(i.get(0), RcStr::new("dog"));
251 assert_eq!(i.get(1), RcStr::new("cat"));
252 assert_eq!(i.get(2), RcStr::new("zebra"));
253 assert_eq!(i.get(3), RcStr::new("zebra"));
254 assert_eq!(i.get(4), RcStr::new("dog"));
255 }
256
257 #[test]
258 fn i3 () {
259 let i : Interner<RcStr> = Interner::prefill([
260 RcStr::new("Alan"),
261 RcStr::new("Bob"),
262 RcStr::new("Carol")
263 ]);
264 assert_eq!(i.get(0), RcStr::new("Alan"));
265 assert_eq!(i.get(1), RcStr::new("Bob"));
266 assert_eq!(i.get(2), RcStr::new("Carol"));
267 assert_eq!(i.intern(RcStr::new("Bob")), 1);
268 }
269
270 #[test]
271 fn string_interner_tests() {
272 let i : StrInterner = StrInterner::new();
273 // first one is zero:
274 assert_eq!(i.intern("dog"), 0);
275 // re-use gets the same entry:
276 assert_eq!(i.intern ("dog"), 0);
277 // different string gets a different #:
278 assert_eq!(i.intern("cat"), 1);
279 assert_eq!(i.intern("cat"), 1);
280 // dog is still at zero
281 assert_eq!(i.intern("dog"), 0);
282 // gensym gets 3
283 assert_eq!(i.gensym("zebra"), 2);
284 // gensym of same string gets new number :
285 assert_eq!(i.gensym("zebra"), 3);
286 // gensym of *existing* string gets new number:
287 assert_eq!(i.gensym("dog"), 4);
288 // gensym tests again with gensym_copy:
289 assert_eq!(i.gensym_copy(2), 5);
290 assert_eq!(i.get(5), RcStr::new("zebra"));
291 assert_eq!(i.gensym_copy(2), 6);
292 assert_eq!(i.get(6), RcStr::new("zebra"));
293 assert_eq!(i.get(0), RcStr::new("dog"));
294 assert_eq!(i.get(1), RcStr::new("cat"));
295 assert_eq!(i.get(2), RcStr::new("zebra"));
296 assert_eq!(i.get(3), RcStr::new("zebra"));
297 assert_eq!(i.get(4), RcStr::new("dog"));
298 }
299 }