Mercurial > projects > aid
view trunk/mintl/hashaa.d @ 1:5dd9f598bcd8
Update
author | revcompgeek |
---|---|
date | Sat, 29 Mar 2008 12:30:20 -0600 |
parents | |
children |
line wrap: on
line source
/** \file hashaa.d * \brief A hash-based associative array that maintains elements in insertion order * * Written by Ben Hinkle and released to the public domain, as * explained at http://creativecommons.org/licenses/publicdomain * Email comments and bug reports to ben.hinkle@gmail.com * * revision 2.7.1 */ module mintl.hashaa; //debug = dHashAA; // can also pass at command line private { import mintl.share; import mintl.sorting; import mintl.mem; } /** \class HashAA * \brief A hash-based associative array traversed in insertion order. * * A HashAA!(Key,Value) represents an associative array with keys of * type Key and values of type Value that maintains the inserted items * in a linked list sorted by insertion order. If <tt>key1</tt> is * inserted into the array before <tt>key2</tt> then <tt>key1</tt> * will appear before <tt>key2</tt> in foreach statements and in * iterator traversals. * * The optional ReadOnly parameter HashAA!(Key,Value,ReadOnly) forbids * operations that modify the container. The readonly() property returns * a ReadOnly view of the container. * * The optional allocator parameter ArrayList!(Value,false,Allocator) is used * to allocate and free memory. The GC is the default allocator. */ struct HashAA(Key,Value, bit ReadOnly = false, Alloc = GCAllocator) { alias HashAA ContainerType; alias HashAA SliceType; alias Value ValueType; alias Key IndexType; alias Key SortType; alias ReadOnly isReadOnly; /** Get the kays in the array. The operation is O(n) where n is the number of * elements in the array. */ Key[] keys() { Key[] res; if (head_ is null) return res; res.length = length; size_t n = 0; foreach(Key k,Value v;*this) { res[n++] = k; } return res; } /** Get the values in the array. The operation is O(n) where n is * the number of elements in the array. */ Value[] values() { Value[] res; if (head_ is null) return res; res.length = length; size_t n = 0; foreach(Key k,Value v;*this) { res[n++] = v; } return res; } /** Property for the default value of the array when a key is missing. */ void missing(Value val) { if (data.length == 0) initDataArray(); data[0].val = val; } Value missing() { if (data.length == 0) return Value.init; return data[0].val; } /** Length of array. The operation is O(n) where n is the number of * elements in the array. */ size_t length() { if (head_ is null) return 0; if (head_.prev is null && tail_.next is null) return dlength(); Node* t = head_; int n = 1; while (t !is null && t !is tail_) { t = t.next; ++n; } return n; } /** Return true if array is empty. */ bool isEmpty() { return head_ is null; } private void initDataArray() { size_t s = prime_list[0]+1; static if (is(Alloc == GCAllocator)) { data.length = s; } else { Node** p = cast(Node**)Alloc.malloc((Node*).sizeof*s); data = p[0 .. s]; } data[0] = allocNode; data[0].len = 0; } private enum {InsertOnMiss, ThrowOnMiss, NullOnMiss} // helper functions for indexing. private Node** getNode(Key key, int failureAction) { Node* t; TypeInfo ti = typeid(Key); uint hash = ti.getHash(&key); if (data.length == 0) { switch (failureAction) { case InsertOnMiss: initDataArray(); break; case ThrowOnMiss: GetActionThrow: throw new IndexOutOfBoundsException("Key not in container"); case NullOnMiss: return null; } } uint i = (hash % (data.length-1))+1; Node**p = &data[i]; while (*p !is null) { if ((*p).hash == hash && ti.equals(&(*p).key,&key)) { // found key return p; } p = &(*p).nextHash; } if (failureAction == ThrowOnMiss) { goto GetActionThrow; } else if (failureAction == NullOnMiss) { return null; } // lookup Node *p = t = allocNode(); t.hash = hash; t.key = key; if (head_ is null) { head_ = t; tail_ = t; } else { link(tail_,t); tail_ = t; } data[0].len++; static if (!ReadOnly) { if (data[0].len > .75*data.length) { this.rehash(); p = getNode(key,NullOnMiss); } } return p; } /** Find the element with a given key and return a pointer to the * value. If the key is not in the array null is returned or if * throwOnMiss is true an exception is thrown. The target array can * be a sub-array though the key may fall outside of the sub-array * range. */ Value* get(Key key, bool throwOnMiss = false) { Node** t = getNode(key,throwOnMiss ? ThrowOnMiss : NullOnMiss); if (t) return &(*t).val; else return null; } /** Create a sub-array from key a to b (exclusive). */ HashAA opSlice(Key a, Key b) { HashAA res; res.head_ = *getNode(a,ThrowOnMiss); res.tail_ = (*getNode(b,ThrowOnMiss)).prev; // will at least have a in there res.data = data; return res; } /** Create a sub-array from the first key in a to the last key in b (inclusive). */ HashAA opSlice(HashAA a, HashAA b) { HashAA res; res.head_ = a.head_; res.tail_ = b.tail_; res.data = data; return res; } /** Get a ReadOnly view of the container */ .HashAA!(Key,Value,true,Alloc) readonly() { .HashAA!(Key,Value,true,Alloc) res; res = *cast(typeof(&res))this; return res; } /** Get a read-write view of the container */ .HashAA!(Key,Value,false,Alloc) readwrite() { .HashAA!(Key,Value,false,Alloc) res; res = *cast(typeof(&res))this; return res; } static if (ReadOnly) { /** Duplicates the array. The operation is O(n) where n is length. */ /* private bug HashAA dup() { .HashAA!(Key,Value,false) res; res.data.length = data.length; res.data[0] = cast(res.Node*)allocNode; res.data[0].len = 0; res.missing = missing; foreach(Key k,Value v;*this) res[k] = v; return res.readonly; } */ } else { /** Clear all contents. */ void clear() { static if (is(Alloc == GCAllocator)) { } else { foreach ( Node* t; data) { while (t) { Node* next = t.nextHash; Alloc.gcFree(t); t = next; } } Alloc.free(data.ptr); } *this = HashAA.init; } /** Duplicates the array. The operation is O(n) where n is length. */ HashAA dup() { HashAA res; res.data.length = data.length; res.data[0] = allocNode; res.data[0].len = 0; res.missing = missing; foreach(Key k,Value v;*this) { res[k] = v; } return res; } /** Rehash the array. */ HashAA rehash() { uint k; uint len = data.length; if (len == 0) return *this; for (k=0;k<prime_list.length;k++) { if (prime_list[k] > len) break; } Node* n = data[0]; size_t s = prime_list[k]+1; static if (is(Alloc == GCAllocator)) { data = new Node*[s]; } else { Node** p = cast(Node**)Alloc.malloc((Node*).sizeof * s); data = p[0 .. s]; } data[0] = n; Node* t = head_; while (t) { uint i = (t.hash %(data.length-1))+1; t.nextHash = data[i]; data[i] = t; t = t.next; } return *this; } /** Find a key in the array and return a pointer to the associated value. * Insert the key and initialize with Value.init if the key is not * in the array. */ Value* put(Key key) { debug (dHashAA) printf("put %d\n",key); Node* t = *getNode(key, InsertOnMiss); return &t.val; } /** Store a value with a key, overwriting any previous value. The * target array can be a sub-array though the key may fall outside of the * sub-array range. */ void opIndexAssign(Value val, Key key) { Node* t = *getNode(key, InsertOnMiss); t.val = val; } // helper for remove/take private Node* takeHelper(Key key) { Node** p = getNode(key,NullOnMiss); if (!p) return null; Node* n = *p; if (n is head_) head_ = n.next; if (n is tail_) tail_ = n.prev; link(n.prev, n.next); *p = n.nextHash; return n; } /** Remove a key from the array. The target array can be a sub-array though * the key may fall outside of the sub-array range. */ void remove(Key key) { Node* n = takeHelper(key); if (n) { static if (is(Alloc == GCAllocator)) { static if (Node.sizeof < AllocBlockCutoff) { n.next = data[0].next; data[0].next = n; n.prev = null; n.val = Value.init; n.key = Key.init; } } else { Alloc.gcFree(n); } data[0].len--; } } /** Remove the value stored with the given key and return it, if present. * If not present return the missing default. */ Value take(Key key) { Node* n = takeHelper(key); if (!n) return missing; Value val = n.val; static if (is(Alloc == GCAllocator)) { static if (Node.sizeof <= AllocBlockCutoff) { n.next = data[0].next; data[0].next = n; n.val = Value.init; n.key = Key.init; n.prev = null; } } else { Alloc.gcFree(n); } data[0].len--; return val; } /** Remove a sub-array from the array. The operation is O(max(log(m),n)) * where m is the size of the target array and n is the number of * elements in the sub-array. */ void remove(HashAA subarray) { if (subarray.head_ is subarray.tail_) { remove(subarray.key); } else { Key[] keylist = subarray.keys; foreach(Key key;keylist) remove(key); } } mixin MAddAA!(HashAA); // mixin add function } // !ReadOnly private void link(Node* a, Node* b) { if (a) a.next = b; if (b) b.prev = a; } // remove extra capacity void trim() { if (data.length > 0 && data[0]) data[0].next = null; } // Parameters for controlling block allocations private const int AllocBlockSize = 10; // number of nodes in block private const int AllocBlockCutoff = 96; // max node size to allow blocks private Node* allocNode() { Node* p; static if (is(Alloc == GCAllocator)) { static if (Node.sizeof > AllocBlockCutoff) { return new Node; } else { if (data[0] is null) return new Node; p = data[0].next; if (p) { data[0].next = p.next; p.next = null; return p; } p = (new Node[AllocBlockSize]).ptr; for (int k=1;k<AllocBlockSize-1;k++) p[k].next = &p[k+1]; data[0].next = &p[1]; return &p[0]; } } else { p = cast(Node*)Alloc.gcMalloc(Node.sizeof); } return p; } /** Find the element with a given key and return the value. If the * key is not in the map the default for the array is returned. The * target array can be a sub-array though the key may fall outside * of the sub-array range. */ Value opIndex(Key key) { Value* t = get(key); if (t) return *t; else return missing; } /** Returns the value of a one-item array. */ Value value() { if (head_ is null && tail_ is null) return Value.init; return head_.val; } /** Returns the key of a one-item array. */ Key key() { if (head_ is null && tail_ is null) return Key.init; return head_.key; } /** Return a one-item slice of the head (oldest insertion). */ HashAA head() { HashAA res = *this; res.tail_ = res.head_; return res; } /** Return a one-item slice of the tail (more recent insertion). */ HashAA tail() { HashAA res = *this; res.head_ = res.tail_; return res; } /** Move a slice by n. By default moves to the next item. */ void next(int n = 1, int end = 0) { void doNext(inout Node* node, int m) { while (m--) node = node.next; } void doPrev(inout Node* node, int m) { while (m--) node = node.prev; } if (n > 0) { if (end >= 0) doNext(tail_,n); if (end <= 0) doNext(head_,n); } else { n = -n; if (end >= 0) doPrev(tail_,n); if (end <= 0) doPrev(head_,n); } } /** Test for equality of two arrays. The operation is O(n) where n * is length of the array. */ int opEquals(HashAA c) { Node* i = head_; Node* j = c.head_; Node* end = tail_; Node* cend = c.tail_; TypeInfo ti_k = typeid(Key); TypeInfo ti_v = typeid(Value); int do_test(Node*p1,Node*p2) { if (!ti_k.equals(&p1.key,&p2.key)) return 0; if (!ti_v.equals(&p1.val,&p2.val)) return 0; return 1; } while (i !is null && j !is null) { if (!do_test(i,j)) return 0; if (i is end || j is cend) { return (i is end && j is cend); } i = i.next; j = j.next; } return (i is null && j is null); } /** Test if a key is in the array. The target array can be a sub-array * but the key may fall outside of the sub-array range. */ bool contains(Key key) { return get(key) !is null; } /** Test if a key is in the array and set value if it is. */ bool contains(Key key,out Value value) { Value* node = get(key); if (node) value = *node; return node !is null; } /** Iterate over the array calling delegate to perform an action. A * one-element sub-array is passed to the delegate. */ int opApplyNoKeyStep(int delegate(inout Value val) dg, int step = 1) { int dg2(inout HashAA itr) { Value value = itr.value; return dg(value); } return opApplyIterStep(&dg2,step); } /** Iterate over the array calling delegate to perform an action. A * one-element sub-array is passed to the delegate. */ int opApplyWithKeyStep(int delegate(inout Key key, inout Value val) dg, int step = 1) { int dg2(inout HashAA itr) { Key key = itr.key; Value value = itr.value; return dg(key,value); } return opApplyIterStep(&dg2,step); } /** Iterate over the array calling delegate to perform an action. A * one-element sub-array is passed to the delegate. */ int opApplyIterStep(int delegate(inout HashAA itr) dg,int step=1) { int res = 0; HashAA itr; Node* x = step>0?head_:tail_; Node* end = step>0?tail_:head_; while (x !is null) { itr.head_ = itr.tail_ = x; res = dg(itr); if (res || x is end) return res; x = step>0?x.next:x.prev; } return res; } /** Iterate backwards over the array (from last to first key). The target * array can be a sub-array. This should only be called as the * iteration parameter in a <tt>foreach</tt> statement */ LAReverseIter!(Key,Value,ReadOnly,Alloc) backwards() { LAReverseIter!(Key,Value,ReadOnly,Alloc) res; res.list = this; return res; } /** Helper functions for opApply */ mixin MOpApplyImpl!(HashAA); alias opApplyNoKey opApply; alias opApplyWithKey opApply; alias opApplyIter opApply; Node* getHead(){return head_;} Node* getTail(){return tail_;} mixin MSequentialSort!(HashAA, getHead,getTail); void sort(int delegate(Key*a, Key*b) cmp = null) { Node* newhead, newtail; dosort(newhead,newtail,cmp); head_ = newhead; tail_ = newtail; } private { struct Node { Node* next, prev, nextHash; union { uint len; uint hash; } Key key; Value val; Key* sortLookup(){return &key;} } Node*[] data; Node* head_, tail_; } private uint dlength() { return data[0].len; } // size primes from aaA.d and planetmath.org private static uint[] prime_list = [97u, 389u, 1543u, 6151u, 24593u, 98317u, 393241u, 1572869u, 6291469u, 25165843u, 100663319u, 402653189u, 1610612741u, 4294967291u ]; } // internal helper struct for backwards iteration struct LAReverseIter(Key,Value,bit ReadOnly,Alloc) { mixin MReverseImpl!(HashAA!(Key,Value,ReadOnly,Alloc)); } //version = MinTLVerboseUnittest; //version = MinTLUnittest; version (MinTLUnittest) { private import std.random; unittest { version (MinTLVerboseUnittest) printf("starting mintl.hashaa unittest\n"); HashAA!(int,int) m; m[4] = 100; m[-10] = 200; m[17] = 300; assert( m.length == 3 ); assert( m[-10] == 200 ); HashAA!(int,int) mm; mm.add(4,100, -10,200, 17,300); assert( m == mm ); // test foreach int[] res; res.length = 3; int n=0; foreach(int val; m) { res[n++] = val; } assert( res[0] == 100 ); assert( res[1] == 200 ); assert( res[2] == 300 ); // test removing an item m.remove(-10); n = 0; foreach(int val; m) { res[n++] = val; } assert( res[0] == 100 ); assert( res[1] == 300 ); // test assigning to an item already in array m[22] = 400; m[17] = 500; n = 0; foreach(int val; m) { res[n++] = val; } assert( res[0] == 100 ); assert( res[1] == 500 ); assert( res[2] == 400 ); // test backwards foreach n = 0; foreach(int k,int val; m.backwards()) { res[n++] = val; } assert( res[0] == 400 ); assert( res[1] == 500 ); assert( res[2] == 100 ); // test slicing HashAA!(int, int) m2 = HashAA!(int,int).make(400,4,100,1,500,5,300,3, 200,2,600,6); HashAA!(int, int) m3; m3 = m2[500 .. 600]; assert( m3.length == 3 ); n = 0; foreach(int k,int val; m3) { res[n++] = val; } assert( res[0] == 5 ); assert( res[1] == 3 ); assert( res[2] == 2 ); // test keys int[] keys = m3.keys; assert( keys[0] == 500 ); assert( keys[1] == 300 ); assert( keys[2] == 200 ); // test rehash for (int k=0; k<1000; k++) m3[k] = k; HashAA!(int,int) m4 = m3.rehash; assert( m4 == m3 ); // test simple sorting HashAA!(int,int) s1,s12; s1.add(40,1,300,2,-20,3,100,4,400,5,200,6); s12 = s1.dup; s1.sort(); assert( s1 == HashAA!(int,int).make(-20,3,40,1,100,4,200,6,300,2,400,5) ); // sort a slice in-place HashAA!(int,int) slice1 = s12[300 .. 200]; slice1.sort(); assert( s12 == HashAA!(int,int).make(40,1,-20,3,100,4,300,2,400,5,200,6)); // test a large sort with default order HashAA!(double,int) s3; for (int k=0;k<1000;k++) { s3[1.0*rand()/100000.0 - 500000.0] = k; } HashAA!(double,int) s4 = s3.dup; s3.sort(); double[] keys2 = s3.keys; for (int k=0;k<999;k++) { assert( keys2[k] <= keys2[k+1] ); } // test a large sort with custom order int cmp(double*x,double*y){return *x>*y?-1:*x==*y?0:1;} s4.sort(&cmp); keys2 = s4.keys; for (int k=0;k<999;k++) { assert( keys2[k] >= keys2[k+1] ); } version (MinTLVerboseUnittest) printf("finished mintl.hashaa unittest\n"); } }