ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GHash.hh
Revision: 174
Committed: Wed Feb 15 21:04:38 2012 UTC (12 years, 6 months ago) by gpertea
File size: 16845 byte(s)
Log Message:
wip fqtrim

Line File contents
1 /********************************************************************************
2 * Hash table class template (char* based) *
3 *********************************************************************************/
4
5 #ifndef GHash_HH
6 #define GHash_HH
7 #include "GBase.h"
8 /**
9 * This class maintains a fast-access hash table of entities
10 * indexed by a character string (essentially, maps strings to pointers)
11 */
12
13 typedef struct {
14 char* key; // Key string
15 bool keyalloc; //shared key flag (to not free the key chars)
16 int hash; // Hash value of key
17 pointer data; // Data
18 bool mark; // Entry is marked
19 } GHashEntry;
20
21 template <class OBJ> class GHash {
22 protected:
23 GHashEntry* hash; // Hash
24 int fCapacity; // table size
25 int fCount; // number of valid entries
26 int fCurrentEntry;
27 char* lastkeyptr; //pointer to last key string added
28 //---------- Raw data retrieval (including empty entries
29 // Return key at position pos.
30 const char* Key(uint pos) const { return hash[pos].key; }
31 // return data OBJ* at given position
32 OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
33 // Return mark flag of entry at position pos.
34 bool Mark(uint pos) const { return hash[pos].mark; }
35 // Return position of first filled slot, or >= fCapacity
36 int First() const;
37 // Return position of last filled slot or -1
38 int Last() const;
39 // Return position of next filled slot in hash table
40 // or a value greater than or equal to fCapacity if no filled
41 // slot was found
42 int Next(int pos) const;
43 //Return position of previous filled slot in hash table
44 //or a -1 if no filled slot was found
45 int Prev(int pos) const;
46
47 private:
48 GHash(const GHash&);
49 GHash &operator=(const GHash&);
50 GFreeProc* fFreeProc; //procedure to free item data
51 protected:
52 public:
53 static void DefaultFreeProc(pointer item) {
54 delete (OBJ*)item;
55 item=NULL;
56 }
57 public:
58 GHash(GFreeProc* freeProc); // constructs of an empty hash
59 GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
60 void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
61 void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
62 int Capacity() const { return fCapacity; } // table's size, including the empty slots.
63 void Resize(int m); // Resize the table to the given size.
64 int Count() const { return fCount; }// the total number of entries in the table.
65 // Insert a new entry into the table given key and mark.
66 // If there is already an entry with that key, leave it unchanged,
67 const OBJ* Add(const char* ky, const OBJ* ptr=NULL, bool mrk=false);
68 //same as Add, but the key pointer is stored directly, no string duplicate
69 //is made (shared-key-Add)
70 const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false);
71
72 // Replace data at key, if the entry's mark is less than
73 // or equal to the given mark. If there was no existing entry,
74 // a new entry is inserted with the given mark.
75 OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false);
76 // Remove a given key and its data
77 OBJ* Remove(const char* ky);
78 // Find data OBJ* given key.
79 OBJ* Find(const char* ky, char** keyptr=NULL);
80 bool hasKey(const char* ky);
81 char* getLastKey() { return lastkeyptr; }
82 OBJ* operator[](const char* ky) { return Find(ky); }
83 void startIterate(); //iterator-like initialization
84 char* NextKey(); //returns next valid key in the table (NULL if no more)
85 OBJ* NextData(); //returns next valid hash[].data
86 OBJ* NextData(char*& nextkey); //returns next valid hash[].data
87 //or NULL if no more
88 //nextkey is SET to the corresponding key
89 GHashEntry* NextEntry(); //returns a pointer to a GHashEntry
90
91 /// Clear all entries
92 void Clear();
93
94 /// Destructor
95 virtual ~GHash();
96 };
97 //
98 //======================== method definitions ========================
99 //
100 /*
101 Notes:
102 - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY)
103 GHash::EMPTY and GHash::UNUSED are needed for flag purposes.
104 - Since the algorithm doubles the table size when exceeding MAX_LOAD,
105 it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
106 otherwise, the algorithm might hip-hop between halving and doubling,
107 which would be quite expensive!!
108 - Not many people seem to know that hash tables don't have to be prime
109 numbers; in fact, a table size of 2**n and odd probe distance are very
110 easy to arrange, and this works just as well!
111 - We store the hash key, so that 99.999% of the time we can compare hash numbers;
112 only when hash numbers match do we need to compare keys.
113 Thus, with a good hash function, the fCount of calls to strcmp() should be
114 roughly the same as the fCount of successful lookups.
115 - The hash table should NEVER get full, or stuff will loop forever!!
116 */
117
118 // Initial table size (MUST be power of 2)
119 #define DEF_HASH_SIZE 32
120 // Maximum hash table load factor (%)
121 #define MAX_LOAD 80
122 // Minimum hash table load factor (%)
123 #define MIN_LOAD 10
124 // Probe Position [0..n-1]
125 #define HASH1(x,n) (((unsigned int)(x)*13)%(n))
126 // Probe Distance [1..n-1]
127 #define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1)))
128
129 #define FREEDATA (fFreeProc!=NULL)
130
131 /*******************************************************************************/
132 // Construct empty hash
133 template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
134 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
135 fCurrentEntry=-1;
136 fFreeProc=freeProc;
137 lastkeyptr=NULL;
138 for (uint i=0; i<DEF_HASH_SIZE; i++)
139 hash[i].hash=-1; //this will be an indicator for 'empty' entries
140 fCapacity=DEF_HASH_SIZE;
141 fCount=0;
142 }
143
144 template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
145 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
146 fCurrentEntry=-1;
147 lastkeyptr=NULL;
148 fFreeProc = (doFree)?&DefaultFreeProc : NULL;
149 for (uint i=0; i<DEF_HASH_SIZE; i++)
150 hash[i].hash=-1; //this will be an indicator for 'empty' entries
151 fCapacity=DEF_HASH_SIZE;
152 fCount=0;
153 }
154
155
156 // Resize table
157 template <class OBJ> void GHash<OBJ>::Resize(int m){
158 register int i,n,p,x,h;
159 GHashEntry *k;
160 GASSERT(fCount<=fCapacity);
161 if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
162 n=fCapacity;
163 while((n>>2)>m) n>>=1; // Shrink until n/4 <= m
164 while((n>>1)<m) n<<=1; // Grow until m <= n/2
165 GASSERT(m<=(n>>1));
166 GASSERT(DEF_HASH_SIZE<=n);
167 if(n!=fCapacity){
168 GASSERT(m<=n);
169 GMALLOC(k, sizeof(GHashEntry)*n);
170 for(i=0; i<n; i++) k[i].hash=-1;
171 for(i=0; i<fCapacity; i++){
172 h=hash[i].hash;
173 if(0<=h){
174 p=HASH1(h,n);
175 GASSERT(0<=p && p<n);
176 x=HASH2(h,n);
177 GASSERT(1<=x && x<n);
178 while(k[p].hash!=-1) p=(p+x)%n;
179 GASSERT(k[p].hash<0);
180 k[p]=hash[i];
181 }
182 }
183 GFREE(hash);
184 hash=k;
185 fCapacity=n;
186 }
187 }
188
189 // add a new entry, or update it if it already exists
190 template <class OBJ> const OBJ* GHash<OBJ>::Add(const char* ky,
191 const OBJ* pdata,bool mrk){
192 register int p,i,x,h,n;
193 if(!ky) GError("GHash::insert: NULL key argument.\n");
194 GASSERT(fCount<fCapacity);
195 h=strhash(ky);
196 GASSERT(0<=h);
197 p=HASH1(h,fCapacity);
198 GASSERT(0<=p && p<fCapacity);
199 x=HASH2(h,fCapacity);
200 GASSERT(1<=x && x<fCapacity);
201 i=-1;
202 n=fCapacity;
203 while(n && hash[p].hash!=-1){
204 if ((i==-1)&&(hash[p].hash==-2)) i=p;
205 if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
206 //replace hash data for this key!
207 lastkeyptr=hash[p].key;
208 hash[p].data = (void*) pdata;
209 return (OBJ*)hash[p].data;
210 }
211 p=(p+x)%fCapacity;
212 n--;
213 }
214 if(i==-1) i=p;
215 GTRACE(("GHash::insert: key=\"%s\"\n",ky));
216 //GMessage("GHash::insert: key=\"%s\"\n",ky);
217 GASSERT(0<=i && i<fCapacity);
218 GASSERT(hash[i].hash<0);
219 hash[i].hash=h;
220 hash[i].mark=mrk;
221 hash[i].key=Gstrdup(ky);
222 hash[i].keyalloc=true;
223 lastkeyptr=hash[i].key;
224 hash[i].data= (void*) pdata;
225 fCount++;
226 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
227 GASSERT(fCount<fCapacity);
228 return pdata;
229 }
230
231 template <class OBJ> const OBJ* GHash<OBJ>::shkAdd(const char* ky,
232 const OBJ* pdata,bool mrk){
233 register int p,i,x,h,n;
234 if(!ky) GError("GHash::insert: NULL key argument.\n");
235 GASSERT(fCount<fCapacity);
236 h=strhash(ky);
237 GASSERT(0<=h);
238 p=HASH1(h,fCapacity);
239 GASSERT(0<=p && p<fCapacity);
240 x=HASH2(h,fCapacity);
241 GASSERT(1<=x && x<fCapacity);
242 i=-1;
243 n=fCapacity;
244 while(n && hash[p].hash!=-1){
245 if((i==-1)&&(hash[p].hash==-2)) i=p;
246 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
247 //replace hash data for this key!
248 lastkeyptr=hash[p].key;
249 hash[p].data = (void*) pdata;
250 return (OBJ*)hash[p].data;
251 }
252 p=(p+x)%fCapacity;
253 n--;
254 }
255 if(i==-1) i=p;
256 GTRACE(("GHash::insert: key=\"%s\"\n",ky));
257 //GMessage("GHash::insert: key=\"%s\"\n",ky);
258 GASSERT(0<=i && i<fCapacity);
259 GASSERT(hash[i].hash<0);
260 hash[i].hash=h;
261 hash[i].mark=mrk;
262 hash[i].key=(char *)ky;
263 lastkeyptr=hash[i].key;
264 hash[i].keyalloc=false;
265 hash[i].data= (void*) pdata;
266 fCount++;
267 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
268 GASSERT(fCount<fCapacity);
269 return pdata;
270 }
271
272
273 // Add or replace entry
274 template <class OBJ> OBJ* GHash<OBJ>::Replace(const char* ky,const OBJ* pdata, bool mrk){
275 register int p,i,x,h,n;
276 if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
277 GASSERT(fCount<fCapacity);
278 h=strhash(ky);
279 GASSERT(0<=h);
280 p=HASH1(h,fCapacity);
281 GASSERT(0<=p && p<fCapacity);
282 x=HASH2(h,fCapacity);
283 GASSERT(1<=x && x<fCapacity);
284 i=-1;
285 n=fCapacity;
286 while(n && hash[p].hash!=-1){
287 if((i==-1)&&(hash[p].hash==-2)) i=p;
288 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
289 if(hash[p].mark<=mrk){
290 GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
291 if (FREEDATA) (*fFreeProc)(hash[p].data);
292 hash[p].mark=mrk;
293 hash[p].data=pdata;
294 }
295 return hash[p].data;
296 }
297 p=(p+x)%fCapacity;
298 n--;
299 }
300 if(i==-1) i=p;
301 GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
302 GASSERT(0<=i && i<fCapacity);
303 GASSERT(hash[i].hash<0);
304 hash[i].hash=h;
305 hash[i].mark=mrk;
306 hash[i].key=Gstrdup(ky);
307 hash[i].data=pdata;
308 fCount++;
309 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
310 GASSERT(fCount<fCapacity);
311 return pdata;
312 }
313
314
315 // Remove entry
316 template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
317 register int p,x,h,n;
318 if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
319 if(0<fCount){
320 h=strhash(ky);
321 GASSERT(0<=h);
322 p=HASH1(h,fCapacity);
323 GASSERT(0<=p && p<fCapacity);
324 x=HASH2(h,fCapacity);
325 GASSERT(1<=x && x<fCapacity);
326 GASSERT(fCount<fCapacity);
327 n=fCapacity;
328 while(n && hash[p].hash!=-1){
329 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
330 GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
331 hash[p].hash=-2;
332 hash[p].mark=false;
333 if (hash[p].keyalloc) GFREE((hash[p].key));
334 if (FREEDATA) (*fFreeProc)(hash[p].data);
335 hash[p].key=NULL;
336 hash[p].data=NULL;
337 fCount--;
338 if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
339 GASSERT(fCount<fCapacity);
340 return NULL;
341 }
342 p=(p+x)%fCapacity;
343 n--;
344 }
345 }
346 return NULL;
347 }
348
349
350 // Find entry
351 template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
352 register int p,x,h,n;
353 if(!ky){ GError("GHash::find: NULL key argument.\n"); }
354 if(0<fCount){
355 h=strhash(ky);
356 GASSERT(0<=h);
357 p=HASH1(h,fCapacity);
358 GASSERT(0<=p && p<fCapacity);
359 x=HASH2(h,fCapacity);
360 GASSERT(1<=x && x<fCapacity);
361 GASSERT(fCount<fCapacity);
362 n=fCapacity;
363 while(n && hash[p].hash!=-1){
364 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
365 return true;
366 }
367 p=(p+x)%fCapacity;
368 n--;
369 }
370 }
371 return false;
372 }
373
374 template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky, char** keyptr){
375 register int p,x,h,n;
376 if(!ky){ GError("GHash::find: NULL key argument.\n"); }
377 if(0<fCount){
378 h=strhash(ky);
379 GASSERT(0<=h);
380 p=HASH1(h,fCapacity);
381 GASSERT(0<=p && p<fCapacity);
382 x=HASH2(h,fCapacity);
383 GASSERT(1<=x && x<fCapacity);
384 GASSERT(fCount<fCapacity);
385 n=fCapacity;
386 while(n && hash[p].hash!=-1){
387 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
388 if (keyptr!=NULL) *keyptr = hash[p].key;
389 return (OBJ*)hash[p].data;
390 }
391 p=(p+x)%fCapacity;
392 n--;
393 }
394 }
395 return NULL;
396 }
397
398
399 template <class OBJ> void GHash<OBJ>::startIterate() {// initialize a key iterator; call
400 fCurrentEntry=0;
401 }
402
403 template <class OBJ> char* GHash<OBJ>::NextKey() {
404 register int pos=fCurrentEntry;
405 while (pos<fCapacity && hash[pos].hash<0) pos++;
406 if (pos==fCapacity) {
407 fCurrentEntry=fCapacity;
408 return NULL;
409 }
410 else {
411 fCurrentEntry=pos+1;
412 return hash[pos].key;
413 }
414 }
415
416 template <class OBJ> OBJ* GHash<OBJ>::NextData() {
417 register int pos=fCurrentEntry;
418 while (pos<fCapacity && hash[pos].hash<0) pos++;
419 if (pos==fCapacity) {
420 fCurrentEntry=fCapacity;
421 return NULL;
422 }
423 else {
424 fCurrentEntry=pos+1;
425 return (OBJ*)hash[pos].data;
426 }
427
428 }
429
430 template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
431 register int pos=fCurrentEntry;
432 while (pos<fCapacity && hash[pos].hash<0) pos++;
433 if (pos==fCapacity) {
434 fCurrentEntry=fCapacity;
435 nextkey=NULL;
436 return NULL;
437 }
438 else {
439 fCurrentEntry=pos+1;
440 nextkey=hash[pos].key;
441 return (OBJ*)hash[pos].data;
442 }
443
444 }
445
446 template <class OBJ> GHashEntry* GHash<OBJ>::NextEntry() {
447 register int pos=fCurrentEntry;
448 while (pos<fCapacity && hash[pos].hash<0) pos++;
449 if (pos==fCapacity) {
450 fCurrentEntry=fCapacity;
451 return NULL;
452 }
453 else {
454 fCurrentEntry=pos+1;
455 return &hash[pos];
456 }
457 }
458
459
460 // Get first non-empty entry
461 template <class OBJ> int GHash<OBJ>::First() const {
462 register int pos=0;
463 while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
464 GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
465 return pos;
466 }
467
468 // Get last non-empty entry
469 template <class OBJ> int GHash<OBJ>::Last() const {
470 register int pos=fCapacity-1;
471 while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
472 GASSERT(pos<0 || 0<=hash[pos].hash);
473 return pos;
474 }
475
476
477 // Find next valid entry
478 template <class OBJ> int GHash<OBJ>::Next(int pos) const {
479 GASSERT(0<=pos && pos<fCapacity);
480 while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
481 GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
482 return pos;
483 }
484
485
486 // Find previous valid entry
487 template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
488 GASSERT(0<=pos && pos<fCapacity);
489 while(--pos >= 0){ if(0<=hash[pos].hash) break; }
490 GASSERT(pos<0 || 0<=hash[pos].hash);
491 return pos;
492 }
493
494
495 // Remove all
496 template <class OBJ> void GHash<OBJ>::Clear(){
497 register int i;
498 for(i=0; i<fCapacity; i++){
499 if(hash[i].hash>=0){
500 if (hash[i].keyalloc) GFREE((hash[i].key));
501 if (FREEDATA)
502 (*fFreeProc)(hash[i].data);
503 }
504 }
505 GFREE(hash);
506 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
507 //reinitialize it
508 for (i=0; i<DEF_HASH_SIZE; i++)
509 hash[i].hash=-1; //this will be an indicator for 'empty' entries
510 fCapacity=DEF_HASH_SIZE;
511 fCount=0;
512 }
513
514
515 // Save data
516 /*
517 void GHash::Save(Stream& store) const {
518 Object::save(store);
519 store << fCapacity;
520 store << fCount;
521 for(int i=0; i<fCapacity; i++){
522 store << hash[i].hash;
523 if(hash[i].hash>=0){
524 uint len=strlen(hash[i].key);
525 store << len;
526 store << hash[i].mark;
527 store.save(hash[i].key,len);
528 }
529 }
530 }
531
532
533 // Load data
534 void GHash::Load(Stream& store){
535 Object::load(store);
536 store >> fCapacity;
537 store >> fCount;
538 for(int i=0; i<fCapacity; i++){
539 store >> hash[i].hash;
540 if(hash[i].hash>=0){
541 uint len;
542 store >> len;
543 store >> hash[i].mark;
544 GMALLOC(hash[i].key,len+1);
545 store.load(hash[i].key,len);
546 hash[i].key[len]='\0';
547 }
548 }
549 }
550 */
551
552 // Destroy table
553 template <class OBJ> GHash<OBJ>::~GHash(){
554 register int i;
555 for(i=0; i<fCapacity; i++){
556 if(hash[i].hash>=0){
557 if (hash[i].keyalloc) GFREE((hash[i].key));
558 if (FREEDATA) (*fFreeProc)(hash[i].data);
559 }
560 }
561 GFREE(hash);
562 }
563
564 #endif