1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36: #define LIBGAUCHE_BODY
37: #include "gauche.h"
38: #include "gauche/class.h"
39:
40:
41:
42:
43:
44:
45:
46: typedef struct EntryRec {
47: intptr_t key;
48: intptr_t value;
49: struct EntryRec *next;
50: u_long hashval;
51: } Entry;
52:
53: #define BUCKETS(hc) ((Entry**)hc->buckets)
54:
55: #define DEFAULT_NUM_BUCKETS 4
56: #define MAX_AVG_CHAIN_LIMITS 3
57: #define EXTEND_BITS 2
58:
59:
60:
61: #define HASHMASK 0xffffffffUL
62:
63: typedef Entry *SearchProc(ScmHashCore *core, intptr_t key, ScmDictOp op);
64:
65: static unsigned int round2up(unsigned int val);
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85: #define STRING_HASH(hv, chars, size) \
86: do { \
87: int i_ = (size); \
88: (hv) = 0; \
89: while (i_-- > 0) { \
90: (hv) = ((hv)<<5) - (hv) + ((unsigned char)*chars++); \
91: } \
92: } while (0)
93:
94:
95:
96:
97:
98:
99: #define SMALL_INT_HASH(result, val) \
100: (result) = ((val)*2654435761UL)
101:
102: #define ADDRESS_HASH(result, val) \
103: (result) = (u_long)((SCM_WORD(val) >> 3)*2654435761UL)
104:
105:
106:
107:
108:
109: #define HASH2INDEX(tabsiz, bits, hashval) \
110: (((hashval)+((hashval)>>(32-(bits)))) & ((tabsiz) - 1))
111:
112:
113: #define COMBINE(hv1, hv2) ((hv1)*5+(hv2))
114:
115: u_long Scm_EqHash(ScmObj obj)
116: {
117: u_long hashval;
118: ADDRESS_HASH(hashval, obj);
119: return hashval&HASHMASK;
120: }
121:
122: u_long Scm_EqvHash(ScmObj obj)
123: {
124: u_long hashval;
125: if (SCM_NUMBERP(obj)) {
126: if (SCM_INTP(obj)) {
127: SMALL_INT_HASH(hashval, SCM_INT_VALUE(obj));
128: } else if (SCM_BIGNUMP(obj)) {
129: u_int i;
130: u_long u = 0;
131: for (i=0; i<SCM_BIGNUM_SIZE(obj); i++) {
132: u += SCM_BIGNUM(obj)->values[i];
133: }
134: SMALL_INT_HASH(hashval, u);
135: } else if (SCM_FLONUMP(obj)) {
136:
137: hashval = (u_long)(SCM_FLONUM_VALUE(obj)*2654435761UL);
138: } else if (SCM_RATNUMP(obj)) {
139:
140:
141: u_long h1 = Scm_EqvHash(SCM_RATNUM_NUMER(obj));
142: u_long h2 = Scm_EqvHash(SCM_RATNUM_DENOM(obj));
143: hashval = COMBINE(h1, h2);
144: } else {
145:
146: hashval = (u_long)((SCM_COMPNUM_REAL(obj)+SCM_COMPNUM_IMAG(obj))*2654435761UL);
147: }
148: } else {
149: ADDRESS_HASH(hashval, obj);
150: }
151: return hashval&HASHMASK;
152: }
153:
154:
155: u_long Scm_Hash(ScmObj obj)
156: {
157: u_long hashval;
158: if (!SCM_PTRP(obj)) {
159: SMALL_INT_HASH(hashval, (u_long)SCM_WORD(obj));
160: return hashval;
161: } else if (SCM_NUMBERP(obj)) {
162: return Scm_EqvHash(obj);
163: } else if (SCM_STRINGP(obj)) {
164: goto string_hash;
165: } else if (SCM_PAIRP(obj)) {
166: u_long h = 0, h2;
167: ScmObj cp;
168: SCM_FOR_EACH(cp, obj) {
169: h2 = Scm_Hash(SCM_CAR(cp));
170: h = COMBINE(h, h2);
171: }
172: h2 = Scm_Hash(cp);
173: h = COMBINE(h, h2);
174: return h;
175: } else if (SCM_VECTORP(obj)) {
176: int i, siz = SCM_VECTOR_SIZE(obj);
177: u_long h = 0, h2;
178: for (i=0; i<siz; i++) {
179: h2 = Scm_Hash(SCM_VECTOR_ELEMENT(obj, i));
180: h = COMBINE(h, h2);
181: }
182: return h;
183: } else if (SCM_SYMBOLP(obj)) {
184: obj = SCM_OBJ(SCM_SYMBOL_NAME(obj));
185: goto string_hash;
186: } else if (SCM_KEYWORDP(obj)) {
187: obj = SCM_OBJ(SCM_KEYWORD_NAME(obj));
188: goto string_hash;
189: } else {
190:
191: ScmObj r = Scm_ApplyRec(SCM_OBJ(&Scm_GenericObjectHash),
192: SCM_LIST1(obj));
193: if (SCM_INTP(r)) {
194: return (u_long)SCM_INT_VALUE(r);
195: }
196: if (SCM_BIGNUMP(r)) {
197:
198:
199: return SCM_BIGNUM(r)->values[0];
200: }
201: Scm_Error("object-hash returned non-integer: %S", r);
202: return 0;
203: }
204: string_hash:
205: {
206: const char *p;
207: const ScmStringBody *b = SCM_STRING_BODY(obj);
208: p = SCM_STRING_BODY_START(b);
209: STRING_HASH(hashval, p, SCM_STRING_BODY_SIZE(b));
210: return hashval;
211: }
212: }
213:
214: u_long Scm_HashString(ScmString *str, u_long modulo)
215: {
216: u_long hashval;
217: const char *p;
218: const ScmStringBody *b = SCM_STRING_BODY(str);
219: p = SCM_STRING_BODY_START(b);
220: STRING_HASH(hashval, p, SCM_STRING_BODY_SIZE(b));
221: return (hashval % modulo);
222: }
223:
224:
225:
226:
227:
228:
229:
230:
231:
232:
233:
234:
235:
236:
237:
238:
239:
240:
241:
242:
243:
244:
245:
246:
247:
248:
249:
250:
251:
252:
253:
254:
255:
256: static Entry *insert_entry(ScmHashCore *table,
257: intptr_t key,
258: u_long hashval,
259: int index)
260: {
261: Entry *e = SCM_NEW(Entry);
262: Entry **buckets = BUCKETS(table);
263: e->key = key;
264: e->value = 0;
265: e->next = buckets[index];
266: e->hashval = hashval;
267: buckets[index] = e;
268: table->numEntries++;
269:
270: if (table->numEntries > table->numBuckets*MAX_AVG_CHAIN_LIMITS) {
271:
272: Entry **newb, *f;
273: ScmHashIter iter;
274: int i, newsize = (table->numBuckets << EXTEND_BITS);
275: int newbits = table->numBucketsLog2 + EXTEND_BITS;
276:
277: newb = SCM_NEW_ARRAY(Entry*, newsize);
278: for (i=0; i<newsize; i++) newb[i] = NULL;
279:
280: Scm_HashIterInit(&iter, table);
281: while ((f = (Entry*)Scm_HashIterNext(&iter)) != NULL) {
282: index = HASH2INDEX(newsize, newbits, f->hashval);
283: f->next = newb[index];
284: newb[index] = f;
285: }
286: table->numBuckets = newsize;
287: table->numBucketsLog2 = newbits;
288: table->buckets = (void**)newb;
289: }
290: return e;
291: }
292:
293:
294:
295:
296:
297:
298: static Entry *delete_entry(ScmHashCore *table,
299: Entry *entry, Entry *prev,
300: int index)
301: {
302: if (prev) prev->next = entry->next;
303: else table->buckets[index] = (void*)entry->next;
304: table->numEntries--;
305: SCM_ASSERT(table->numEntries >= 0);
306: entry->next = NULL;
307: return entry;
308: }
309:
310: #define FOUND(table, op, e, p, index) \
311: do { \
312: switch (op) { \
313: case SCM_DICT_GET:; \
314: case SCM_DICT_CREATE:; \
315: return e; \
316: case SCM_DICT_DELETE:; \
317: return delete_entry(table, e, p, index); \
318: } \
319: } while (0)
320:
321: #define NOTFOUND(table, op, key, hashval, index) \
322: do { \
323: if (op == SCM_DICT_CREATE) { \
324: return insert_entry(table, key, hashval, index); \
325: } else { \
326: return NULL; \
327: } \
328: } while (0)
329:
330:
331:
332:
333: static Entry *address_access(ScmHashCore *table,
334: intptr_t key,
335: ScmDictOp op)
336: {
337: u_long hashval, index;
338: Entry *e, *p, **buckets = (Entry**)table->buckets;
339:
340: ADDRESS_HASH(hashval, key);
341: index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
342:
343: for (e = buckets[index], p = NULL; e; p = e, e = e->next) {
344: if (e->key == key) FOUND(table, op, e, p, index);
345: }
346: NOTFOUND(table, op, key, hashval, index);
347: }
348:
349: static u_long address_hash(const ScmHashCore *ht, intptr_t obj)
350: {
351: u_long hashval;
352: ADDRESS_HASH(hashval, obj);
353: return hashval;
354: }
355:
356: static int address_cmp(const ScmHashCore *ht, intptr_t key, intptr_t k2)
357: {
358: return (key == k2);
359: }
360:
361:
362:
363:
364:
365: static u_long eqv_hash(const ScmHashCore *table, intptr_t key)
366: {
367: return Scm_EqvHash(SCM_OBJ(key));
368: }
369:
370: static int eqv_cmp(const ScmHashCore *table, intptr_t key, intptr_t k2)
371: {
372: return Scm_EqvP(SCM_OBJ(key), SCM_OBJ(k2));
373: }
374:
375: static u_long equal_hash(const ScmHashCore *table, intptr_t key)
376: {
377: return Scm_Hash(SCM_OBJ(key));
378: }
379:
380: static int equal_cmp(const ScmHashCore *table, intptr_t key, intptr_t k2)
381: {
382: return Scm_EqualP(SCM_OBJ(key), SCM_OBJ(k2));
383: }
384:
385:
386:
387:
388:
389: static Entry *string_access(ScmHashCore *table, intptr_t k, ScmDictOp op)
390: {
391: u_long hashval, index;
392: int size;
393: const char *s;
394: ScmObj key = SCM_OBJ(k);
395: Entry *e, *p, **buckets;
396: const ScmStringBody *keyb;
397:
398: if (!SCM_STRINGP(key)) {
399: Scm_Error("Got non-string key %S to the string hashtable.", key);
400: }
401: keyb = SCM_STRING_BODY(key);
402: s = SCM_STRING_BODY_START(keyb);
403: size = SCM_STRING_BODY_SIZE(keyb);
404: STRING_HASH(hashval, s, size);
405: index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
406: buckets = (Entry**)table->buckets;
407:
408: for (e = buckets[index], p = NULL; e; p = e, e = e->next) {
409: ScmObj ee = SCM_OBJ(e->key);
410: const ScmStringBody *eeb = SCM_STRING_BODY(ee);
411: int eesize = SCM_STRING_BODY_SIZE(eeb);
412: if (size == eesize
413: && memcmp(SCM_STRING_BODY_START(keyb),
414: SCM_STRING_BODY_START(eeb), eesize) == 0){
415: FOUND(table, op, e, p, index);
416: }
417: }
418: NOTFOUND(table, op, k, hashval, index);
419: }
420:
421: static u_long string_hash(const ScmHashCore *table, intptr_t key)
422: {
423: u_long hashval;
424: const char *p;
425: const ScmStringBody *b = SCM_STRING_BODY(key);
426: p = SCM_STRING_BODY_START(b);
427: STRING_HASH(hashval, p, SCM_STRING_BODY_SIZE(b));
428: return hashval;
429: }
430:
431: static int string_cmp(const ScmHashCore *table, intptr_t k1, intptr_t k2)
432: {
433: const ScmStringBody *b1 = SCM_STRING_BODY(k1);
434: const ScmStringBody *b2 = SCM_STRING_BODY(k2);
435: return ((SCM_STRING_BODY_SIZE(b1) == SCM_STRING_BODY_SIZE(b2))
436: && (memcmp(SCM_STRING_BODY_START(b1),
437: SCM_STRING_BODY_START(b2),
438: SCM_STRING_BODY_SIZE(b1)) == 0));
439: }
440:
441:
442:
443:
444:
445: static u_long multiword_hash(const ScmHashCore *table, intptr_t key)
446: {
447: ScmWord keysize = (ScmWord)table->data;
448: ScmWord *keyarray = (ScmWord*)key;
449: u_long h = 0, h1;
450: int i;
451: for (i=0; i<keysize; i++) {
452: ADDRESS_HASH(h1, keyarray[i]);
453: h = COMBINE(h, h1);
454: }
455: return h;
456: }
457:
458: #if 0
459: static Entry *multiword_access(ScmHashCore *table, intptr_t k, ScmDictOp op)
460: {
461: u_long hashval, index;
462: ScmWord keysize = (ScmWord)table->data;
463: Entry *e, *p, **buckets;
464:
465: hashval = multiword_hash(table, k);
466: index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
467: buckets = (Entry**)table->buckets;
468:
469: for (e = buckets[index], p = NULL; e; p = e, e = e->next) {
470: if (memcmp((void*)k, (void*)e->key, keysize*sizeof(ScmWord)) == 0)
471: FOUND(table, op, e, p, index);
472: }
473: NOTFOUND(table, op, k, hashval, index);
474: }
475: #endif
476:
477:
478:
479:
480:
481:
482: static Entry *general_access(ScmHashCore *table, intptr_t key, ScmDictOp op)
483: {
484: u_long hashval, index;
485: Entry *e, *p, **buckets;
486:
487: hashval = table->hashfn(table, key);
488: index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
489: buckets = (Entry**)table->buckets;
490:
491: for (e = buckets[index], p = NULL; e; p = e, e = e->next) {
492: if (table->cmpfn(table, key, e->key)) FOUND(table, op, e, p, index);
493: }
494: NOTFOUND(table, op, key, hashval, index);
495: }
496:
497:
498:
499:
500:
501: static void hash_core_init(ScmHashCore *table,
502: SearchProc *accessfn,
503: ScmHashProc *hashfn,
504: ScmHashCompareProc *cmpfn,
505: unsigned int initSize,
506: void *data)
507: {
508: Entry **b;
509: u_int i;
510:
511: if (initSize != 0) initSize = round2up(initSize);
512: else initSize = DEFAULT_NUM_BUCKETS;
513:
514: b = SCM_NEW_ARRAY(Entry*, initSize);
515: table->buckets = (void**)b;
516: table->numBuckets = initSize;
517: table->numEntries = 0;
518: table->accessfn = (void*)accessfn;
519: table->hashfn = hashfn;
520: table->cmpfn = cmpfn;
521: table->data = data;
522: for (i=initSize, table->numBucketsLog2=0; i > 1; i /= 2) {
523: table->numBucketsLog2++;
524: }
525: for (i=0; i<initSize; i++) table->buckets[i] = NULL;
526: }
527:
528:
529: int hash_core_predef_procs(ScmHashType type,
530: SearchProc **accessfn,
531: ScmHashProc **hashfn,
532: ScmHashCompareProc **cmpfn)
533: {
534: switch (type) {
535: case SCM_HASH_EQ:
536: case SCM_HASH_WORD:
537: *accessfn = address_access;
538: *hashfn = address_hash;
539: *cmpfn = address_cmp;
540: return TRUE;
541: case SCM_HASH_EQV:
542: *accessfn = general_access;
543: *hashfn = eqv_hash;
544: *cmpfn = eqv_cmp;
545: return TRUE;
546: case SCM_HASH_EQUAL:
547: *accessfn = general_access;
548: *hashfn = equal_hash;
549: *cmpfn = equal_cmp;
550: return TRUE;
551: case SCM_HASH_STRING:
552: *accessfn = string_access;
553: *hashfn = string_hash;
554: *cmpfn = string_cmp;
555: return TRUE;
556: default:
557: return FALSE;
558: }
559: }
560:
561: void Scm_HashCoreInitSimple(ScmHashCore *core,
562: