From bc0b4f0d2a610059afb95ef0360704714815187d Mon Sep 17 00:00:00 2001 From: Ken Thompson Date: Thu, 13 Nov 2008 10:35:44 -0800 Subject: [PATCH] mike's map code R=r OCL=19146 CL=19146 --- src/runtime/Makefile | 4 +- src/runtime/hashmap.c | 861 ++++++++++++++++++++++++++++++++++++++++++ src/runtime/hashmap.h | 160 ++++++++ src/runtime/map.c | 252 ------------- src/runtime/runtime.c | 44 ++- src/runtime/runtime.h | 1 + 6 files changed, 1059 insertions(+), 263 deletions(-) create mode 100644 src/runtime/hashmap.c create mode 100644 src/runtime/hashmap.h delete mode 100644 src/runtime/map.c diff --git a/src/runtime/Makefile b/src/runtime/Makefile index b9e5770103..df33b58015 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -18,7 +18,7 @@ LIBOFILES=\ rt2_$(GOARCH).$O\ sys_$(GOARCH)_$(GOOS).$O\ runtime.$O\ - map.$O\ + hashmap.$O\ chan.$O\ iface.$O\ array.$O\ @@ -30,7 +30,7 @@ LIBOFILES=\ OFILES=$(RT0OFILES) $(LIBOFILES) OS_H=$(GOARCH)_$(GOOS).h -HFILES=runtime.h $(OS_H_) +HFILES=runtime.h hashmap.h $(OS_H_) install: rt0 $(LIB) runtime.acid cp $(RT0OFILES) $(GOROOT)/lib diff --git a/src/runtime/hashmap.c b/src/runtime/hashmap.c new file mode 100644 index 0000000000..1a8e68c39c --- /dev/null +++ b/src/runtime/hashmap.c @@ -0,0 +1,861 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" +#include "hashmap.h" + +/* Return a pointer to the struct/union of type "type" + whose "field" field is addressed by pointer "p". */ + +struct hash { /* a hash table; initialize with hash_init() */ + uint32 count; /* elements in table - must be first */ + + uint8 datasize; /* amount of data to store in entry */ + uint8 max_power; /* max power of 2 to create sub-tables */ + uint8 max_probes; /* max entries to probe before rehashing */ + int32 changes; /* inc'ed whenever a subtable is created/grown */ + hash_hash_t (*data_hash) (uint32, void *a); /* return hash of *a */ + uint32 (*data_eq) (uint32, void *a, void *b); /* return whether *a == *b */ + void (*data_del) (uint32, void *arg, void *data); /* invoked on deletion */ + struct hash_subtable *st; /* first-level table */ + + uint32 keysize; + uint32 valsize; + uint32 ko; + uint32 vo; + uint32 po; + Alg* keyalg; + Alg* valalg; +}; + +struct hash_entry { + hash_hash_t hash; /* hash value of data */ + byte data[1]; /* user data has "datasize" bytes */ +}; + +struct hash_subtable { + uint8 power; /* bits used to index this table */ + uint8 used; /* bits in hash used before reaching this table */ + uint8 datasize; /* bytes of client data in an entry */ + uint8 max_probes; /* max number of probes when searching */ + int16 limit_bytes; /* max_probes * (datasize+sizeof (hash_hash_t)) */ + struct hash_entry *end; /* points just past end of entry[] */ + struct hash_entry entry[1]; /* 2**power+max_probes-1 elements of elemsize bytes */ +}; + +#define HASH_DATA_EQ(h,x,y) ((*h->data_eq) (h->keysize, (x), (y))) + +#define HASH_REHASH 0x2 /* an internal flag */ +/* the number of bits used is stored in the flags word too */ +#define HASH_USED(x) ((x) >> 2) +#define HASH_MAKE_USED(x) ((x) << 2) + +#define HASH_LOW 6 +#define HASH_ONE (((hash_hash_t)1) << HASH_LOW) +#define HASH_MASK (HASH_ONE - 1) +#define HASH_ADJUST(x) (((x) < HASH_ONE) << HASH_LOW) + +#define HASH_BITS (sizeof (hash_hash_t) * 8) + +#define HASH_SUBHASH HASH_MASK +#define HASH_NIL 0 +#define HASH_NIL_MEMSET 0 + +#define HASH_OFFSET(base, byte_offset) \ + ((struct hash_entry *) (((byte *) (base)) + (byte_offset))) + + +/* return a hash layer with 2**power empty entries */ +static struct hash_subtable * +hash_subtable_new (struct hash *h, int32 power, int32 used) +{ + int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]); + int32 bytes = elemsize << power; + struct hash_subtable *st; + int32 limit_bytes = h->max_probes * elemsize; + int32 max_probes = h->max_probes; + + if (bytes < limit_bytes) { + limit_bytes = bytes; + max_probes = 1 << power; + } + bytes += limit_bytes - elemsize; + st = malloc (offsetof (struct hash_subtable, entry[0]) + bytes); + st->power = power; + st->used = used; + st->datasize = h->datasize; + st->max_probes = max_probes; + st->limit_bytes = limit_bytes; + st->end = HASH_OFFSET (st->entry, bytes); + memset (st->entry, HASH_NIL_MEMSET, bytes); + return (st); +} + +static void +init_sizes (int64 hint, int32 *init_power, int32 *max_power) +{ + int32 log = 0; + int32 i; + + for (i = 32; i != 0; i >>= 1) { + if ((hint >> (log + i)) != 0) { + log += i; + } + } + log += 1 + (((hint << 3) >> log) >= 11); /* round up for utilization */ + if (log <= 14) { + *init_power = log; + } else { + *init_power = 12; + } + *max_power = 12; +} + +static void +hash_init (struct hash *h, + int32 datasize, + hash_hash_t (*data_hash) (uint32, void *), + uint32 (*data_eq) (uint32, void *, void *), + void (*data_del) (uint32, void *, void *), + int64 hint) +{ + int32 init_power; + int32 max_power; + + if(datasize < sizeof (void *)) + datasize = sizeof (void *); + init_sizes (hint, &init_power, &max_power); + h->datasize = datasize; + h->max_power = max_power; + h->max_probes = 15; + assert (h->datasize == datasize); + assert (h->max_power == max_power); + assert (sizeof (void *) <= h->datasize || h->max_power == 255); + h->count = 0; + h->changes = 0; + h->data_hash = data_hash; + h->data_eq = data_eq; + h->data_del = data_del; + h->st = hash_subtable_new (h, init_power, 0); +} + +static void +hash_remove_n (struct hash_subtable *st, struct hash_entry *dst_e, int32 n) +{ + int32 elemsize = st->datasize + offsetof (struct hash_entry, data[0]); + struct hash_entry *src_e = HASH_OFFSET (dst_e, n * elemsize); + struct hash_entry *end_e = st->end; + int32 shift = HASH_BITS - (st->power + st->used); + int32 index_mask = (((hash_hash_t)1) << st->power) - 1; + int32 dst_i = (((byte *) dst_e) - ((byte *) st->entry)) / elemsize; + int32 src_i = dst_i + n; + hash_hash_t hash; + int32 skip; + int32 bytes; + + while (dst_e != src_e) { + if (src_e != end_e) { + struct hash_entry *cp_e = src_e; + int32 save_dst_i = dst_i; + while (cp_e != end_e && (hash = cp_e->hash) != HASH_NIL && + ((hash >> shift) & index_mask) <= dst_i) { + cp_e = HASH_OFFSET (cp_e, elemsize); + dst_i++; + } + bytes = ((byte *) cp_e) - (byte *) src_e; + memmove (dst_e, src_e, bytes); + dst_e = HASH_OFFSET (dst_e, bytes); + src_e = cp_e; + src_i += dst_i - save_dst_i; + if (src_e != end_e && (hash = src_e->hash) != HASH_NIL) { + skip = ((hash >> shift) & index_mask) - dst_i; + } else { + skip = src_i - dst_i; + } + } else { + skip = src_i - dst_i; + } + bytes = skip * elemsize; + memset (dst_e, HASH_NIL_MEMSET, bytes); + dst_e = HASH_OFFSET (dst_e, bytes); + dst_i += skip; + } +} + +static int32 +hash_insert_internal (struct hash_subtable **pst, int32 flags, hash_hash_t hash, + struct hash *h, void *data, void **pres); + +static void +hash_conv (struct hash *h, + struct hash_subtable *st, int32 flags, + hash_hash_t hash, + struct hash_entry *e) +{ + int32 new_flags = (flags + HASH_MAKE_USED (st->power)) | HASH_REHASH; + int32 shift = HASH_BITS - HASH_USED (new_flags); + hash_hash_t prefix_mask = (-(hash_hash_t)1) << shift; + int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]); + void *dummy_result; + struct hash_entry *de; + int32 index_mask = (1 << st->power) - 1; + hash_hash_t e_hash; + struct hash_entry *pe = HASH_OFFSET (e, -elemsize); + + while (e != st->entry && (e_hash = pe->hash) != HASH_NIL && (e_hash & HASH_MASK) != HASH_SUBHASH) { + e = pe; + pe = HASH_OFFSET (pe, -elemsize); + } + + de = e; + while (e != st->end && + (e_hash = e->hash) != HASH_NIL && + (e_hash & HASH_MASK) != HASH_SUBHASH) { + struct hash_entry *target_e = HASH_OFFSET (st->entry, ((e_hash >> shift) & index_mask) * elemsize); + struct hash_entry *ne = HASH_OFFSET (e, elemsize); + hash_hash_t current = e_hash & prefix_mask; + if (de < target_e) { + memset (de, HASH_NIL_MEMSET, ((byte *) target_e) - (byte *) de); + de = target_e; + } + if ((hash & prefix_mask) == current || + (ne != st->end && (e_hash = ne->hash) != HASH_NIL && + (e_hash & prefix_mask) == current)) { + struct hash_subtable *new_st = hash_subtable_new (h, 1, HASH_USED (new_flags)); + int32 rc = hash_insert_internal (&new_st, new_flags, e->hash, h, e->data, &dummy_result); + assert (rc == 0); + memcpy(dummy_result, e->data, h->datasize); + e = ne; + while (e != st->end && (e_hash = e->hash) != HASH_NIL && (e_hash & prefix_mask) == current) { + assert ((e_hash & HASH_MASK) != HASH_SUBHASH); + rc = hash_insert_internal (&new_st, new_flags, e_hash, h, e->data, &dummy_result); + assert (rc == 0); + memcpy(dummy_result, e->data, h->datasize); + e = HASH_OFFSET (e, elemsize); + } + memset (de->data, HASH_NIL_MEMSET, h->datasize); + *(struct hash_subtable **)de->data = new_st; + de->hash = current | HASH_SUBHASH; + } else { + if (e != de) { + memcpy (de, e, elemsize); + } + e = HASH_OFFSET (e, elemsize); + } + de = HASH_OFFSET (de, elemsize); + } + if (e != de) { + hash_remove_n (st, de, (((byte *) e) - (byte *) de) / elemsize); + } +} + +static void +hash_grow (struct hash *h, struct hash_subtable **pst, int32 flags) +{ + struct hash_subtable *old_st = *pst; + int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]); + *pst = hash_subtable_new (h, old_st->power + 1, HASH_USED (flags)); + struct hash_entry *end_e = old_st->end; + struct hash_entry *e; + void *dummy_result; + int32 used = 0; + + flags |= HASH_REHASH; + for (e = old_st->entry; e != end_e; e = HASH_OFFSET (e, elemsize)) { + hash_hash_t hash = e->hash; + if (hash != HASH_NIL) { + int32 rc = hash_insert_internal (pst, flags, e->hash, h, e->data, &dummy_result); + assert (rc == 0); + memcpy(dummy_result, e->data, h->datasize); + used++; + } + } + free (old_st); +} + +int32 +hash_lookup (struct hash *h, void *data, void **pres) +{ + int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]); + hash_hash_t hash = (*h->data_hash) (h->keysize, data) & ~HASH_MASK; + struct hash_subtable *st = h->st; + int32 used = 0; + hash_hash_t e_hash; + struct hash_entry *e; + struct hash_entry *end_e; + + hash += HASH_ADJUST (hash); + for (;;) { + int32 shift = HASH_BITS - (st->power + used); + int32 index_mask = (1 << st->power) - 1; + int32 i = (hash >> shift) & index_mask; /* i is the natural position of hash */ + + e = HASH_OFFSET (st->entry, i * elemsize); /* e points to element i */ + e_hash = e->hash; + if ((e_hash & HASH_MASK) != HASH_SUBHASH) { /* a subtable */ + break; + } + used += st->power; + st = *(struct hash_subtable **)e->data; + } + end_e = HASH_OFFSET (e, st->limit_bytes); + while (e != end_e && (e_hash = e->hash) != HASH_NIL && e_hash < hash) { + e = HASH_OFFSET (e, elemsize); + } + while (e != end_e && ((e_hash = e->hash) ^ hash) < HASH_SUBHASH) { + if (HASH_DATA_EQ (h, data, e->data)) { /* a match */ + *pres = e->data; + return (1); + } + e = HASH_OFFSET (e, elemsize); + } + USED(e_hash); + *pres = 0; + return (0); +} + +int32 +hash_remove (struct hash *h, void *data, void *arg) +{ + int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]); + hash_hash_t hash = (*h->data_hash) (h->keysize, data) & ~HASH_MASK; + struct hash_subtable *st = h->st; + int32 used = 0; + hash_hash_t e_hash; + struct hash_entry *e; + struct hash_entry *end_e; + + hash += HASH_ADJUST (hash); + for (;;) { + int32 shift = HASH_BITS - (st->power + used); + int32 index_mask = (1 << st->power) - 1; + int32 i = (hash >> shift) & index_mask; /* i is the natural position of hash */ + + e = HASH_OFFSET (st->entry, i * elemsize); /* e points to element i */ + e_hash = e->hash; + if ((e_hash & HASH_MASK) != HASH_SUBHASH) { /* a subtable */ + break; + } + used += st->power; + st = *(struct hash_subtable **)e->data; + } + end_e = HASH_OFFSET (e, st->limit_bytes); + while (e != end_e && (e_hash = e->hash) != HASH_NIL && e_hash < hash) { + e = HASH_OFFSET (e, elemsize); + } + while (e != end_e && ((e_hash = e->hash) ^ hash) < HASH_SUBHASH) { + if (HASH_DATA_EQ (h, data, e->data)) { /* a match */ + (*h->data_del) (h->keysize, arg, e->data); + hash_remove_n (st, e, 1); + h->count--; + return (1); + } + e = HASH_OFFSET (e, elemsize); + } + USED(e_hash); + return (0); +} + +static int32 +hash_insert_internal (struct hash_subtable **pst, int32 flags, hash_hash_t hash, + struct hash *h, void *data, void **pres) +{ + int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]); + + if ((flags & HASH_REHASH) == 0) { + hash += HASH_ADJUST (hash); + hash &= ~HASH_MASK; + } + for (;;) { + struct hash_subtable *st = *pst; + int32 shift = HASH_BITS - (st->power + HASH_USED (flags)); + int32 index_mask = (1 << st->power) - 1; + int32 i = (hash >> shift) & index_mask; /* i is the natural position of hash */ + struct hash_entry *start_e = + HASH_OFFSET (st->entry, i * elemsize); /* start_e is the pointer to element i */ + struct hash_entry *e = start_e; /* e is going to range over [start_e, end_e) */ + struct hash_entry *end_e; + hash_hash_t e_hash = e->hash; + + if ((e_hash & HASH_MASK) == HASH_SUBHASH) { /* a subtable */ + pst = (struct hash_subtable **) e->data; + flags += HASH_MAKE_USED (st->power); + continue; + } + end_e = HASH_OFFSET (start_e, st->limit_bytes); + while (e != end_e && (e_hash = e->hash) != HASH_NIL && e_hash < hash) { + e = HASH_OFFSET (e, elemsize); + i++; + } + if (e != end_e && e_hash != HASH_NIL) { + /* ins_e ranges over the elements that may match */ + struct hash_entry *ins_e = e; + int32 ins_i = i; + hash_hash_t ins_e_hash; + while (ins_e != end_e && ((e_hash = ins_e->hash) ^ hash) < HASH_SUBHASH) { + if (HASH_DATA_EQ (h, data, ins_e->data)) { /* a match */ + *pres = ins_e->data; + return (1); + } + assert (e_hash != hash || (flags & HASH_REHASH) == 0); + hash += (e_hash == hash); /* adjust hash if it collides */ + ins_e = HASH_OFFSET (ins_e, elemsize); + ins_i++; + if (e_hash <= hash) { /* set e to insertion point */ + e = ins_e; + i = ins_i; + } + } + /* set ins_e to the insertion point for the new element */ + ins_e = e; + ins_i = i; + ins_e_hash = 0; + /* move ins_e to point at the end of the contiguous block, but + stop if any element can't be moved by one up */ + while (ins_e != st->end && (ins_e_hash = ins_e->hash) != HASH_NIL && + ins_i + 1 - ((ins_e_hash >> shift) & index_mask) < st->max_probes && + (ins_e_hash & HASH_MASK) != HASH_SUBHASH) { + ins_e = HASH_OFFSET (ins_e, elemsize); + ins_i++; + } + if (e == end_e || ins_e == st->end || ins_e_hash != HASH_NIL) { + e = end_e; /* can't insert; must grow or convert to subtable */ + } else { /* make space for element */ + memmove (HASH_OFFSET (e, elemsize), e, ((byte *) ins_e) - (byte *) e); + } + } + if (e != end_e) { + e->hash = hash; + *pres = e->data; + return (0); + } + h->changes++; + if (st->power < h->max_power) { + hash_grow (h, pst, flags); + } else { + hash_conv (h, st, flags, hash, start_e); + } + } +} + +int32 +hash_insert (struct hash *h, void *data, void **pres) +{ + int32 rc = hash_insert_internal (&h->st, 0, (*h->data_hash) (h->keysize, data), h, data, pres); + + h->count += (rc == 0); /* increment count if element didn't previously exist */ + return (rc); +} + +uint32 +hash_count (struct hash *h) +{ + return (h->count); +} + +static void +iter_restart (struct hash_iter *it, struct hash_subtable *st, int32 used) +{ + int32 elemsize = it->elemsize; + hash_hash_t last_hash = it->last_hash; + struct hash_entry *e; + hash_hash_t e_hash; + struct hash_iter_sub *sub = &it->subtable_state[it->i]; + struct hash_entry *end; + + for (;;) { + int32 shift = HASH_BITS - (st->power + used); + int32 index_mask = (1 << st->power) - 1; + int32 i = (last_hash >> shift) & index_mask; + + end = st->end; + e = HASH_OFFSET (st->entry, i * elemsize); + sub->start = st->entry; + sub->end = end; + + if ((e->hash & HASH_MASK) != HASH_SUBHASH) { + break; + } + sub->e = HASH_OFFSET (e, elemsize); + sub = &it->subtable_state[++(it->i)]; + used += st->power; + st = *(struct hash_subtable **)e->data; + } + while (e != end && ((e_hash = e->hash) == HASH_NIL || e_hash <= last_hash)) { + e = HASH_OFFSET (e, elemsize); + } + sub->e = e; +} + +void * +hash_next (struct hash_iter *it) +{ + int32 elemsize = it->elemsize; + struct hash_iter_sub *sub = &it->subtable_state[it->i]; + struct hash_entry *e = sub->e; + struct hash_entry *end = sub->end; + hash_hash_t e_hash = 0; + + if (it->changes != it->h->changes) { /* hash table's structure changed; recompute */ + it->changes = it->h->changes; + it->i = 0; + iter_restart (it, it->h->st, 0); + sub = &it->subtable_state[it->i]; + e = sub->e; + end = sub->end; + } + if (e != sub->start && it->last_hash != HASH_OFFSET (e, -elemsize)->hash) { + struct hash_entry *start = HASH_OFFSET (e, -(elemsize * it->h->max_probes)); + struct hash_entry *pe = HASH_OFFSET (e, -elemsize); + hash_hash_t last_hash = it->last_hash; + if (start < sub->start) { + start = sub->start; + } + while (e != start && ((e_hash = pe->hash) == HASH_NIL || last_hash < e_hash)) { + e = pe; + pe = HASH_OFFSET (pe, -elemsize); + } + while (e != end && ((e_hash = e->hash) == HASH_NIL || e_hash <= last_hash)) { + e = HASH_OFFSET (e, elemsize); + } + } + + for (;;) { + while (e != end && (e_hash = e->hash) == HASH_NIL) { + e = HASH_OFFSET (e, elemsize); + } + if (e == end) { + if (it->i == 0) { + it->last_hash = HASH_OFFSET (e, -elemsize)->hash; + sub->e = e; + return (0); + } else { + it->i--; + sub = &it->subtable_state[it->i]; + e = sub->e; + end = sub->end; + } + } else if ((e_hash & HASH_MASK) != HASH_SUBHASH) { + it->last_hash = e->hash; + sub->e = HASH_OFFSET (e, elemsize); + return (e->data); + } else { + struct hash_subtable *st = + *(struct hash_subtable **)e->data; + sub->e = HASH_OFFSET (e, elemsize); + it->i++; + assert (it->i < sizeof (it->subtable_state) / + sizeof (it->subtable_state[0])); + sub = &it->subtable_state[it->i]; + sub->e = e = st->entry; + sub->start = st->entry; + sub->end = end = st->end; + } + } +} + +void +hash_iter_init (struct hash *h, struct hash_iter *it) +{ + it->elemsize = h->datasize + offsetof (struct hash_entry, data[0]); + it->changes = h->changes; + it->i = 0; + it->h = h; + it->last_hash = 0; + it->subtable_state[0].e = h->st->entry; + it->subtable_state[0].start = h->st->entry; + it->subtable_state[0].end = h->st->end; +} + +static void +clean_st (struct hash_subtable *st, int32 *slots, int32 *used) +{ + int32 elemsize = st->datasize + offsetof (struct hash_entry, data[0]); + struct hash_entry *e = st->entry; + struct hash_entry *end = st->end; + int32 lslots = (((byte *) end) - (byte *) e) / elemsize; + int32 lused = 0; + + while (e != end) { + hash_hash_t hash = e->hash; + if ((hash & HASH_MASK) == HASH_SUBHASH) { + clean_st (*(struct hash_subtable **)e->data, slots, used); + } else { + lused += (hash != HASH_NIL); + } + e = HASH_OFFSET (e, elemsize); + } + free (st); + *slots += lslots; + *used += lused; +} + +void +hash_destroy (struct hash *h) +{ + int32 slots = 0; + int32 used = 0; + + clean_st (h->st, &slots, &used); + free (h); +} + +static void +hash_visit_internal (struct hash_subtable *st, + int32 used, int32 level, + void (*data_visit) (void *arg, int32 level, void *data), + void *arg) +{ + int32 elemsize = st->datasize + offsetof (struct hash_entry, data[0]); + struct hash_entry *e = st->entry; + int32 shift = HASH_BITS - (used + st->power); + int32 i = 0; + + while (e != st->end) { + int32 index = ((e->hash >> (shift - 1)) >> 1) & ((1 << st->power) - 1); + if ((e->hash & HASH_MASK) == HASH_SUBHASH) { + (*data_visit) (arg, level, e->data); + hash_visit_internal (*(struct hash_subtable **)e->data, + used + st->power, level + 1, data_visit, arg); + } else { + (*data_visit) (arg, level, e->data); + } + if (e->hash != HASH_NIL) { + assert (i < index + st->max_probes); + assert (index <= i); + } + e = HASH_OFFSET (e, elemsize); + i++; + } +} + +void +hash_visit (struct hash *h, void (*data_visit) (void *arg, int32 level, void *data), void *arg) +{ + hash_visit_internal (h->st, 0, 0, data_visit, arg); +} + +// +/// interfaces to go runtime +// + +static void +donothing(uint32 s, void *a, void *b) +{ + USED(s); + USED(a); + USED(b); +} + +typedef struct hash Hmap; +static int32 debug = 0; + +// newmap(keysize uint32, valsize uint32, +// keyalg uint32, valalg uint32, +// hint uint32) (hmap *map[any]any); +void +sys·newmap(uint32 keysize, uint32 valsize, + uint32 keyalg, uint32 valalg, uint32 hint, + Hmap* ret) +{ + Hmap *h; + + if(keyalg >= 3 || + valalg >= 3) { + prints("0<="); + sys·printint(keyalg); + prints("<"); + sys·printint(nelem(algarray)); + prints("\n0<="); + sys·printint(valalg); + prints("<"); + sys·printint(nelem(algarray)); + prints("\n"); + + throw("sys·newmap: key/val algorithm out of range"); + } + + h = mal(sizeof(*h)); + hash_init(h, keysize+valsize, + algarray[keyalg].hash, + algarray[keyalg].equal, + donothing, + hint); + + h->keysize = keysize; + h->valsize = valsize; + h->keyalg = &algarray[keyalg]; + h->valalg = &algarray[valalg]; + + // these calculations are compiler dependent + h->ko = rnd(sizeof(h), keysize); + h->vo = rnd(h->ko+keysize, valsize); + h->po = rnd(h->vo+valsize, 1); + + ret = h; + FLUSH(&ret); + + if(debug) { + prints("newmap: map="); + sys·printpointer(h); + prints("; keysize="); + sys·printint(keysize); + prints("; valsize="); + sys·printint(valsize); + prints("; keyalg="); + sys·printint(keyalg); + prints("; valalg="); + sys·printint(valalg); + prints("; ko="); + sys·printint(h->ko); + prints("; vo="); + sys·printint(h->vo); + prints("; po="); + sys·printint(h->po); + prints("\n"); + } +} + +// mapaccess1(hmap *map[any]any, key any) (val any); +void +sys·mapaccess1(Hmap *h, ...) +{ + byte *ak, *av; + byte *res; + int32 hit; + + ak = (byte*)&h + h->ko; + av = (byte*)&h + h->vo; + + res = nil; + hit = hash_lookup(h, ak, (void**)&res); + if(!hit) + throw("sys·mapaccess1: key not in map"); + h->valalg->copy(h->valsize, av, res+h->keysize); + + if(debug) { + prints("sys·mapaccess1: map="); + sys·printpointer(h); + prints("; key="); + h->keyalg->print(h->keysize, ak); + prints("; val="); + h->valalg->print(h->valsize, av); + prints("; hit="); + sys·printint(hit); + prints("; res="); + sys·printpointer(res); + prints("\n"); + } +} + +// mapaccess2(hmap *map[any]any, key any) (val any, pres bool); +void +sys·mapaccess2(Hmap *h, ...) +{ + byte *ak, *av, *ap; + byte *res; + int32 hit; + + ak = (byte*)&h + h->ko; + av = (byte*)&h + h->vo; + ap = (byte*)&h + h->po; + + res = nil; + hit = hash_lookup(h, ak, (void**)&res); + if(!hit) { + *ap = false; + h->valalg->copy(h->valsize, av, nil); + } else { + *ap = true; + h->valalg->copy(h->valsize, av, res+h->keysize); + } + + if(debug) { + prints("sys·mapaccess2: map="); + sys·printpointer(h); + prints("; key="); + h->keyalg->print(h->keysize, ak); + prints("; val="); + h->valalg->print(h->valsize, av); + prints("; hit="); + sys·printint(hit); + prints("; res="); + sys·printpointer(res); + prints("; pres="); + sys·printbool(*ap); + prints("\n"); + } +} + +static void +mapassign(Hmap *h, byte *ak, byte *av) +{ + byte *res; + int32 hit; + + res = nil; + hit = hash_insert(h, ak, (void**)&res); + h->keyalg->copy(h->keysize, res, ak); + h->valalg->copy(h->valsize, res+h->keysize, av); + + if(debug) { + prints("mapassign: map="); + sys·printpointer(h); + prints("; key="); + h->keyalg->print(h->keysize, ak); + prints("; val="); + h->valalg->print(h->valsize, av); + prints("; hit="); + sys·printint(hit); + prints("; res="); + sys·printpointer(res); + prints("\n"); + } +} + +// mapassign1(hmap *map[any]any, key any, val any); +void +sys·mapassign1(Hmap *h, ...) +{ + byte *ak, *av; + + ak = (byte*)&h + h->ko; + av = (byte*)&h + h->vo; + + mapassign(h, ak, av); +} + +// mapassign2(hmap *map[any]any, key any, val any, pres bool); +void +sys·mapassign2(Hmap *h, ...) +{ + byte *ak, *av, *ap; + byte *res; + int32 hit; + + ak = (byte*)&h + h->ko; + av = (byte*)&h + h->vo; + ap = (byte*)&h + h->po; + + if(*ap == true) { + // assign + mapassign(h, ak, av); + return; + } + + // delete + hit = hash_remove(h, ak, (void**)&res); + + if(debug) { + prints("mapassign2: map="); + sys·printpointer(h); + prints("; key="); + h->keyalg->print(h->keysize, ak); + prints("; hit="); + sys·printint(hit); + prints("; res="); + sys·printpointer(res); + prints("\n"); + } +} diff --git a/src/runtime/hashmap.h b/src/runtime/hashmap.h new file mode 100644 index 0000000000..04bb732699 --- /dev/null +++ b/src/runtime/hashmap.h @@ -0,0 +1,160 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + + +/* A hash table. + Example, hashing nul-terminated char*s: + hash_hash_t str_hash (void *v) { + char *s; + hash_hash_t hash = 0; + for (s = *(char **)v; *s != 0; s++) { + hash = (hash ^ *s) * 2654435769U; + } + return (hash); + } + int str_eq (void *a, void *b) { + return (strcmp (*(char **)a, *(char **)b) == 0); + } + void str_del (void *arg, void *data) { + *(char **)arg = *(char **)data; + } + + struct hash *h = hash_new (sizeof (char *), &str_hash, &str_eq, &str_del, 3, 12, 15); + ... 3=> 2**3 entries initial size + ... 12=> 2**12 entries before sprouting sub-tables + ... 15=> number of adjacent probes to attempt before growing + + Example lookup: + char *key = "foobar"; + char **result_ptr; + if (hash_lookup (h, &key, (void **) &result_ptr)) { + printf ("found in table: %s\n", *result_ptr); + } else { + printf ("not found in table\n"); + } + + Example insertion: + char *key = strdup ("foobar"); + char **result_ptr; + if (hash_lookup (h, &key, (void **) &result_ptr)) { + printf ("found in table: %s\n", *result_ptr); + printf ("to overwrite, do *result_ptr = key\n"); + } else { + printf ("not found in table; inserted as %s\n", *result_ptr); + assert (*result_ptr == key); + } + + Example deletion: + char *key = "foobar"; + char *result; + if (hash_remove (h, &key, &result)) { + printf ("key found and deleted from table\n"); + printf ("called str_del (&result, data) to copy data to result: %s\n", result); + } else { + printf ("not found in table\n"); + } + + Example iteration over the elements of *h: + char **data; + struct hash_iter it; + hash_iter_init (h, &it); + for (data = hash_next (&it); data != 0; data = hash_next (&it)) { + printf ("%s\n", *data); + } + */ + +#define malloc mal +#define free(a) USED(a) +#define offsetof(s,m) (uint32)(&(((s*)0)->m)) +#define memset(a,b,c) sys·memclr((byte*)(a), (uint32)(c)) +#define memmove(a,b,c) mmov((byte*)(a),(byte*)(b),(uint32)(c)) +#define memcpy(a,b,c) mcpy((byte*)(a),(byte*)(b),(uint32)(c)) +#define assert(a) if(!(a)) throw("assert") + +struct hash; /* opaque */ +struct hash_subtable; /* opaque */ +struct hash_entry; /* opaque */ + +typedef uint64 uintptr_t; +typedef uintptr_t hash_hash_t; + +struct hash_iter { + int32 elemsize; /* size of elements in table */ + int32 changes; /* number of changes observed last time */ + int32 i; /* stack pointer in subtable_state */ + hash_hash_t last_hash; /* last hash value returned */ + struct hash *h; /* the hash table */ + struct hash_iter_sub { + struct hash_entry *e; /* pointer into subtable */ + struct hash_entry *start; /* start of subtable */ + struct hash_entry *end; /* end of subtable */ + } subtable_state[16]; /* Should be large enough unless the hashing is + so bad that many distinct data values hash + to the same hash value. */ +}; + +/* Return a hashtable h 2**init_power empty entries, each with + "datasize" data bytes. + (*data_hash)(a) should return the hash value of data element *a. + (*data_eq)(a,b) should return whether the data at "a" and the data at "b" + are equal. + (*data_del)(arg, a) will be invoked when data element *a is about to be removed + from the table. "arg" is the argument passed to "hash_remove()". + + Growing is accomplished by resizing if the current tables size is less than + a threshold, and by adding subtables otherwise. hint should be set + the expected maximum size of the table. + "datasize" should be in [sizeof (void*), ..., 255]. If you need a + bigger "datasize", store a pointer to another piece of memory. */ + +//struct hash *hash_new (int32 datasize, +// hash_hash_t (*data_hash) (void *), +// int32 (*data_eq) (void *, void *), +// void (*data_del) (void *, void *), +// int64 hint); + +/* Lookup *data in *h. If the data is found, return 1 and place a pointer to + the found element in *pres. Otherwise return 0 and place 0 in *pres. */ +int32 hash_lookup (struct hash *h, void *data, void **pres); + +/* Lookup *data in *h. If the data is found, execute (*data_del) (arg, p) + where p points to the data in the table, then remove it from *h and return + 1. Otherwise return 0. */ +int32 hash_remove (struct hash *h, void *data, void *arg); + +/* Lookup *data in *h. If the data is found, return 1, and place a pointer + to the found element in *pres. Otherwise, return 0, allocate a region + for the data to be inserted, and place a pointer to the inserted element + in *pres; it is the caller's responsibility to copy the data to be + inserted to the pointer returned in *pres in this case. + + If using garbage collection, it is the caller's responsibility to + add references for **pres if HASH_ADDED is returned. */ +int32 hash_insert (struct hash *h, void *data, void **pres); + +/* Return the number of elements in the table. */ +uint32 hash_count (struct hash *h); + +/* The following call is useful only if not using garbage collection on the + table. + Remove all sub-tables associated with *h. + This undoes the effects of hash_init(). + If other memory pointed to by user data must be freed, the caller is + responsible for doiing do by iterating over *h first; see + hash_iter_init()/hash_next(). */ +void hash_destroy (struct hash *h); + +/*----- iteration -----*/ + +/* Initialize *it from *h. */ +void hash_iter_init (struct hash *h, struct hash_iter *it); + +/* Return the next used entry in the table which which *it was initialized. */ +void *hash_next (struct hash_iter *it); + +/*---- test interface ----*/ +/* Call (*data_visit) (arg, level, data) for every data entry in the table, + whether used or not. "level" is the subtable level, 0 means first level. */ +/* TESTING ONLY: DO NOT USE THIS ROUTINE IN NORMAL CODE */ +void hash_visit (struct hash *h, void (*data_visit) (void *arg, int32 level, void *data), void *arg); diff --git a/src/runtime/map.c b/src/runtime/map.c deleted file mode 100644 index 0a4b3b8e84..0000000000 --- a/src/runtime/map.c +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "runtime.h" - -static int32 debug = 0; - -typedef struct Link Link; -typedef struct Hmap Hmap; - -struct Link -{ - Link* link; - byte data[8]; -}; - -struct Hmap -{ - uint32 len; // must be first - uint32 keysize; - uint32 valsize; - uint32 hint; - uint32 valoffset; - uint32 ko; - uint32 vo; - uint32 po; - Alg* keyalg; - Alg* valalg; - Link* link; -}; - -// newmap(keysize uint32, valsize uint32, -// keyalg uint32, valalg uint32, -// hint uint32) (hmap *map[any]any); -void -sys·newmap(uint32 keysize, uint32 valsize, - uint32 keyalg, uint32 valalg, uint32 hint, - Hmap* ret) -{ - Hmap *m; - - if(keyalg >= 3 || - valalg >= 3) { - prints("0<="); - sys·printint(keyalg); - prints("<"); - sys·printint(nelem(algarray)); - prints("\n0<="); - sys·printint(valalg); - prints("<"); - sys·printint(nelem(algarray)); - prints("\n"); - - throw("sys·newmap: key/val algorithm out of range"); - } - - m = mal(sizeof(*m)); - - m->len = 0; - m->keysize = keysize; - m->valsize = valsize; - m->keyalg = &algarray[keyalg]; - m->valalg = &algarray[valalg]; - m->hint = hint; - - // these calculations are compiler dependent - m->valoffset = rnd(keysize, valsize); - m->ko = rnd(sizeof(m), keysize); - m->vo = rnd(m->ko+keysize, valsize); - m->po = rnd(m->vo+valsize, 1); - - ret = m; - FLUSH(&ret); - - if(debug) { - prints("newmap: map="); - sys·printpointer(m); - prints("; keysize="); - sys·printint(keysize); - prints("; valsize="); - sys·printint(valsize); - prints("; keyalg="); - sys·printint(keyalg); - prints("; valalg="); - sys·printint(valalg); - prints("; valoffset="); - sys·printint(m->valoffset); - prints("; ko="); - sys·printint(m->ko); - prints("; vo="); - sys·printint(m->vo); - prints("; po="); - sys·printint(m->po); - prints("\n"); - } -} - -// mapaccess1(hmap *map[any]any, key any) (val any); -void -sys·mapaccess1(Hmap *m, ...) -{ - Link *l; - byte *ak, *av; - - ak = (byte*)&m + m->ko; - av = (byte*)&m + m->vo; - - for(l=m->link; l!=nil; l=l->link) { - if(m->keyalg->equal(m->keysize, ak, l->data)) { - m->valalg->copy(m->valsize, av, l->data+m->valoffset); - goto out; - } - } - - m->valalg->copy(m->valsize, av, 0); - throw("sys·mapaccess1: key not in map"); - -out: - if(debug) { - prints("sys·mapaccess1: map="); - sys·printpointer(m); - prints("; key="); - m->keyalg->print(m->keysize, ak); - prints("; val="); - m->valalg->print(m->valsize, av); - prints("\n"); - } -} - -// mapaccess2(hmap *map[any]any, key any) (val any, pres bool); -void -sys·mapaccess2(Hmap *m, ...) -{ - Link *l; - byte *ak, *av, *ap; - - ak = (byte*)&m + m->ko; - av = (byte*)&m + m->vo; - ap = (byte*)&m + m->po; - - for(l=m->link; l!=nil; l=l->link) { - if(m->keyalg->equal(m->keysize, ak, l->data)) { - *ap = true; - m->valalg->copy(m->valsize, av, l->data+m->valoffset); - goto out; - } - } - - *ap = false; - m->valalg->copy(m->valsize, av, nil); - -out: - if(debug) { - prints("sys·mapaccess2: map="); - sys·printpointer(m); - prints("; key="); - m->keyalg->print(m->keysize, ak); - prints("; val="); - m->valalg->print(m->valsize, av); - prints("; pres="); - sys·printbool(*ap); - prints("\n"); - } -} - -static void -sys·mapassign(Hmap *m, byte *ak, byte *av) -{ - Link *l; - - // mapassign(hmap *map[any]any, key any, val any); - - for(l=m->link; l!=nil; l=l->link) { - if(m->keyalg->equal(m->keysize, ak, l->data)) - goto out; - } - - l = mal((sizeof(*l)-8) + m->keysize + m->valsize); - l->link = m->link; - m->link = l; - m->keyalg->copy(m->keysize, l->data, ak); - m->len++; - -out: - m->valalg->copy(m->valsize, l->data+m->valoffset, av); - - if(debug) { - prints("mapassign: map="); - sys·printpointer(m); - prints("; key="); - m->keyalg->print(m->keysize, ak); - prints("; val="); - m->valalg->print(m->valsize, av); - prints("\n"); - } -} - -// mapassign1(hmap *map[any]any, key any, val any); -void -sys·mapassign1(Hmap *m, ...) -{ - byte *ak, *av; - - ak = (byte*)&m + m->ko; - av = (byte*)&m + m->vo; - - sys·mapassign(m, ak, av); -} - -// mapassign2(hmap *map[any]any, key any, val any, pres bool); -void -sys·mapassign2(Hmap *m, ...) -{ - Link **ll; - byte *ak, *av, *ap; - - ak = (byte*)&m + m->ko; - av = (byte*)&m + m->vo; - ap = (byte*)&m + m->po; - - if(*ap == true) { - // assign - sys·mapassign(m, ak, av); - return; - } - - // delete - for(ll=&m->link; (*ll)!=nil; ll=&(*ll)->link) { - if(m->keyalg->equal(m->keysize, ak, (*ll)->data)) { - m->valalg->copy(m->valsize, (*ll)->data+m->valoffset, nil); - (*ll) = (*ll)->link; - m->len--; - if(debug) { - prints("mapdelete (found): map="); - sys·printpointer(m); - prints("; key="); - m->keyalg->print(m->keysize, ak); - prints("\n"); - } - return; - } - } - - if(debug) { - prints("mapdelete (not found): map="); - sys·printpointer(m); - prints("; key="); - m->keyalg->print(m->keysize, ak); - prints(" *** not found\n"); - } -} diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c index c84b21092e..ea2c432396 100644 --- a/src/runtime/runtime.c +++ b/src/runtime/runtime.c @@ -87,6 +87,28 @@ mcpy(byte *t, byte *f, uint32 n) } } +void +mmov(byte *t, byte *f, uint32 n) +{ + if(t < f) { + while(n > 0) { + *t = *f; + t++; + f++; + n--; + } + } else { + t += n; + f += n; + while(n > 0) { + t--; + f--; + *t = *f; + n--; + } + } +} + uint32 rnd(uint32 n, uint32 m) { @@ -582,9 +604,17 @@ check(void) static uint64 memhash(uint32 s, void *a) { - USED(s, a); - prints("memhash\n"); - return 0x12345; + byte *b; + uint64 hash; + + b = a; + hash = 33054211828000289ULL; + while(s > 0) { + hash = (hash ^ *b) * 23344194077549503ULL; + b++; + s--; + } + return hash; } static uint32 @@ -644,9 +674,7 @@ memcopy(uint32 s, void *a, void *b) static uint64 stringhash(uint32 s, string *a) { - USED(s, a); - prints("stringhash\n"); - return 0x12345; + return memhash((*a)->len, (*a)->str); } static uint32 @@ -677,9 +705,7 @@ stringcopy(uint32 s, string *a, string *b) static uint64 pointerhash(uint32 s, void **a) { - USED(s, a); - prints("pointerhash\n"); - return 0x12345; + return memhash(s, *a); } static uint32 diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 74afa3aef1..5f2ad18b17 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -215,6 +215,7 @@ void throw(int8*); uint32 rnd(uint32, uint32); void prints(int8*); void mcpy(byte*, byte*, uint32); +void mmov(byte*, byte*, uint32); void* mal(uint32); uint32 cmpstring(string, string); void initsig(void);