/* $Header: /CVSROOT/tinolib/old/hash.h,v 1.3 2005/12/05 02:11:13 tino Exp $ * * Hash maps. * * Why re-invent the wheel (as hash maps are part of libstd)? * Well, I like to use algorithms I know and I can alter as I need it. * Like perfect hashing, or boyer moore search. * And I like it to be as simple as it can be. * Especially the memory management. * Usually all the other things out there do not fit in my control paranoia. * * Copyright (C)2004-2005 Valentin Hilbig, webmaster@scylla-charybdis.com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * $Log: hash.h,v $ * Revision 1.3 2005/12/05 02:11:13 tino * Copyright and COPYLEFT added * * Revision 1.2 2004/09/04 20:17:23 tino * changes to fulfill include test (which is part of unit tests) * * Revision 1.1 2004/04/20 23:51:38 tino * Hashing added (untested!) */ #ifndef tino_INC_hash_h #define tino_INC_hash_h #include "debug.h" #include "fatal.h" #include "alloc.h" typedef struct tino_hash_map tino_hash_map; typedef union tino_hash_map_val tino_hash_map_val; typedef struct tino_hash_map_raw tino_hash_map_raw; typedef struct tino_hash_map_handle tino_hash_map_handle; struct tino_hash_map_nodes { int nodes; struct tino_hash_map_node *node; }; struct tino_hash_map_raw { size_t len; void *ptr; }; union tino_hash_map_val { char c; int i; unsigned u; long l; unsigned long ul; long long ll; unsigned long long ull; void *ptr; tino_hash_map_raw raw; struct tino_hash_map_nodes hash; }; struct tino_hash_map_node { tino_hash_map_val val; tino_hash_map_raw key; }; struct tino_hash_map { int fn; int len; struct tino_hash_map_nodes hash; }; static void tino_hash_imp_init(struct tino_hash_map_nodes *h, int len) { if (len<7) len = 7; h->node = tino_alloc0((len+1)*sizeof *h->node); h->nodes = len; } static struct tino_hash_map_node * tino_hash_imp_store_key(struct tino_hash_map_node *node, const void *ptr, size_t len) { node->key.ptr = tino_memdup(ptr, len); node->key.len = len; return node; } #include "hasher.h" /* This rehashing is no ordinary rehashing. * * We just move the old data into the NULL bucket. * This way we can ensure that even when we have a double-hash * we will always only extend by one new sub-hash. * * Such that the have a very fast insertion. * Yes, there should be some special algorithm to re-distribute * HASH maps in case we become extremely disorganized * (such like the hash becoming a linked list). * * However leave optimizations to future. */ static void tino_hash_imp_rehash(struct tino_hash_map_nodes *h, struct tino_hash_map_node *node, int fn) { struct tino_hash_map_node old; old = *node; node->key.ptr = 0; node->key.len = fn+1; tino_hash_imp_init(&node->val.hash, h->nodes-1); node->val.hash.node[node->val.hash.nodes] = old; } static struct tino_hash_map_node * tino_hash_imp_parent(tino_hash_map *map, const void *ptr, size_t len, int create) { struct tino_hash_map_nodes *h; struct tino_hash_map_node *node; int fn; if (!map->hash.nodes) { if (!create) return 0; tino_hash_imp_init(&map->hash, map->len); } if (!ptr || !len) { /* NULL case * Hand out the NULL bucket. */ node = &map->hash.node[map->hash.nodes]; if (node->key.len) return node; if (!create) return 0; node->key.len = 1; /* It's used now */ return node; } tino_FATAL(!ptr || !len); fn = map->fn; h = &map->hash; for (;;) { int hash; hash = hasher(ptr, len, fn)%h->nodes; node = &h->node[hash]; if (!node->key.len) { /* Empty bucket */ if (!create) return 0; tino_hash_imp_store_key(node, ptr, len); return node; } if (node->key.ptr) { if (node->key.len==len && !memcmp(ptr, node->key.ptr, len)) return node; /* Match */ if (!create) return 0; /* Well, we have a double hit, but we want to add another entry. * Create a new sub-hash to store the values in. */ tino_hash_imp_rehash(h, node, fn); h = &node->val.hash; continue; } fn = node->key.len; /* Oh yes, well .. dirty reuse */ h = &node->val.hash; /* Test the NULL bucket of the sub-list. * In case we are extremely unlucky * we run through a linked list this way. * However its sure that we always terminate. */ node = &h->node[h->nodes]; if (node->key.len==len && !memcmp(ptr, node->key.ptr, len)) return node; /* Match */ } } static void tino_hash_map_init(tino_hash_map *map, int len, int fn) { map->fn = fn; map->len = len; memset(&map->hash, 0, sizeof map->hash); } #if 0 static int tino_hash_del_ptr(tino_hash_map *map, const void *s, size_t len) { struct tino_hash_map_node *node; node = tino_hash_imp_parent(map, s, len, 0); if (!node) return 0; tino_hash_imp_free(node); return 1; } #endif tino_hash_map_val * tino_hash_get_ptr(tino_hash_map *map, const void *s, size_t len) { struct tino_hash_map_node *node; node = tino_hash_imp_parent(map, s, len, 0); if (!node) return 0; return &node->val; } tino_hash_map_val * tino_hash_add_ptr(tino_hash_map *map, const void *s, size_t len) { struct tino_hash_map_node *node; node = tino_hash_imp_parent(map, s, len, 1); tino_FATAL(!node); return &node->val; } #endif