411 lines
9.8 KiB
C
411 lines
9.8 KiB
C
|
/*
|
||
|
zip_hash.c -- hash table string -> uint64
|
||
|
Copyright (C) 2015-2017 Dieter Baron and Thomas Klausner
|
||
|
|
||
|
This file is part of libzip, a library to manipulate ZIP archives.
|
||
|
The authors can be contacted at <libzip@nih.at>
|
||
|
|
||
|
Redistribution and use in source and binary forms, with or without
|
||
|
modification, are permitted provided that the following conditions
|
||
|
are met:
|
||
|
1. Redistributions of source code must retain the above copyright
|
||
|
notice, this list of conditions and the following disclaimer.
|
||
|
2. Redistributions in binary form must reproduce the above copyright
|
||
|
notice, this list of conditions and the following disclaimer in
|
||
|
the documentation and/or other materials provided with the
|
||
|
distribution.
|
||
|
3. The names of the authors may not be used to endorse or promote
|
||
|
products derived from this software without specific prior
|
||
|
written permission.
|
||
|
|
||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
|
||
|
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
|
||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||
|
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||
|
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||
|
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||
|
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
*/
|
||
|
|
||
|
#include "zipint.h"
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
/* parameter for the string hash function */
|
||
|
#define HASH_MULTIPLIER 33
|
||
|
#define HASH_START 5381
|
||
|
|
||
|
/* hash table's fill ratio is kept between these by doubling/halfing its size as necessary */
|
||
|
#define HASH_MAX_FILL .75
|
||
|
#define HASH_MIN_FILL .01
|
||
|
|
||
|
/* but hash table size is kept between these */
|
||
|
#define HASH_MIN_SIZE 256
|
||
|
#define HASH_MAX_SIZE 0x80000000ul
|
||
|
|
||
|
struct zip_hash_entry {
|
||
|
const zip_uint8_t *name;
|
||
|
zip_int64_t orig_index;
|
||
|
zip_int64_t current_index;
|
||
|
struct zip_hash_entry *next;
|
||
|
zip_uint32_t hash_value;
|
||
|
};
|
||
|
typedef struct zip_hash_entry zip_hash_entry_t;
|
||
|
|
||
|
struct zip_hash {
|
||
|
zip_uint32_t table_size;
|
||
|
zip_uint64_t nentries;
|
||
|
zip_hash_entry_t **table;
|
||
|
};
|
||
|
|
||
|
|
||
|
/* free list of entries */
|
||
|
static void
|
||
|
free_list(zip_hash_entry_t *entry) {
|
||
|
while (entry != NULL) {
|
||
|
zip_hash_entry_t *next = entry->next;
|
||
|
free(entry);
|
||
|
entry = next;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/* compute hash of string, full 32 bit value */
|
||
|
static zip_uint32_t
|
||
|
hash_string(const zip_uint8_t *name) {
|
||
|
zip_uint64_t value = HASH_START;
|
||
|
|
||
|
if (name == NULL) {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
while (*name != 0) {
|
||
|
value = (zip_uint64_t)(((value * HASH_MULTIPLIER) + (zip_uint8_t)*name) % 0x100000000ul);
|
||
|
name++;
|
||
|
}
|
||
|
|
||
|
return (zip_uint32_t)value;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* resize hash table; new_size must be a power of 2, can be larger or smaller than current size */
|
||
|
static bool
|
||
|
hash_resize(zip_hash_t *hash, zip_uint32_t new_size, zip_error_t *error) {
|
||
|
zip_hash_entry_t **new_table;
|
||
|
|
||
|
if (new_size == hash->table_size) {
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
if ((new_table = (zip_hash_entry_t **)calloc(new_size, sizeof(zip_hash_entry_t *))) == NULL) {
|
||
|
zip_error_set(error, ZIP_ER_MEMORY, 0);
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (hash->nentries > 0) {
|
||
|
zip_uint32_t i;
|
||
|
|
||
|
for (i = 0; i < hash->table_size; i++) {
|
||
|
zip_hash_entry_t *entry = hash->table[i];
|
||
|
while (entry) {
|
||
|
zip_hash_entry_t *next = entry->next;
|
||
|
|
||
|
zip_uint32_t new_index = entry->hash_value % new_size;
|
||
|
|
||
|
entry->next = new_table[new_index];
|
||
|
new_table[new_index] = entry;
|
||
|
|
||
|
entry = next;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
free(hash->table);
|
||
|
hash->table = new_table;
|
||
|
hash->table_size = new_size;
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
static zip_uint32_t
|
||
|
size_for_capacity(zip_uint64_t capacity) {
|
||
|
double needed_size = capacity / HASH_MAX_FILL;
|
||
|
zip_uint32_t v;
|
||
|
|
||
|
if (needed_size > ZIP_UINT32_MAX) {
|
||
|
v = ZIP_UINT32_MAX;
|
||
|
}
|
||
|
else {
|
||
|
v = (zip_uint32_t)needed_size;
|
||
|
}
|
||
|
|
||
|
if (v > HASH_MAX_SIZE) {
|
||
|
return HASH_MAX_SIZE;
|
||
|
}
|
||
|
|
||
|
/* From Bit Twiddling Hacks by Sean Eron Anderson <seander@cs.stanford.edu>
|
||
|
(http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2). */
|
||
|
|
||
|
v--;
|
||
|
v |= v >> 1;
|
||
|
v |= v >> 2;
|
||
|
v |= v >> 4;
|
||
|
v |= v >> 8;
|
||
|
v |= v >> 16;
|
||
|
v++;
|
||
|
|
||
|
return v;
|
||
|
}
|
||
|
|
||
|
|
||
|
zip_hash_t *
|
||
|
_zip_hash_new(zip_error_t *error) {
|
||
|
zip_hash_t *hash;
|
||
|
|
||
|
if ((hash = (zip_hash_t *)malloc(sizeof(zip_hash_t))) == NULL) {
|
||
|
zip_error_set(error, ZIP_ER_MEMORY, 0);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
hash->table_size = 0;
|
||
|
hash->nentries = 0;
|
||
|
hash->table = NULL;
|
||
|
|
||
|
return hash;
|
||
|
}
|
||
|
|
||
|
|
||
|
void
|
||
|
_zip_hash_free(zip_hash_t *hash) {
|
||
|
zip_uint32_t i;
|
||
|
|
||
|
if (hash == NULL) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (hash->table != NULL) {
|
||
|
for (i = 0; i < hash->table_size; i++) {
|
||
|
if (hash->table[i] != NULL) {
|
||
|
free_list(hash->table[i]);
|
||
|
}
|
||
|
}
|
||
|
free(hash->table);
|
||
|
}
|
||
|
free(hash);
|
||
|
}
|
||
|
|
||
|
|
||
|
/* insert into hash, return error on existence or memory issues */
|
||
|
bool
|
||
|
_zip_hash_add(zip_hash_t *hash, const zip_uint8_t *name, zip_uint64_t index, zip_flags_t flags, zip_error_t *error) {
|
||
|
zip_uint32_t hash_value, table_index;
|
||
|
zip_hash_entry_t *entry;
|
||
|
|
||
|
if (hash == NULL || name == NULL || index > ZIP_INT64_MAX) {
|
||
|
zip_error_set(error, ZIP_ER_INVAL, 0);
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (hash->table_size == 0) {
|
||
|
if (!hash_resize(hash, HASH_MIN_SIZE, error)) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
hash_value = hash_string(name);
|
||
|
table_index = hash_value % hash->table_size;
|
||
|
|
||
|
for (entry = hash->table[table_index]; entry != NULL; entry = entry->next) {
|
||
|
if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
|
||
|
if (((flags & ZIP_FL_UNCHANGED) && entry->orig_index != -1) || entry->current_index != -1) {
|
||
|
zip_error_set(error, ZIP_ER_EXISTS, 0);
|
||
|
return false;
|
||
|
}
|
||
|
else {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (entry == NULL) {
|
||
|
if ((entry = (zip_hash_entry_t *)malloc(sizeof(zip_hash_entry_t))) == NULL) {
|
||
|
zip_error_set(error, ZIP_ER_MEMORY, 0);
|
||
|
return false;
|
||
|
}
|
||
|
entry->name = name;
|
||
|
entry->next = hash->table[table_index];
|
||
|
hash->table[table_index] = entry;
|
||
|
entry->hash_value = hash_value;
|
||
|
entry->orig_index = -1;
|
||
|
hash->nentries++;
|
||
|
if (hash->nentries > hash->table_size * HASH_MAX_FILL && hash->table_size < HASH_MAX_SIZE) {
|
||
|
if (!hash_resize(hash, hash->table_size * 2, error)) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (flags & ZIP_FL_UNCHANGED) {
|
||
|
entry->orig_index = (zip_int64_t)index;
|
||
|
}
|
||
|
entry->current_index = (zip_int64_t)index;
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* remove entry from hash, error if not found */
|
||
|
bool
|
||
|
_zip_hash_delete(zip_hash_t *hash, const zip_uint8_t *name, zip_error_t *error) {
|
||
|
zip_uint32_t hash_value, index;
|
||
|
zip_hash_entry_t *entry, *previous;
|
||
|
|
||
|
if (hash == NULL || name == NULL) {
|
||
|
zip_error_set(error, ZIP_ER_INVAL, 0);
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (hash->nentries > 0) {
|
||
|
hash_value = hash_string(name);
|
||
|
index = hash_value % hash->table_size;
|
||
|
previous = NULL;
|
||
|
entry = hash->table[index];
|
||
|
while (entry) {
|
||
|
if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
|
||
|
if (entry->orig_index == -1) {
|
||
|
if (previous) {
|
||
|
previous->next = entry->next;
|
||
|
}
|
||
|
else {
|
||
|
hash->table[index] = entry->next;
|
||
|
}
|
||
|
free(entry);
|
||
|
hash->nentries--;
|
||
|
if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
|
||
|
if (!hash_resize(hash, hash->table_size / 2, error)) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
entry->current_index = -1;
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
previous = entry;
|
||
|
entry = entry->next;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
zip_error_set(error, ZIP_ER_NOENT, 0);
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* find value for entry in hash, -1 if not found */
|
||
|
zip_int64_t
|
||
|
_zip_hash_lookup(zip_hash_t *hash, const zip_uint8_t *name, zip_flags_t flags, zip_error_t *error) {
|
||
|
zip_uint32_t hash_value, index;
|
||
|
zip_hash_entry_t *entry;
|
||
|
|
||
|
if (hash == NULL || name == NULL) {
|
||
|
zip_error_set(error, ZIP_ER_INVAL, 0);
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
if (hash->nentries > 0) {
|
||
|
hash_value = hash_string(name);
|
||
|
index = hash_value % hash->table_size;
|
||
|
for (entry = hash->table[index]; entry != NULL; entry = entry->next) {
|
||
|
if (strcmp((const char *)name, (const char *)entry->name) == 0) {
|
||
|
if (flags & ZIP_FL_UNCHANGED) {
|
||
|
if (entry->orig_index != -1) {
|
||
|
return entry->orig_index;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
if (entry->current_index != -1) {
|
||
|
return entry->current_index;
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
zip_error_set(error, ZIP_ER_NOENT, 0);
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
|
||
|
bool
|
||
|
_zip_hash_reserve_capacity(zip_hash_t *hash, zip_uint64_t capacity, zip_error_t *error) {
|
||
|
zip_uint32_t new_size;
|
||
|
|
||
|
if (capacity == 0) {
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
new_size = size_for_capacity(capacity);
|
||
|
|
||
|
if (new_size <= hash->table_size) {
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
if (!hash_resize(hash, new_size, error)) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
bool
|
||
|
_zip_hash_revert(zip_hash_t *hash, zip_error_t *error) {
|
||
|
zip_uint32_t i;
|
||
|
zip_hash_entry_t *entry, *previous;
|
||
|
|
||
|
for (i = 0; i < hash->table_size; i++) {
|
||
|
previous = NULL;
|
||
|
entry = hash->table[i];
|
||
|
while (entry) {
|
||
|
if (entry->orig_index == -1) {
|
||
|
zip_hash_entry_t *p;
|
||
|
if (previous) {
|
||
|
previous->next = entry->next;
|
||
|
}
|
||
|
else {
|
||
|
hash->table[i] = entry->next;
|
||
|
}
|
||
|
p = entry;
|
||
|
entry = entry->next;
|
||
|
/* previous does not change */
|
||
|
free(p);
|
||
|
hash->nentries--;
|
||
|
}
|
||
|
else {
|
||
|
entry->current_index = entry->orig_index;
|
||
|
previous = entry;
|
||
|
entry = entry->next;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
|
||
|
zip_uint32_t new_size = hash->table_size / 2;
|
||
|
while (hash->nentries < new_size * HASH_MIN_FILL && new_size > HASH_MIN_SIZE) {
|
||
|
new_size /= 2;
|
||
|
}
|
||
|
if (!hash_resize(hash, new_size, error)) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|