denyhosts/clamav/libclamav/hashtab.h

525 lines
13 KiB
C

/*
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Török Edvin
*
* Summary: Hash-table and -set data structures.
*
* Acknowledgements: hash32shift() is an implementation of Thomas Wang's
* 32-bit integer hash function:
* http://www.cris.com/~Ttwang/tech/inthash.htm
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef _HASHTAB_H
#define _HASHTAB_H
#include <stdio.h>
#include <stddef.h>
#include <sys/types.h>
#include <stdbool.h>
#include "clamav.h"
#include "clamav-config.h"
#include "mpool.h"
/******************************************************************************/
/* A hash table.
*
* There are two types:
* 1. hashtable:
* The key is a const char* (string).
* The value (data) is a buffer, stored as a size_t (instead of a void *) and an offset.
*
* 2. htu32 (hashtable uint32_t):
* The key is a uint32_t number.
* The value (data) is a buffer, stored as either a size_t, or as a void *, and an offset.
*/
/******************************************************************************/
/* Type of the value stored with each key in the string-keyed hash table
* (it is the type of the `data` member of struct cli_element). */
typedef size_t cli_element_data;
/* define this for debugging/profiling purposes only, NOT in production/release code */
#ifdef PROFILE_HASHTABLE
/* Set of counters embedded in each table when PROFILE_HASHTABLE is defined.
* The code that updates them is not in this header; the counters presumably
* track what their names suggest -- confirm against the implementation. */
typedef struct {
size_t calc_hash;
size_t found;
size_t find_req;
size_t found_tries;
size_t not_found;
size_t not_found_tries;
size_t grow_found;
size_t grow_found_tries;
size_t grow;
size_t update;
size_t update_tries;
size_t inserts;
size_t insert_tries;
size_t deleted_reuse;
size_t deleted_tries;
size_t deletes;
size_t clear;
size_t hash_exhausted;
} PROFILE_STRUCT_;
/* Expands to a complete member declaration named PROFILE_STRUCT. The trailing
* semicolon is part of the macro, so it is used inside a struct body WITHOUT
* a following ';'. */
#define STRUCT_PROFILE PROFILE_STRUCT_ PROFILE_STRUCT;
#else
/* Profiling disabled: STRUCT_PROFILE expands to nothing, adding no member. */
#define STRUCT_PROFILE
#endif
/* One entry of the string-keyed hash table (struct cli_hashtable). */
struct cli_element {
const char *key; /* the key string */
cli_element_data data; /* the value stored for this key */
size_t len; /* presumably the length of key -- TODO confirm against the implementation */
};
/* The string-keyed hash table. Manipulate it through the cli_hashtab_*()
* functions declared below. */
struct cli_hashtable {
struct cli_element *htable; /* array of elements */
size_t capacity; /* presumably the number of slots in htable -- TODO confirm */
size_t used; /* presumably the number of slots in use -- TODO confirm */
size_t maxfill; /* 80% */
STRUCT_PROFILE /* profiling counters; empty unless PROFILE_HASHTABLE is defined */
};
/**
* @brief Generate C source code that represents the given hash table.
*
* Comment: We don't really use this.
*
* @param s The hash table to generate code for.
* @param name Some string name for the elements of this generated table.
* @return cl_error_t Status code.
*/
cl_error_t cli_hashtab_generate_c(const struct cli_hashtable *s, const char *name);
/**
* @brief Find a key in the hash table.
*
* @param s The hash table to search.
* @param key The key to look for.
* @param len The length of key.
* @return struct cli_element* The element for the key. Presumably NULL when the
* key is not present -- this header does not say; confirm
* against the implementation.
*/
struct cli_element *cli_hashtab_find(const struct cli_hashtable *s, const char *key, const size_t len);
/**
* @brief Create a new hashtab with a given capacity.
*
* @param s The hash table structure to initialize.
* @param capacity The requested capacity.
* @return cl_error_t Status code.
*/
cl_error_t cli_hashtab_init(struct cli_hashtable *s, size_t capacity);
/**
* @brief Insert a new key with data into the hashtable.
*
* @param s The hash table to insert into.
* @param key The key to insert.
* @param len The length of key.
* @param data The value to store for the key.
* @return const struct cli_element* The element for the key. The failure value
* is not stated in this header (presumably NULL -- confirm).
*/
const struct cli_element *cli_hashtab_insert(struct cli_hashtable *s, const char *key, const size_t len, const cli_element_data data);
/**
* @brief Delete a key from the hash table.
*
* @param s The hash table to delete from.
* @param key The key to delete.
* @param len The length of key.
*/
void cli_hashtab_delete(struct cli_hashtable *s, const char *key, const size_t len);
/**
* @brief Remove all keys from the hashtable.
*
* @param s The hash table to clear.
*/
void cli_hashtab_clear(struct cli_hashtable *s);
/**
* @brief Free the hash table.
*
* This clears the hash table first, so there is no need to call
* cli_hashtab_clear() beforehand.
*
* @param s The hash table to free.
*/
void cli_hashtab_free(struct cli_hashtable *s);
/**
* @brief Load a hash table from a file. (unpickle!)
*
* Counterpart of cli_hashtab_store().
*
* @param in The file stream to read from.
* @param s The hash table to load into.
* @return cl_error_t Status code.
*/
cl_error_t cli_hashtab_load(FILE *in, struct cli_hashtable *s);
/**
* @brief Write a hash table to a file. (pickle!)
*
* Counterpart of cli_hashtab_load().
*
* @param s The hash table to write.
* @param out The file stream to write to.
* @return cl_error_t Status code.
*/
cl_error_t cli_hashtab_store(const struct cli_hashtable *s, FILE *out);
/* One entry of the uint32_t-keyed hash table (struct cli_htu32). */
struct cli_htu32_element {
uint32_t key; /* the numeric key */
/* The value; the two members share storage, so read it back through the
* same member that was used to store it. */
union {
size_t as_size_t;
void *as_ptr;
} data;
};
/* The uint32_t-keyed hash table. Manipulate it through the cli_htu32_*()
* functions declared below. */
struct cli_htu32 {
struct cli_htu32_element *htable; /* array of elements */
size_t capacity; /* presumably the number of slots in htable -- TODO confirm */
size_t used; /* presumably the number of slots in use -- TODO confirm */
size_t maxfill; /* 80% */
STRUCT_PROFILE /* profiling counters; empty unless PROFILE_HASHTABLE is defined */
};
/*
* The CLI_HTU32_* macros let callers always write the mempool argument, whether
* or not USE_MPOOL is defined. With USE_MPOOL the argument is forwarded; without
* it the argument is replaced by NULL.
*/
#ifdef USE_MPOOL
/**
* @brief Wrap cli_htu32_init(). USE_MPOOL is defined, so the last parameter (C)
* is passed through unchanged.
*/
#define CLI_HTU32_INIT(A, B, C) cli_htu32_init(A, B, C)
/**
* @brief Wrap cli_htu32_insert(). USE_MPOOL is defined, so the last parameter (C)
* is passed through unchanged.
*/
#define CLI_HTU32_INSERT(A, B, C) cli_htu32_insert(A, B, C)
/**
* @brief Wrap cli_htu32_free(). USE_MPOOL is defined, so the last parameter (B)
* is passed through unchanged.
*/
#define CLI_HTU32_FREE(A, B) cli_htu32_free(A, B)
#else
/**
* @brief Wrap cli_htu32_init(). USE_MPOOL is not defined, so the last parameter (C)
* is replaced with NULL. C does not appear in the expansion, so the
* expression passed for it is never evaluated.
*/
#define CLI_HTU32_INIT(A, B, C) cli_htu32_init(A, B, NULL)
/**
* @brief Wrap cli_htu32_insert(). USE_MPOOL is not defined, so the last parameter (C)
* is replaced with NULL. C does not appear in the expansion, so the
* expression passed for it is never evaluated.
*/
#define CLI_HTU32_INSERT(A, B, C) cli_htu32_insert(A, B, NULL)
/**
* @brief Wrap cli_htu32_free(). USE_MPOOL is not defined, so the last parameter (B)
* is replaced with NULL. B does not appear in the expansion, so the
* expression passed for it is never evaluated.
*/
#define CLI_HTU32_FREE(A, B) cli_htu32_free(A, NULL)
#endif
/**
* @brief Initialize a new u32 hashtable.
*
* @param s The u32 hashtable structure to initialize.
* @param capacity The requested capacity.
* @param mempool If MEMPOOL not enabled, this can be NULL.
* @return cl_error_t Status code.
*/
cl_error_t cli_htu32_init(struct cli_htu32 *s, size_t capacity, mpool_t *mempool);
/**
* @brief Insert a new element into the u32 hashtable.
*
* @param s The u32 hashtable to insert into.
* @param item The element (key and data) to insert.
* @param mempool The mempool; the CLI_HTU32_INSERT() wrapper passes NULL here
* when USE_MPOOL is not defined.
* @return cl_error_t Status code.
*/
cl_error_t cli_htu32_insert(struct cli_htu32 *s, const struct cli_htu32_element *item, mpool_t *mempool);
/**
* @brief Free the u32 hashtable.
*
* This clears the hash table first, so there is no need to call
* cli_htu32_clear() beforehand.
*
* @param s The u32 hashtable to free.
* @param mempool The mempool; the CLI_HTU32_FREE() wrapper passes NULL here
* when USE_MPOOL is not defined.
*/
void cli_htu32_free(struct cli_htu32 *s, mpool_t *mempool);
/**
* @brief Find a specific element by key in the u32 hashtable.
*
* @param s The u32 hashtable to search.
* @param key The key to look for.
* @return const struct cli_htu32_element* The element for the key. Presumably
* NULL when the key is not present -- this header does not say;
* confirm against the implementation.
*/
const struct cli_htu32_element *cli_htu32_find(const struct cli_htu32 *s, uint32_t key);
/**
* @brief Remove a specific element from the u32 hashtable.
*
* @param s The u32 hashtable to remove from.
* @param key The key of the element to remove.
*/
void cli_htu32_delete(struct cli_htu32 *s, uint32_t key);
/**
* @brief Remove all elements from the u32 hashtable.
*
* @param s The u32 hashtable to clear.
*/
void cli_htu32_clear(struct cli_htu32 *s);
/**
* @brief Get the next element in the table, following the provided element.
*
* Use this to enumerate the table linearly: start with current == NULL and
* feed each returned element back in until NULL is returned.
*
* @param s The u32 hashtable to enumerate.
* @param current If you feed it NULL, it will give you the first element.
* @return const struct cli_htu32_element* Will return the next element, or NULL if there are no further elements.
*/
const struct cli_htu32_element *cli_htu32_next(const struct cli_htu32 *s, const struct cli_htu32_element *current);
/**
* @brief Get the number of items in the u32 hashtable.
*
* @param s The u32 hashtable to query.
* @return size_t The number of items.
*/
size_t cli_htu32_numitems(struct cli_htu32 *s);
/******************************************************************************/
/* a hashtable that stores the values too */
/******************************************************************************/
/* A value together with its size. struct cli_map refers to these through its
* `unsized_values` member. */
struct cli_map_value {
void *value; /* the value buffer */
int32_t valuesize; /* size of the value */
};
/* A map: a hash table of keys plus storage for the values themselves.
* Manipulate it through the cli_map_*() functions declared below. */
struct cli_map {
struct cli_hashtable htab; /* the underlying string-keyed hash table */
/* Value storage. Judging by the member names, which member is active
* presumably depends on whether the map has a fixed value size -- TODO
* confirm against the implementation. */
union {
struct cli_map_value *unsized_values;
void *sized_values;
} u;
uint32_t nvalues; /* presumably the number of values stored -- TODO confirm */
int32_t keysize; /* key size, as given to cli_map_init() -- TODO confirm */
int32_t valuesize; /* value size, as given to cli_map_init() -- TODO confirm */
int32_t last_insert; /* presumably identifies the key last added with cli_map_addkey() -- TODO confirm */
int32_t last_find; /* presumably identifies the key last found with cli_map_find() -- TODO confirm */
};
/**
* @brief Initialize a new map.
*
* @param m The map structure to initialize.
* @param keysize The key size.
* @param valuesize The value size.
* @param capacity The requested capacity.
* @return cl_error_t CL_SUCCESS on success
* @return cl_error_t CL_E* if some error occurred
*/
cl_error_t cli_map_init(struct cli_map *m, int32_t keysize, int32_t valuesize,
int32_t capacity);
/**
* @brief Add a key to the map.
*
* Use cli_map_setvalue() afterwards to set the value for the key just added.
*
* @param m The map to add to.
* @param key The key to add.
* @param keysize The size of key.
* @return cl_error_t CL_SUCCESS if added.
* @return cl_error_t CL_ECREAT if already present.
* @return cl_error_t CL_E* if some error occurred.
*/
cl_error_t cli_map_addkey(struct cli_map *m, const void *key, int32_t keysize);
/**
* @brief Remove a key from the map.
*
* @param m The map to remove from.
* @param key The key to remove.
* @param keysize The size of key.
* @return cl_error_t CL_SUCCESS if removed.
* @return cl_error_t CL_EUNLINK if not present, so didn't need to be removed.
* @return cl_error_t CL_E* if some error occurred.
*/
cl_error_t cli_map_removekey(struct cli_map *m, const void *key, int32_t keysize);
/**
* @brief Set the value for the key last inserted with cli_map_addkey().
*
* @param m The map to update.
* @param value The value to store.
* @param valuesize The size of value.
* @return cl_error_t CL_SUCCESS on success
* @return cl_error_t CL_E* if some error occurred
*/
cl_error_t cli_map_setvalue(struct cli_map *m, const void *value, int32_t valuesize);
/**
* @brief Find a key in the map.
*
* After a successful find, use cli_map_getvalue() and cli_map_getvalue_size()
* to retrieve the value and its size.
*
* @param m The map to search.
* @param key The key to look for.
* @param keysize The size of key.
* @return cl_error_t CL_SUCCESS if found
* @return cl_error_t CL_EACCES if NOT found
* @return cl_error_t CL_E* if some error occurred.
*/
cl_error_t cli_map_find(struct cli_map *m, const void *key, int32_t keysize);
/**
* @brief Get the size of the value obtained during the last cli_map_find().
*
* @param m The map to query.
* @return int the value size on success
* @return int -1 on failure
*/
int cli_map_getvalue_size(struct cli_map *m);
/**
* @brief Get the value obtained during the last cli_map_find().
*
* @param m The map to query.
* @return void* the value on success
* @return void* NULL on failure
*/
void *cli_map_getvalue(struct cli_map *m);
/**
* @brief Delete the map.
*
* Note: this deletes the whole map; use cli_map_removekey() to remove a
* single key.
*
* @param m The map to delete.
*/
void cli_map_delete(struct cli_map *m);
/******************************************************************************/
/* A set of unique keys (no values).
* The keys are just uint32_t numbers. */
/******************************************************************************/
/* A set of unique uint32_t keys. Manipulate it through the cli_hashset_*()
* functions declared below. The per-field notes are inferred from names and
* from the cli_hashset_init() documentation -- TODO confirm against the
* implementation. */
struct cli_hashset {
uint32_t *keys; /* key storage */
uint32_t *bitmap; /* presumably marks which key slots are occupied */
mpool_t *mempool; /* the mempool, see cli_hashset_init_pool() */
uint32_t capacity; /* capacity; cli_hashset_init() rounds it to a power of 2 */
uint32_t mask; /* presumably capacity - 1, for masking hash values */
uint32_t count; /* presumably the number of keys in the set */
uint32_t limit; /* presumably the count at which the set grows (from load_factor) */
};
/**
* @brief Initialize hashset.
*
* When capacity * (load_factor/100) is reached, the hashset is grown.
*
* @param hs The hashset structure to initialize.
* @param initial_capacity is rounded to nearest power of 2.
* @param load_factor is between 50 and 99.
* @return cl_error_t Status code.
*/
cl_error_t cli_hashset_init(struct cli_hashset *hs, size_t initial_capacity, uint8_t load_factor);
/**
* @brief Initialize hashset using the clamav MEMPOOL instead of just malloc/realloc.
*
* Comment: not presently used in any parsers or signature loaders or anything.
*
* @param hs The hashset structure to initialize.
* @param initial_capacity is rounded to nearest power of 2.
* @param load_factor is between 50 and 99.
* @param mempool the mempool
* @return cl_error_t Status code.
*/
cl_error_t cli_hashset_init_pool(struct cli_hashset *hs, size_t initial_capacity, uint8_t load_factor, mpool_t *mempool);
/**
* @brief Add a key to the hashset.
*
* The set must have been initialized with cli_hashset_init() (or
* cli_hashset_init_pool()); cli_hashset_init_noalloc() alone is not enough.
*
* @param hs The hashset to add to.
* @param key The key to add.
* @return cl_error_t Status code.
*/
cl_error_t cli_hashset_addkey(struct cli_hashset *hs, const uint32_t key);
/**
* @brief Remove a key from the hashset.
*
* The set must have been initialized with cli_hashset_init() (or
* cli_hashset_init_pool()); cli_hashset_init_noalloc() alone is not enough.
*
* @param hs The hashset to remove from.
* @param key The key to remove.
* @return cl_error_t Status code.
*/
cl_error_t cli_hashset_removekey(struct cli_hashset *hs, const uint32_t key);
/**
* @brief Find out if hashset contains a key.
*
* For a set that may only have been set up with cli_hashset_init_noalloc(),
* use cli_hashset_contains_maybe_noalloc() instead.
*
* @param hs The hashset to search.
* @param key The key to look for.
* @return true If found
* @return false If not found
*/
bool cli_hashset_contains(const struct cli_hashset *hs, const uint32_t key);
/**
* @brief Destroy/deallocate a hashset.
*
* @param hs The hashset to destroy.
*/
void cli_hashset_destroy(struct cli_hashset *hs);
/**
* @brief Convert the hashset to an array of uint32_t's.
*
* Ownership: on any non-negative return the array has been allocated and the
* caller must free it. This includes a return of 0: a 0-length array is still
* allocated and must still be freed.
*
* You don't need to free anything if it returns -1.
*
* @param hs The hashset to convert.
* @param [out] array Allocated array of the length returned. Caller must free it.
* @return ssize_t The length of the array if success, or else -1 if failed.
*/
ssize_t cli_hashset_toarray(const struct cli_hashset *hs, uint32_t **array);
/**
* @brief Initializes the set without allocating memory.
*
* After this call you can do lookups on the set using
* cli_hashset_contains_maybe_noalloc(). You need to initialize it using
* cli_hashset_init() before using cli_hashset_addkey() or
* cli_hashset_removekey() though.
*
* @param hs The hashset structure to initialize.
*/
void cli_hashset_init_noalloc(struct cli_hashset *hs);
/**
* @brief Find out if hashset contains a key, even if the set was never allocated.
*
* This works like cli_hashset_contains(), except that the hashset may
* have not been initialized by cli_hashset_init(), only by
* cli_hashset_init_noalloc().
*
* @param hs The hashset to search.
* @param key The key to look for.
* @return true If found
* @return false If not found
*/
bool cli_hashset_contains_maybe_noalloc(const struct cli_hashset *hs, const uint32_t key);
#endif