<tb@panthema.net>
<http://www.gnu.org/licenses/>
#ifndef LP_HASH_TABLE_H_
#define LP_HASH_TABLE_H_
#include <vector>
#include <iostream>
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>
namespace lp_hash_table {
template <typename KeyType>
inline size_t hash(const KeyType& key, unsigned int i);
template <typename KeyType>
inline size_t hash(const KeyType& key);
template <>
inline size_t hash(const uint32_t& key, unsigned int i)
{
static const uint32_t factor[] = {
2654435769u,
3474701532u,
3373259426u,
2918732888u,
3037000499u,
3719550786u,
};
return (key * factor[i]);
}
template <>
inline size_t hash(const uint32_t& key)
{
return hash(key,0);
}
template <>
inline size_t hash(const uint64_t& key, unsigned int i)
{
static const uint64_t factor[] = {
11400714819323198485llu,
14923729446516375050llu,
14488038916154245684llu,
12535862302449814170llu,
13043817825332782212llu,
15975348984942515101llu,
};
return (key * factor[i]);
}
template <>
inline size_t hash(const uint64_t& key)
{
return hash(key,0);
}
template <typename KeyType>
class BloomFilter
{
public:
typedef KeyType key_type;
public:
size_t m_M;
int m_K;
protected:
std::vector<size_t> m_bits;
inline void set_bit(unsigned int b)
{
unsigned int off = b / (8 * sizeof(size_t));
unsigned int sht = b % (8 * sizeof(size_t));
m_bits[off] |= (((size_t)1) << sht);
}
inline size_t get_bit(unsigned int b) const
{
unsigned int off = b / (8 * sizeof(size_t));
unsigned int sht = b % (8 * sizeof(size_t));
return (m_bits[off] & (((size_t)1) << sht));
}
public:
explicit BloomFilter(size_t M, int K)
: m_M(M),
m_K(K),
m_bits( ((M/8) + sizeof(size_t)-1) / sizeof(size_t), 0 )
{
}
inline void clear()
{
std::fill(m_bits.begin(), m_bits.end(), 0);
}
inline void set(const key_type& key)
{
for (int k = 0; k < m_K; ++k)
{
set_bit( hash(key,k) % m_M );
}
}
inline bool get(const key_type& key) const
{
for (int k = 0; k < m_K; ++k)
{
if (!get_bit( hash(key,k) % m_M )) return false;
}
return true;
}
};
enum result_type {
FOUND = 1, NOT_FOUND = 0, UNKNOWN = -1
};
template <typename KeyType, typename ValueType>
class LPHashTable
{
public:
typedef KeyType key_type;
typedef ValueType value_type;
protected:
typedef uint32_t ts_type;
struct Cell
{
key_type key;
value_type value;
ts_type ts;
};
BloomFilter<key_type> m_bloom;
std::vector<Cell> m_cell;
ts_type m_invalid_ts;
ts_type m_curr_ts;
unsigned int m_probe_length;
public:
size_t m_count_insert;
size_t m_count_eviction;
size_t m_count_found;
size_t m_count_notfound;
size_t m_count_unknown;
public:
explicit LPHashTable(size_t M)
: m_bloom( (M * 1/8) * 8, 1 ),
m_cell( (M * 7/8) / sizeof(Cell) ),
m_invalid_ts(0), m_curr_ts(0),
m_probe_length( sqrt(m_cell.size()) ),
m_count_insert(0), m_count_found(0),
m_count_notfound(0), m_count_unknown(0)
{
for (size_t i = 0; i < m_cell.size(); ++i)
m_cell[i].ts = m_invalid_ts;
}
void clear()
{
m_invalid_ts = m_curr_ts;
m_bloom.clear();
}
void set(const key_type& key, const value_type& value)
{
++m_count_insert;
size_t idx = hash(key) % m_cell.size();
if (m_curr_ts == std::numeric_limits<ts_type>::max())
{
for (size_t i = 0; i < m_cell.size(); ++i)
m_cell[i].ts = 1;
m_invalid_ts = 0;
m_curr_ts = 1;
}
++m_curr_ts;
ts_type chain_min_ts = m_curr_ts;
size_t chain_min_idx = -1;
unsigned int probes = 0;
while(
(m_cell[idx].ts > m_invalid_ts) &&
(m_cell[idx].key != key)
)
{
if (++probes > m_probe_length)
{
assert(chain_min_idx != (size_t)-1);
idx = chain_min_idx;
assert(0);
++m_count_eviction;
m_bloom.set( m_cell[idx].key );
assert( m_bloom.get(m_cell[idx].key) );
break;
}
if (m_cell[idx].ts < chain_min_ts) {
chain_min_ts = m_cell[idx].ts;
chain_min_idx = idx;
}
++idx;
idx %= m_cell.size();
}
m_cell[idx].key = key;
m_cell[idx].value = value;
m_cell[idx].ts = m_curr_ts;
}
result_type get(const key_type& key, value_type& value)
{
size_t idx = hash(key) % m_cell.size();
unsigned int probes = 0;
while(
(m_cell[idx].ts > m_invalid_ts) &&
(m_cell[idx].key != key)
)
{
if (++probes > m_probe_length)
break;
++idx;
idx %= m_cell.size();
}
if (m_cell[idx].key == key &&
m_cell[idx].ts > m_invalid_ts)
{
value = m_cell[idx].value;
++m_count_found;
return FOUND;
}
else
{
if (m_bloom.get(key)) {
++m_count_unknown;
return UNKNOWN;
}
else {
++m_count_notfound;
return NOT_FOUND;
}
}
}
};
static int testmain()
{
LPHashTable<uint32_t,uint32_t> ht(2*1024);
srand(2344234);
for(int i = 0; i < 1000; ++i)
{
int k = i;
ht.set(k,i);
}
srand(2344234);
for(int i = 0; i < 2000; ++i)
{
int k = i;
uint32_t out;
result_type r = ht.get(k,out);
std::cout << "i = " << i << " - " << r << "\n";
if (i < 1000)
assert( r == FOUND || r == UNKNOWN );
else
assert( r == NOT_FOUND || r == UNKNOWN );
}
return 0;
}
}
#endif