<tb@panthema.net>
<email@andreas-eberle.com>
<http://www.gnu.org/licenses/>
#ifndef STRINGPTR_H_
#define STRINGPTR_H_
#include <assert.h>
#include "debug.h"
#include <numa.h>
namespace stringtools {
typedef uintptr_t lcp_t;
struct LcpCacheStringPtr;
struct LcpStringPtr
{
public:
string * strings;
lcp_t* lcps;
size_t size;
public:
LcpStringPtr()
: strings(NULL), lcps(NULL), size(0)
{
}
LcpStringPtr(string* _strings, lcp_t* _lcps, size_t _size)
: strings(_strings), lcps(_lcps), size(_size)
{
}
inline bool empty() const
{
return (size == 0);
}
inline void
setFirst(string s, lcp_t lcp) const
{
assert(size > 0);
*strings = s;
*lcps = lcp;
}
inline void
setFirst(LcpStringPtr& ptr)
{
*strings = *ptr.strings;
*lcps = *ptr.lcps;
}
inline string&
firstString() const
{
assert(size > 0);
return *strings;
}
inline lcp_t&
firstLcp() const
{
assert(size > 0);
return *lcps;
}
inline void
copyFrom(LcpStringPtr& other, size_t length) const
{
memcpy(strings, other.strings, length * sizeof(string));
memcpy(lcps, other.lcps, length * sizeof(lcp_t));
}
inline void
copyStringsTo(string* destination, size_t length) const
{
memcpy(destination, strings, length * sizeof(string));
}
inline void
setLcp(size_t position, lcp_t value) const
{
assert(position < size);
lcps[position] = value;
}
inline LcpStringPtr&
operator++()
{
++strings;
++lcps;
--size;
return *this;
}
inline LcpStringPtr
sub(size_t offset, size_t n) const
{
assert(offset + n <= size);
return LcpStringPtr(strings + offset, lcps + offset, n);
}
inline LcpStringPtr
end() const
{
return sub(size, 0);
}
inline size_t
operator-(const LcpStringPtr& rhs) const
{
return strings - rhs.strings;
}
inline bool
operator<(const LcpStringPtr& rhs) const
{
return strings < rhs.strings;
}
};
struct LcpCacheStringPtr
{
public:
string * strings;
lcp_t* lcps;
char_type* cachedChars;
size_t size;
public:
LcpCacheStringPtr()
: strings(NULL), lcps(NULL), cachedChars(NULL), size(0)
{
}
LcpCacheStringPtr(string* _strings, lcp_t* _lcps, char_type* _cachedChars, size_t _size)
: strings(_strings), lcps(_lcps), cachedChars(_cachedChars), size(_size)
{
}
inline bool empty() const
{
return (size == 0);
}
inline void
setFirst(string s, lcp_t lcp) const
{
assert(size > 0);
*strings = s;
*lcps = lcp;
*cachedChars = s[lcp];
}
inline void
setFirst(string s, lcp_t lcp, char cachedCharacter) const
{
assert(size > 0);
*strings = s;
*lcps = lcp;
*cachedChars = cachedCharacter;
}
inline void
setFirst(LcpCacheStringPtr& ptr)
{
*strings = *ptr.strings;
*lcps = *ptr.lcps;
*cachedChars = *ptr.cachedChars;
}
inline string&
firstString() const
{
assert(size > 0);
return *strings;
}
inline lcp_t&
firstLcp() const
{
assert(size > 0);
return *lcps;
}
inline char_type&
firstCached() const
{
assert(size > 0);
return *cachedChars;
}
inline void
copyFrom(LcpCacheStringPtr& other, size_t length) const
{
memcpy(strings, other.strings, length * sizeof(string));
memcpy(lcps, other.lcps, length * sizeof(lcp_t));
memcpy(cachedChars, other.cachedChars, length * sizeof(char));
}
inline void
copyStringsTo(string* destination, size_t length) const
{
memcpy(destination, strings, length * sizeof(string));
}
inline void
calculateCache() const
{
for(unsigned i = 0; i < size; ++i){
cachedChars[i] = strings[i][lcps[i]];
}
}
inline void
allocateNumaMemory(int numaNode, size_t length)
{
#if 0
strings = new string[length];
lcps = new lcp_t[length];
cachedChars = new char_type[length];
numa_tonode_memory(strings, length * sizeof(string), numaNode);
numa_tonode_memory(lcps, length * sizeof(lcp_t), numaNode);
numa_tonode_memory(cachedChars, length * sizeof(char_type), numaNode);
#else
strings = (string*) numa_alloc_onnode(length * sizeof(string), numaNode);
lcps = (lcp_t*) numa_alloc_onnode(length * sizeof(lcp_t), numaNode);
cachedChars = (char_type*) numa_alloc_onnode(length * sizeof(char_type), numaNode);
#endif
size = length;
}
inline void
freeNumaMemory()
{
#if 0
delete [] strings;
delete [] lcps;
delete [] cachedChars;
#else
numa_free(strings, size * sizeof(string));
numa_free(lcps, size * sizeof(string));
numa_free(cachedChars, size * sizeof(char));
#endif
}
inline LcpCacheStringPtr&
operator++()
{
++strings;
++lcps;
++cachedChars;
--size;
return *this;
}
inline LcpCacheStringPtr
sub(size_t offset, size_t n) const
{
assert(offset + n <= size);
return LcpCacheStringPtr(strings + offset, lcps + offset, cachedChars + offset, n);
}
inline LcpStringPtr
subNoCache(size_t offset, size_t n) const
{
assert(offset + n <= size);
return LcpStringPtr(strings + offset, lcps + offset, n);
}
inline LcpCacheStringPtr
end() const
{
return sub(size, 0);
}
inline size_t
operator-(const LcpCacheStringPtr& rhs) const
{
return strings - rhs.strings;
}
inline bool
operator<(const LcpCacheStringPtr& rhs) const
{
return strings < rhs.strings;
}
inline size_t
binarySearch(string searched) const
{
size_t idx;
size_t l = 0;
size_t r = size - 1;
size_t hl = 0, hr = 0;
if(scmp(searched, strings[0], hl) <= 0)
{
return 0;
}
else if(scmp(searched, strings[r], hr) > 0)
{
return size;
}
else
{
while ((r - l) > 1)
{
size_t m = (l + r) / 2;
size_t h = std::min(hl, hr);
if(scmp(searched, strings[m], h) <= 0)
{
r = m;
hr = h;
}
else
{
l = m;
hl = h;
}
}
idx = r;
}
return idx;
}
};
template <bool WithLcp>
class StringShadowPtrBase
{
protected:
string *m_active, *m_shadow;
size_t m_size;
bool m_flipped;
public:
inline StringShadowPtrBase(string* original, string* shadow = NULL,
size_t size = 0, bool flipped = false)
: m_active(original), m_shadow(shadow), m_size(size), m_flipped(flipped)
{
}
inline bool flipped() const { return m_flipped; }
inline string* active() const { return m_active; }
inline string* shadow() const { return m_shadow; }
inline size_t size() const { return m_size; }
friend inline std::ostream& operator << (std::ostream& os, const StringShadowPtrBase& sp)
{
return os << '(' << sp.active() << '/' << sp.shadow() << '|' << sp.flipped() << ':' << sp.size() << ')';
}
inline StringShadowPtrBase sub(size_t offset, size_t size) const
{
assert(offset + size <= m_size);
return StringShadowPtrBase(m_active + offset, m_shadow + offset, size, m_flipped);
}
inline StringShadowPtrBase flip(size_t offset, size_t size) const
{
assert(offset + size <= m_size);
return StringShadowPtrBase(m_shadow + offset, m_active + offset, size, !m_flipped);
}
inline StringShadowPtrBase original() const
{
return m_flipped ? flip(0, m_size) : *this;
}
inline StringShadowPtrBase copy_back() const
{
if (!m_flipped) {
return *this;
}
else {
memcpy(m_shadow, m_active, m_size * sizeof(string));
return flip(0, m_size);
}
}
inline bool check() const
{
for (size_t i = 1; i < m_size; ++i)
assert(scmp(out(i-1), out(i)) <= 0);
return true;
}
inline string& str(size_t i) const
{
assert(i < m_size);
return m_active[i];
}
static inline bool with_lcp()
{
return WithLcp;
}
inline uintptr_t& lcp(size_t i) const
{
if (!WithLcp) assert(0);
assert(!m_flipped);
assert(i < m_size);
return ((uintptr_t*)m_shadow)[i];
}
inline void set_lcp(size_t i, const uintptr_t& v) const
{
if (!WithLcp) return;
assert(i > 0);
assert(i < m_size);
assert(v == calc_lcp(out(i-1), out(i)));
lcp(i) = v;
}
inline void fill_lcp(uintptr_t v)
{
if (!WithLcp) return;
for (size_t i = 1; i < m_size; ++i)
{
set_lcp(i, v);
set_cache(i, 0);
}
}
inline void set_cache(size_t, const char_type&) const
{
}
inline uintptr_t* lcparray() const
{
if (!WithLcp) assert(0);
assert(!m_flipped);
return (uintptr_t*)m_shadow;
}
inline string* output() const
{
assert(!m_flipped);
return m_active;
}
inline string& out(size_t i) const
{
assert(!m_flipped);
return str(i);
}
};
typedef StringShadowPtrBase<false> StringShadowPtr;
typedef StringShadowPtrBase<true> StringShadowLcpPtr;
template <bool WithLcp>
class StringShadowOutPtrBase
{
protected:
typedef StringShadowPtrBase<WithLcp> base_type;
base_type sp;
string *m_output;
public:
inline StringShadowOutPtrBase(string* original, string* shadow = NULL, string* output = NULL,
size_t size = 0, bool flipped = false)
: sp(original, shadow, size, flipped),
m_output(output)
{ }
inline bool flipped() const { return sp.flipped(); }
inline string* active() const { return sp.active(); }
inline string* shadow() const { return sp.shadow(); }
inline size_t size() const { return sp.size(); }
friend inline std::ostream& operator << (std::ostream& os, const StringShadowOutPtrBase& sp)
{
return os << '(' << sp.active() << '/' << sp.shadow() << '/' << sp.output()
<< '|' << sp.flipped() << ':' << sp.size() << ')';
}
inline StringShadowOutPtrBase sub(size_t offset, size_t size) const
{
assert(offset + size <= sp.size());
return StringShadowOutPtrBase(active() + offset, shadow() + offset, m_output + offset,
size, flipped());
}
inline StringShadowOutPtrBase flip(size_t offset, size_t size) const
{
assert(offset + size <= sp.size());
return StringShadowOutPtrBase(shadow() + offset, active() + offset, m_output + offset,
size, !flipped());
}
inline StringShadowOutPtrBase original() const
{
return flipped() ? flip(0, size()) : *this;
}
inline StringShadowOutPtrBase copy_back() const
{
memcpy(m_output, active(), size() * sizeof(string));
return original();
}
inline bool check() const
{
for (size_t i = 1; i < size(); ++i)
assert(scmp(out(i-1), out(i)) <= 0);
return true;
}
inline string& str(size_t i) const { return sp.str(i); }
static inline bool with_lcp() { return base_type::with_lcp(); }
inline uintptr_t& lcp(size_t i) const { return sp.lcp(i); }
inline void set_lcp(size_t i, const uintptr_t& v) const
{
if (!WithLcp) return;
assert(i > 0);
assert(i < size());
assert(v == calc_lcp(out(i-1), out(i)));
lcp(i) = v;
}
inline void fill_lcp(uintptr_t v)
{
if (!WithLcp) return;
for (size_t i = 1; i < size(); ++i)
{
set_lcp(i, v);
set_cache(i, 0);
}
}
inline void set_cache(size_t, const char_type&) const
{
}
inline uintptr_t* lcparray() const { return sp.lcparray(); }
inline string* output() const
{
return m_output;
}
inline string& out(size_t i) const
{
assert(i < size());
return m_output[i];
}
};
typedef StringShadowOutPtrBase<false> StringShadowOutPtr;
typedef StringShadowOutPtrBase<true> StringShadowLcpOutPtr;
class StringShadowLcpCacheOutPtr : public StringShadowLcpOutPtr
{
protected:
typedef StringShadowLcpCacheOutPtr self_type;
typedef StringShadowLcpOutPtr super_type;
char_type *m_cache;
public:
inline StringShadowLcpCacheOutPtr(string* original = NULL, string* shadow = NULL, string* output = NULL,
char_type* cache = NULL,
size_t size = 0, bool flipped = false)
: super_type(original, shadow, output, size, flipped),
m_cache(cache)
{ }
char_type* cache() const { return m_cache; }
inline self_type sub(size_t offset, size_t size) const
{
assert(offset + size <= this->size());
return self_type(active() + offset, shadow() + offset, output() + offset,
m_cache + offset, size, flipped());
}
inline self_type flip(size_t offset, size_t size) const
{
assert(offset + size <= this->size());
return self_type(shadow() + offset, active() + offset, m_output + offset,
m_cache + offset, size, !flipped());
}
inline self_type original() const
{
return flipped() ? flip(0, size()) : *this;
}
inline self_type copy_back() const
{
memcpy(super_type::m_output, active(), size() * sizeof(string));
return original();
}
inline void set_cache(size_t i, const char_type& c) const
{
assert(i < size());
m_cache[i] = c;
}
inline void fill_lcp(uintptr_t v)
{
if (!with_lcp()) return;
for (size_t i = 1; i < size(); ++i)
{
set_lcp(i, v);
set_cache(i, 0);
}
}
};
template<bool checkCache>
static inline bool
verify_lcp_cache(string* strings, lcp_t* lcps, char_type* cache, size_t n, lcp_t expectedFirstLcp)
{
bool allValid = true;
if (expectedFirstLcp != (lcp_t)-1)
{
if (lcps[0] != expectedFirstLcp)
{
std::cout << "lcp[0] = " << lcps[0] << " excepted " << expectedFirstLcp << std::endl;
allValid = false;
}
if (checkCache && *cache != strings[0][lcps[0]])
{
std::cout << "cache[0] = " << cache[0] << " excepted " << strings[0][lcps[0]] << std::endl;
allValid = false;
}
}
for (size_t i = 1; i < n; ++i)
{
string s1 = strings[i-1], s2 = strings[i];
size_t h = calc_lcp(s1, s2);
if (h != lcps[i])
{
std::cout << "lcp[" << i << "] = " << lcps[i] << " excepted " << h << std::endl;
allValid = false;
}
if (checkCache && cache[i] != s2[lcps[i]])
{
std::cout << "cache[" << i << "] = " << cache[i] << " excepted " << s2[lcps[i]] << std::endl;
allValid = false;
}
}
if (allValid)
std::cout << "All LCPs and cache values valid!" << std::endl;
else
std::cout << "Found invalid LCPS and/or cache values!" << std::endl;
return allValid;
}
static inline bool
verify_lcp(string* strings, lcp_t* lcps, size_t n, lcp_t expectedFirstLcp){
return verify_lcp_cache<false>(strings, lcps, NULL, n, expectedFirstLcp);
}
static inline bool
verify_lcp_cache(string* strings, lcp_t* lcps, char_type* cache, size_t n, lcp_t expectedFirstLcp)
{
return verify_lcp_cache<true>(strings, lcps, cache, n, expectedFirstLcp);
}
}
#endif