<email@andreas-eberle.com>
<http://www.gnu.org/licenses/>
#ifndef EBERLE_MERGESORT_LCP_H_
#define EBERLE_MERGESORT_LCP_H_
#include "../tools/stringtools.h"
#include "../tools/stats_writer.h"
namespace eberle_mergesort_lcp
{
using namespace stringtools;
static inline void
eberle_lcp_merge(string* input1, lcp_t* lcps1, size_t length1, string* input2, lcp_t* lcps2, size_t length2, string* output, lcp_t* outputLcps)
{
const string* end1 = input1 + length1;
const string* end2 = input2 + length2;
lcp_t lcp1 = *lcps1;
lcp_t lcp2 = *lcps2;
while (input1 < end1 && input2 < end2)
{
if (lcp1 == lcp2)
{
string s1 = *input1 + lcp1;
string s2 = *input2 + lcp1;
while (*s1 != '\0' && *s1 == *s2)
s1++, s2++;
const lcp_t lcp = s1 - *input1;
if (*s1 <= *s2)
{
*output = *input1;
*outputLcps = lcp1;
++input1;
++lcps1;
lcp1 = *lcps1;
lcp2 = lcp;
}
else
{
*output = *input2;
*outputLcps = lcp2;
++input2;
++lcps2;
lcp1 = lcp;
lcp2 = *lcps2;
}
}
else if (lcp1 < lcp2)
{
*output = *input2;
*outputLcps = lcp2;
++input2;
++lcps2;
lcp2 = *lcps2;
}
else
{
*output = *input1;
*outputLcps = lcp1;
++input1;
++lcps1;
lcp1 = *lcps1;
}
++output;
++outputLcps;
}
if (input1 < end1)
{
memcpy(output, input1, (end1 - input1) * sizeof(string));
memcpy(outputLcps, lcps1, (end1 - input1) * sizeof(lcp_t));
*outputLcps = lcp1;
}
else
{
memcpy(output, input2, (end2 - input2) * sizeof(string));
memcpy(outputLcps, lcps2, (end2 - input2) * sizeof(lcp_t));
*outputLcps = lcp2;
}
}
static inline void
eberle_lcp_merge(const LcpStringPtr& input1, const LcpStringPtr& input2, const LcpStringPtr& output)
{
eberle_lcp_merge(input1.strings, input1.lcps, input1.size,
input2.strings, input2.lcps, input2.size,
output.strings, output.lcps);
}
static inline void
eberle_lcp_mergesort(string *strings, size_t length, const LcpStringPtr& tmp, const LcpStringPtr& out)
{
if (length <= 1)
{
out.setFirst(*strings, 0);
return;
}
const size_t length1 = length / 2;
const size_t length2 = length - length1;
const LcpStringPtr out1 = out.sub(0, length1);
const LcpStringPtr out2 = out.sub(length1, length2);
const LcpStringPtr tmp1 = tmp.sub(0, length1);
const LcpStringPtr tmp2 = tmp.sub(length1, length2);
eberle_lcp_mergesort(strings, length1, out1, tmp1);
eberle_lcp_mergesort(strings + length1, length2, out2, tmp2);
eberle_lcp_merge(tmp1, tmp2, out);
}
void
eberle_lcp_mergesort(string *strings, size_t n)
{
lcp_t* outputLcps = new lcp_t[n];
string* tmpStrings = new string[n];
lcp_t* tmpLcps = new lcp_t[n];
LcpStringPtr output(strings, outputLcps, n);
LcpStringPtr tmp(tmpStrings, tmpLcps, n);
eberle_lcp_mergesort(strings, n, tmp, output);
delete[] outputLcps;
delete[] tmpStrings;
delete[] tmpLcps;
}
CONTESTANT_REGISTER(eberle_lcp_mergesort, "eberle/mergesort_lcp_binary", "Binary Mergesort with LCP-usage by Andreas Eberle")
static inline void
eberle_lcp_merge(string* input1, lcp_t* lcps1, size_t length1,
string* input2, lcp_t* lcps2, size_t length2,
string* output)
{
const string* end1 = input1 + length1;
const string* end2 = input2 + length2;
lcp_t lcp1 = *lcps1;
lcp_t lcp2 = *lcps2;
while (input1 < end1 && input2 < end2)
{
if (lcp1 == lcp2)
{
string s1 = *input1 + lcp1;
string s2 = *input2 + lcp1;
while (*s1 != '\0' && *s1 == *s2)
s1++, s2++;
const lcp_t lcp = s1 - *input1;
if (*s1 <= *s2)
{
*output = *input1;
++input1;
++lcps1;
lcp1 = *lcps1;
lcp2 = lcp;
}
else
{
*output = *input2;
++input2;
++lcps2;
lcp1 = lcp;
lcp2 = *lcps2;
}
}
else if (lcp1 < lcp2)
{
*output = *input2;
++input2;
++lcps2;
lcp2 = *lcps2;
}
else
{
*output = *input1;
++input1;
++lcps1;
lcp1 = *lcps1;
}
++output;
}
if (input1 < end1)
{
memcpy(output, input1, (end1 - input1) * sizeof(string));
}
else
{
memcpy(output, input2, (end2 - input2) * sizeof(string));
}
}
static inline void
eberle_lcp_merge(const LcpStringPtr& input1, const LcpStringPtr& input2, string* output)
{
eberle_lcp_merge(input1.strings, input1.lcps, input1.size,
input2.strings, input2.lcps, input2.size, output);
}
static inline void
eberle_lcp_merge(string* input1, lcp_t* lcps1, char_type* cache1, size_t length1,
string* input2, lcp_t* lcps2, char_type* cache2, size_t length2,
string* output)
{
const string* end1 = input1 + length1;
const string* end2 = input2 + length2;
lcp_t lcp1 = *lcps1;
lcp_t lcp2 = *lcps2;
while (input1 < end1 && input2 < end2)
{
if (lcp1 == lcp2)
{
lcp_t lcp = lcp1;
char_type ch1 = *cache1;
char_type ch2 = *cache2;
assert((*input1)[lcp] == ch1);
assert((*input2)[lcp] == ch2);
while (ch1 != 0 && ch1 == ch2)
{
++lcp;
ch1 = (*input1)[lcp];
ch2 = (*input2)[lcp];
}
if (ch1 < ch2)
{
*output = *input1;
++input1, ++lcps1, ++cache1;
lcp1 = *lcps1;
lcp2 = lcp;
*cache2 = ch2;
}
else
{
*output = *input2;
++input2, ++lcps2, ++cache2;
lcp1 = lcp;
lcp2 = *lcps2;
*cache1 = ch1;
}
}
else if (lcp1 < lcp2)
{
*output = *input2;
++input2, ++lcps2, ++cache2;
lcp2 = *lcps2;
}
else
{
*output = *input1;
++input1, ++lcps1, ++cache1;
lcp1 = *lcps1;
}
++output;
}
if (input1 < end1)
{
memcpy(output, input1, (end1 - input1) * sizeof(string));
}
else
{
memcpy(output, input2, (end2 - input2) * sizeof(string));
}
}
static inline void
eberle_lcp_merge(const LcpCacheStringPtr& input1, const LcpCacheStringPtr& input2, string* output)
{
if (0)
{
stringtools::verify_lcp_cache(
input1.strings, input1.lcps, input1.cachedChars,
input1.size, -1);
stringtools::verify_lcp_cache(
input2.strings, input2.lcps, input2.cachedChars,
input2.size, -1);
}
#if 1
eberle_lcp_merge(input1.strings, input1.lcps, input1.cachedChars, input1.size,
input2.strings, input2.lcps, input2.cachedChars, input2.size,
output);
#else
eberle_lcp_merge(input1.strings, input1.lcps, input1.size,
input2.strings, input2.lcps, input2.size,
output);
#endif
}
}
#endif