<tb@panthema.net>
<http://www.gnu.org/licenses/>
namespace bingmann_sample_sortBS {
using namespace bingmann_sample_sort;
inline unsigned int
find_bkt_binsearch(const key_type& key, const key_type* splitter, size_t leaves)
{
unsigned int lo = 0, hi = leaves;
while ( lo < hi )
{
unsigned int mid = (lo + hi) >> 1;
assert(mid < leaves);
if (key <= splitter[mid])
hi = mid;
else
lo = mid + 1;
}
#if 0
int pos = leaves-1;
while ( pos >= 0 && key <= splitter[pos] ) --pos;
pos++;
assert(lo == pos);
#endif
size_t b = lo * 2;
if (lo < leaves && splitter[lo] == key) b += 1;
return b;
}
void sample_sortBS(string* strings, size_t n, size_t depth)
{
#if 0
static const size_t leaves = 32;
#else
static const size_t leaves = ( l2cache - sizeof(size_t) ) / (sizeof(key_type) + 2 * sizeof(size_t));
#endif
if (n < g_samplesort_smallsort)
{
g_rs_steps++;
return bingmann_radix_sort::msd_CI5(strings, n, depth);
}
g_ss_steps++;
size_t samplesize = oversample_factor * leaves;
key_type* samples = new key_type[ samplesize ];
LCGRandom rng(&samples);
for (unsigned int i = 0; i < samplesize; ++i)
{
samples[i] = get_char<key_type>(strings[ rng() % n ], depth);
}
std::sort(samples, samples + samplesize);
key_type splitter[leaves];
unsigned char splitter_lcp[leaves];
DBG(debug_splitter, "splitter:");
splitter_lcp[0] = 0;
for (size_t i = 0, j = oversample_factor/2; i < leaves; ++i)
{
splitter[i] = samples[j];
DBG(debug_splitter, "key " << toHex(splitter[i]));
if (i != 0) {
key_type xorSplit = splitter[i-1] ^ splitter[i];
DBG1(debug_splitter, " XOR -> " << toHex(xorSplit) << " - ");
DBG3(debug_splitter, count_high_zero_bits(xorSplit) << " bits = "
<< count_high_zero_bits(xorSplit) / 8 << " chars lcp");
splitter_lcp[i] = count_high_zero_bits(xorSplit) / 8;
}
j += oversample_factor;
}
delete [] samples;
static const size_t bktnum = 2*leaves+1;
size_t* bktsize = new size_t[bktnum];
memset(bktsize, 0, bktnum * sizeof(size_t));
for (size_t si = 0; si < n; ++si)
{
key_type key = get_char<key_type>(strings[si], depth);
unsigned int b = find_bkt_binsearch(key, splitter, leaves);
assert(b < bktnum);
++bktsize[ b ];
}
if (debug_bucketsize)
{
DBG1(1, "bktsize: ");
for (size_t i = 0; i < bktnum; ++i)
{
DBG2(1, bktsize[i] << " ");
}
DBG3(1, "");
}
size_t bktindex[bktnum];
bktindex[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned int i=1; i < bktnum; ++i) {
bktindex[i] = bktindex[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
assert(bktindex[bktnum-1] == n);
for (size_t i=0, j; i < n - last_bkt_size; )
{
string perm = strings[i];
key_type key;
unsigned int b;
while (1)
{
key = get_char<key_type>(perm, depth);
b = find_bkt_binsearch(key, splitter, leaves);
j = --bktindex[ b ];
if ( j <= i )
break;
std::swap(perm, strings[j]);
}
strings[i] = perm;
i += bktsize[ b ];
}
size_t i = 0, bsum = 0;
while (i < bktnum-1)
{
if (bktsize[i] > 1)
{
DBG(debug_recursion, "Recurse[" << depth << "]: < bkt " << bsum << " size " << bktsize[i] << " lcp " << int(splitter_lcp[i/2]));
sample_sortBS(strings+bsum, bktsize[i], depth + splitter_lcp[i/2]);
}
bsum += bktsize[i++];
if (bktsize[i] > 1)
{
if ( (splitter[i/2] & 0xFF) == 0 ) {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " is done!");
}
else {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " lcp keydepth!");
sample_sortBS(strings+bsum, bktsize[i], depth + sizeof(key_type));
}
}
bsum += bktsize[i++];
}
if (bktsize[i] > 0)
{
DBG(debug_recursion, "Recurse[" << depth << "]: > bkt " << bsum << " size " << bktsize[i] << " no lcp");
sample_sortBS(strings+bsum, bktsize[i], depth);
}
bsum += bktsize[i++];
assert(i == bktnum && bsum == n);
delete [] bktsize;
}
void bingmann_sample_sortBS(string* strings, size_t n)
{
sample_sort_pre();
sample_sortBS(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBS, "bingmann/sample_sortBS",
"bingmann/sample_sortBS (binary search, no cache)")
}