<tb@panthema.net>
<http://www.gnu.org/licenses/>
namespace bingmann_sample_sortBSC {
using namespace bingmann_sample_sort;
inline unsigned int
find_bkt_assembler(const key_type& key, const key_type* splitter, size_t leaves)
{
unsigned int lo;
#if 0
asm("xorl %%edx, %%edx \n"
"movl %[leaves], %%ecx \n"
".myL2985: \n"
"leal (%%rcx,%%rdx), %%eax \n"
"shrl %%eax \n"
"movl %%eax, %%edi \n"
"cmpq %[key], (%[splitter],%%rax,8) \n"
"jb .myL2987 \n"
".myL3019: \n"
"cmpl %%eax, %%edx \n"
"movl %%eax, %%ecx \n"
"jae .myL2989 \n"
"addl %%edx, %%eax \n"
"shrl %%eax \n"
"movl %%eax, %%edi \n"
"cmpq (%[splitter],%%rdi,8), %[key] \n"
"jbe .myL3019 \n"
".myL2987: \n"
"leal 1(%%rax), %%edx \n"
"cmpl %%edx, %%ecx \n"
"ja .myL2985 \n"
".myL2989: \n"
: "=d" (lo)
: [leaves] "g" (leaves), [key] "r" (key), [splitter] "r" (splitter)
: "eax", "ecx", "edi");
#endif
asm("xorl %%ecx, %%ecx \n"
"movl %[leaves], %%edx \n"
"1: \n"
"leal (%%rcx,%%rdx), %%eax \n"
"shrl %%eax \n"
"cmpq (%[splitter],%%rax,8), %[key] \n"
"cmovbe %%eax, %%edx \n"
"leal 1(%%rax), %%eax \n"
"cmova %%eax, %%ecx \n"
"cmpl %%edx, %%ecx \n"
"jb 1b \n"
: "=&c" (lo)
: [leaves] "g" (leaves), [key] "r" (key), [splitter] "r" (splitter)
: "eax", "edx");
#if 0
int pos = leaves-1;
while ( pos >= 0 && key <= splitter[pos] ) --pos;
pos++;
std::cout << "lo " << lo << " pos " << pos << "\n";
if (lo != pos) abort();
#endif
size_t b = lo * 2;
if (lo < leaves && splitter[lo] == key) b += 1;
return b;
}
template <unsigned int (*find_bkt)(const key_type&, const key_type*, size_t)>
void sample_sortBSC(string* strings, size_t n, size_t depth)
{
#if 0
static const size_t leaves = 32;
#else
static const size_t leaves = ( l2cache - sizeof(size_t) ) / (sizeof(key_type) + 2 * sizeof(size_t));
#endif
if (n < g_samplesort_smallsort)
{
g_rs_steps++;
return bingmann_radix_sort::msd_CI5(strings, n, depth);
}
g_ss_steps++;
size_t samplesize = oversample_factor * leaves;
key_type* samples = new key_type[ samplesize ];
LCGRandom rng(&samples);
for (unsigned int i = 0; i < samplesize; ++i)
{
samples[i] = get_char<key_type>(strings[ rng() % n ], depth);
}
std::sort(samples, samples + samplesize);
key_type splitter[leaves];
unsigned char splitter_lcp[leaves];
DBG(debug_splitter, "splitter:");
splitter_lcp[0] = 0;
for (size_t i = 0, j = oversample_factor/2; i < leaves; ++i)
{
splitter[i] = samples[j];
DBG(debug_splitter, "key " << toHex(splitter[i]));
if (i != 0) {
key_type xorSplit = splitter[i-1] ^ splitter[i];
DBG1(debug_splitter, " XOR -> " << toHex(xorSplit) << " - ");
DBG3(debug_splitter, count_high_zero_bits(xorSplit) << " bits = "
<< count_high_zero_bits(xorSplit) / 8 << " chars lcp");
splitter_lcp[i] = count_high_zero_bits(xorSplit) / 8;
}
j += oversample_factor;
}
delete [] samples;
static const size_t bktnum = 2*leaves+1;
uint16_t* bktcache = new uint16_t[n];
for (size_t si = 0; si < n; ++si)
{
key_type key = get_char<key_type>(strings[si], depth);
unsigned int b = find_bkt(key, splitter, leaves);
assert(b < bktnum);
bktcache[si] = b;
}
size_t* bktsize = new size_t[bktnum];
memset(bktsize, 0, bktnum * sizeof(size_t));
for (size_t si = 0; si < n; ++si)
++bktsize[ bktcache[si] ];
if (debug_bucketsize)
{
DBG1(1, "bktsize: ");
for (size_t i = 0; i < bktnum; ++i)
{
DBG2(1, bktsize[i] << " ");
}
DBG3(1, "");
}
size_t bktindex[bktnum];
bktindex[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned int i=1; i < bktnum; ++i) {
bktindex[i] = bktindex[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
assert(bktindex[bktnum-1] == n);
for (size_t i=0, j; i < n - last_bkt_size; )
{
string perm = strings[i];
uint16_t permbkt = bktcache[i];
while ( (j = --bktindex[ permbkt ]) > i )
{
std::swap(perm, strings[j]);
std::swap(permbkt, bktcache[j]);
}
strings[i] = perm;
i += bktsize[ permbkt ];
}
delete [] bktcache;
size_t i = 0, bsum = 0;
while (i < bktnum-1)
{
if (bktsize[i] > 1)
{
DBG(debug_recursion, "Recurse[" << depth << "]: < bkt " << bsum << " size " << bktsize[i] << " lcp " << int(splitter_lcp[i/2]));
sample_sortBSC<find_bkt>(strings+bsum, bktsize[i], depth + splitter_lcp[i/2]);
}
bsum += bktsize[i++];
if (bktsize[i] > 1)
{
if ( (splitter[i/2] & 0xFF) == 0 ) {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " is done!");
}
else {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " lcp keydepth!");
sample_sortBSC<find_bkt>(strings+bsum, bktsize[i], depth + sizeof(key_type));
}
}
bsum += bktsize[i++];
}
if (bktsize[i] > 0)
{
DBG(debug_recursion, "Recurse[" << depth << "]: > bkt " << bsum << " size " << bktsize[i] << " no lcp");
sample_sortBSC<find_bkt>(strings+bsum, bktsize[i], depth);
}
bsum += bktsize[i++];
assert(i == bktnum && bsum == n);
delete [] bktsize;
}
void bingmann_sample_sortBSC(string* strings, size_t n)
{
sample_sort_pre();
sample_sortBSC<bingmann_sample_sortBS::find_bkt_binsearch>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBSC, "bingmann/sample_sortBSC",
"bingmann/sample_sortBSC (binary search, bkt cache)")
void bingmann_sample_sortBSCA(string* strings, size_t n)
{
sample_sort_pre();
sample_sortBSC<find_bkt_assembler>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBSCA, "bingmann/sample_sortBSCA",
"bingmann/sample_sortBSCA (binary search, assembler CMOV, bkt cache)")
inline unsigned int
find_bkt_equal(const key_type& key, const key_type* splitter, size_t leaves)
{
unsigned int lo = 0, hi = leaves;
while ( lo < hi )
{
unsigned int mid = (lo + hi) >> 1;
assert(mid < leaves);
if (key == splitter[mid])
return 2 * mid + 1;
else if (key < splitter[mid])
hi = mid;
else
lo = mid + 1;
}
return 2 * lo;
}
inline unsigned int
find_bkt_asmequal(const key_type& key, const key_type* splitter, size_t leaves)
{
unsigned int lo;
asm("xorl %%ecx, %%ecx \n"
"movl %[leaves], %%edx \n"
"1: \n"
"leal (%%rcx,%%rdx), %%eax \n"
"shrl %%eax \n"
"cmpq (%[splitter],%%rax,8), %[key] \n"
"je 2f \n"
"cmovb %%eax, %%edx \n"
"leal 1(%%rax), %%eax \n"
"cmova %%eax, %%ecx \n"
"cmpl %%edx, %%ecx \n"
"jb 1b \n"
"leal (%%ecx,%%ecx), %%eax \n"
"jmp 3f \n"
"2: \n"
"leal 1(%%rax,%%rax), %%eax \n"
"3: \n"
: "=&a" (lo)
: [leaves] "g" (leaves), [key] "r" (key), [splitter] "r" (splitter)
: "ecx", "edx");
return lo;
}
void bingmann_sample_sortBSCE(string* strings, size_t n)
{
sample_sort_pre();
bingmann_sample_sortBSC::sample_sortBSC<find_bkt_equal>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBSCE, "bingmann/sample_sortBSCE",
"bingmann/sample_sortBSCE (binary search equal, bkt cache)")
void bingmann_sample_sortBSCEA(string* strings, size_t n)
{
sample_sort_pre();
bingmann_sample_sortBSC::sample_sortBSC<find_bkt_asmequal>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBSCEA, "bingmann/sample_sortBSCEA",
"bingmann/sample_sortBSCEA (binary search equal, assembler CMOV, bkt cache)")
}