<tb@panthema.net>
<http://www.gnu.org/licenses/>
namespace bingmann_sample_sortBTCE {
using namespace bingmann_sample_sort;
#if 0
static const size_t numsplitters2 = 16;
static const size_t treebits = logfloor_<numsplitters2>::value;
static const size_t numsplitters = (1 << treebits) - 1;
#else
static const size_t numsplitters2 = ( l2cache - sizeof(size_t) ) / (2 * sizeof(size_t));
static const size_t treebits = logfloor_<numsplitters2>::value;
static const size_t numsplitters = (1 << treebits) - 1;
#endif
template <size_t treebits>
struct ClassifySimple
{
static const size_t numsplitters = (1 << treebits) - 1;
static inline unsigned int
find_bkt(const key_type& key, const key_type* splitter_tree0)
{
const key_type* splitter_tree = splitter_tree0 - 1;
unsigned int i = 1;
while ( i <= numsplitters )
{
if (key == splitter_tree[i])
return 2 * TreeCalculations<treebits>::level_to_inorder(i) - 1;
else if (key < splitter_tree[i])
i = 2*i + 0;
else
i = 2*i + 1;
}
i -= numsplitters+1;
return 2 * i;
}
};
template <size_t treebits>
struct ClassifyAssembler
{
static const size_t numsplitters = (1 << treebits) - 1;
static inline unsigned int
find_bkt(const key_type& key, const key_type* splitter_tree0)
{
const key_type* splitter_tree = splitter_tree0 - 1;
unsigned int i;
asm("mov $1, %%rax \n"
"1: \n"
"cmpq (%[splitter_tree],%%rax,8), %[key] \n"
"je 2f \n"
"lea (%%rax,%%rax), %%rax \n"
"lea 1(%%rax), %%rcx \n"
"cmova %%rcx, %%rax \n"
"cmp %[numsplitters1], %%rax \n"
"jb 1b \n"
"sub %[numsplitters1], %%rax \n"
"lea (%%rax,%%rax), %%rax \n"
"jmp 3f \n"
"2: \n"
"bsr %%rax, %%rdx \n"
"mov %[treebits], %%rcx \n"
"sub %%rdx, %%rcx \n"
"shl %%cl, %%rax \n"
"and %[numsplitters], %%rax \n"
"lea -1(%%rcx), %%rcx \n"
"mov $1, %%rdx \n"
"shl %%cl, %%rdx \n"
"or %%rdx, %%rax \n"
"lea -1(%%rax,%%rax), %%rax \n"
"3: \n"
: "=&a" (i)
: [key] "r" (key), [splitter_tree] "r" (splitter_tree),
[numsplitters1] "g" (numsplitters+1),
[treebits] "g" (treebits),
[numsplitters] "g" (numsplitters)
: "rcx", "rdx");
return i;
}
};
template <size_t treebits>
struct ClassifyUnroll
{
#define SPLITTER_TREE_STEP \
\
"cmpq (%[splitter_tree],%%rax,8), %[key] \n" \
"je 2f \n" \
"lea (%%rax,%%rax), %%rax \n" \
"lea 1(%%rax), %%rcx \n" \
"cmova %%rcx, %%rax \n"
#define SPLITTER_TREE_END \
\
"sub %[numsplitters1], %%rax \n" \
"lea (%%rax,%%rax), %%rax \n" \
"jmp 3f \n" \
"2: \n" \
"bsr %%rax, %%rdx \n" \
"mov %[treebits], %%rcx \n" \
"sub %%rdx, %%rcx \n" \
"shl %%cl, %%rax \n" \
"and %[numsplitters], %%rax \n" \
"lea -1(%%rcx), %%rcx \n" \
"mov $1, %%rdx \n" \
"shl %%cl, %%rdx \n" \
"or %%rdx, %%rax \n" \
"lea -1(%%rax,%%rax), %%rax \n" \
"3: \n"
};
template <>
struct ClassifyUnroll<11>
{
static const size_t treebits = 11;
static const size_t numsplitters = (1 << treebits) - 1;
static inline unsigned int
find_bkt(const key_type& key, const key_type* splitter_tree0)
{
const key_type* splitter_tree = splitter_tree0 - 1;
unsigned int i;
asm("mov $1, %%rax \n"
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_END
: "=&a" (i)
: [key] "r" (key), [splitter_tree] "r" (splitter_tree),
[numsplitters1] "g" (numsplitters+1),
[treebits] "g" (treebits),
[numsplitters] "g" (numsplitters)
: "rcx", "rdx");
return i;
}
};
template <>
struct ClassifyUnroll<12>
{
static const size_t treebits = 12;
static const size_t numsplitters = (1 << treebits) - 1;
static inline unsigned int
find_bkt(const key_type& key, const key_type* splitter_tree0)
{
const key_type* splitter_tree = splitter_tree0 - 1;
unsigned int i;
asm("mov $1, %%rax \n"
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_END
: "=&a" (i)
: [key] "r" (key), [splitter_tree] "r" (splitter_tree),
[numsplitters1] "g" (numsplitters+1),
[treebits] "g" (treebits),
[numsplitters] "g" (numsplitters)
: "rcx", "rdx");
return i;
}
};
template <>
struct ClassifyUnroll<13>
{
static const size_t treebits = 13;
static const size_t numsplitters = (1 << treebits) - 1;
static inline unsigned int
find_bkt(const key_type& key, const key_type* splitter_tree0)
{
const key_type* splitter_tree = splitter_tree0 - 1;
unsigned int i;
asm("mov $1, %%rax \n"
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_STEP
SPLITTER_TREE_END
: "=&a" (i)
: [key] "r" (key), [splitter_tree] "r" (splitter_tree),
[numsplitters1] "g" (numsplitters+1),
[treebits] "g" (treebits),
[numsplitters] "g" (numsplitters)
: "rcx", "rdx");
return i;
}
};
template <template <size_t> class Classify>
void sample_sortBTCE2(string* strings, size_t n, size_t depth)
{
if (n < g_samplesort_smallsort)
{
g_rs_steps++;
g_timer.change(TM_SMALLSORT);
bingmann_radix_sort::msd_CI5(strings, n, depth);
g_timer.change(TM_GENERAL);
return;
}
g_ss_steps++;
g_timer.change(TM_MAKE_SAMPLE);
const size_t samplesize = oversample_factor * numsplitters;
static key_type samples[ samplesize ];
LCGRandom rng(&strings);
for (unsigned int i = 0; i < samplesize; ++i)
{
samples[i] = get_char<key_type>(strings[ rng() % n ], depth);
}
std::sort(samples, samples + samplesize);
g_timer.change(TM_MAKE_SPLITTER);
key_type* splitter_tree = new key_type[numsplitters];
unsigned char* splitter_lcp = new unsigned char[numsplitters];
{
unsigned int treelvl[treebits];
int idx = 1;
for (unsigned int i = treebits; i > 0; ) {
treelvl[--i] = idx;
idx <<= 1;
}
key_type* splitter_tree1 = splitter_tree-1;
key_type prevsplitter = 0;
DBG(debug_splitter, "splitter:");
splitter_lcp[0] = 0;
for (size_t i = 0, j = oversample_factor/2; i < numsplitters; ++i)
{
const key_type& splitter = samples[j];
int l = __builtin_ctz(i+1);
DBG(debug_splitter, "splitter[" << i << "] on level " << l
<< " = tree[" << treelvl[l] << "] = key " << toHex(splitter));
splitter_tree1[ treelvl[l]++ ] = samples[j];
if (i != 0) {
key_type xorSplit = prevsplitter ^ splitter;
DBG1(debug_splitter, " XOR -> " << toHex(xorSplit) << " - ");
DBG3(debug_splitter, count_high_zero_bits(xorSplit) << " bits = "
<< count_high_zero_bits(xorSplit) / 8 << " chars lcp");
splitter_lcp[i] = (count_high_zero_bits(xorSplit) / 8)
| ((splitter & 0xFF) ? 0 : 0x80);
}
prevsplitter = splitter;
j += oversample_factor;
}
}
if (debug_splitter_tree)
{
DBG1(1, "splitter_tree: ");
for (size_t i = 0; i < numsplitters; ++i)
{
DBG2(1, splitter_tree[i] << " ");
}
DBG3(1, "");
}
g_timer.change(TM_CLASSIFY);
uint16_t* bktcache = new uint16_t[n];
static const size_t bktnum = 2*numsplitters+1;
for (size_t si = 0; si < n; ++si)
{
key_type key = get_char<key_type>(strings[si], depth);
unsigned int b = Classify<treebits>::find_bkt(key, splitter_tree);
assert(b < bktnum);
bktcache[si] = b;
}
delete [] splitter_tree;
size_t* bktsize = new size_t[bktnum];
memset(bktsize, 0, bktnum * sizeof(size_t));
for (size_t si = 0; si < n; ++si)
++bktsize[ bktcache[si] ];
if (debug_bucketsize)
{
DBG1(1, "bktsize: ");
for (size_t i = 0; i < bktnum; ++i)
{
DBG2(1, bktsize[i] << " ");
}
DBG3(1, "");
}
g_timer.change(TM_PREFIXSUM);
size_t bktindex[bktnum];
bktindex[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned int i=1; i < bktnum; ++i) {
bktindex[i] = bktindex[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
assert(bktindex[bktnum-1] == n);
g_timer.change(TM_PERMUTE);
for (size_t i=0, j; i < n - last_bkt_size; )
{
string perm = strings[i];
uint16_t permbkt = bktcache[i];
while ( (j = --bktindex[ permbkt ]) > i )
{
std::swap(perm, strings[j]);
std::swap(permbkt, bktcache[j]);
}
strings[i] = perm;
i += bktsize[ permbkt ];
}
delete [] bktcache;
g_timer.change(TM_GENERAL);
size_t i = 0, bsum = 0;
while (i < bktnum-1)
{
if (bktsize[i] > 1)
{
DBG(debug_recursion, "Recurse[" << depth << "]: < bkt " << bsum << " size " << bktsize[i] << " lcp " << int(splitter_lcp[i/2] & 0x7F));
sample_sortBTCE2<Classify>(strings+bsum, bktsize[i], depth + (splitter_lcp[i/2] & 0x7F));
}
bsum += bktsize[i++];
if (bktsize[i] > 1)
{
if ( splitter_lcp[i/2] & 0x80 ) {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " is done!");
}
else {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " lcp keydepth!");
sample_sortBTCE2<Classify>(strings+bsum, bktsize[i], depth + sizeof(key_type));
}
}
bsum += bktsize[i++];
}
if (bktsize[i] > 0)
{
DBG(debug_recursion, "Recurse[" << depth << "]: > bkt " << bsum << " size " << bktsize[i] << " no lcp");
sample_sortBTCE2<Classify>(strings+bsum, bktsize[i], depth);
}
bsum += bktsize[i++];
assert(i == bktnum && bsum == n);
delete [] splitter_lcp;
delete [] bktsize;
}
void bingmann_sample_sortBTCE2(string* strings, size_t n)
{
sample_sort_pre();
g_stats >> "numsplitters" << numsplitters
>> "splitter_treebits" << treebits;
sample_sortBTCE2<ClassifySimple>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBTCE2, "bingmann/sample_sortBTCE2",
"bingmann/sample_sortBTCE2 (binary tree equal, bkt cache)")
void bingmann_sample_sortBTCE2A(string* strings, size_t n)
{
sample_sort_pre();
g_stats >> "numsplitters" << numsplitters
>> "splitter_treebits" << treebits;
sample_sortBTCE2<ClassifyAssembler>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBTCE2A, "bingmann/sample_sortBTCE2A",
"bingmann/sample_sortBTCE2A (binary tree equal, asm CMOV, bkt cache)")
void bingmann_sample_sortBTCE2U(string* strings, size_t n)
{
sample_sort_pre();
g_stats >> "numsplitters" << numsplitters
>> "splitter_treebits" << treebits;
sample_sortBTCE2<ClassifyUnroll>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBTCE2U, "bingmann/sample_sortBTCE2U",
"bingmann/sample_sortBTCE2U (binary tree equal unroll, asm CMOV, bkt cache)")
class SampleSortBTCE3
{
public:
struct SplitterTree
{
key_type splitter_tree[numsplitters+1];
unsigned char splitter_lcp[numsplitters];
key_type rec_buildtree(key_type* samples, size_t lo, size_t hi, unsigned int treeidx,
key_type& rec_prevkey, size_t& iter)
{
DBG(debug_splitter, "rec_buildtree(" << lo << "," << hi << ", treeidx=" << treeidx << ")");
size_t mid = (lo + hi) >> 1;
DBG(debug_splitter, "tree[" << treeidx << "] = samples[" << mid << "] = "
<< toHex(samples[mid]));
key_type mykey = splitter_tree[treeidx] = samples[mid];
#if 1
size_t midlo = mid;
while (lo < midlo && samples[midlo-1] == mykey) midlo--;
size_t midhi = mid+1;
while (midhi < hi && samples[midhi] == mykey) midhi++;
if (midhi - midlo > 1)
DBG(0, "key range = [" << midlo << "," << midhi << ")");
#else
const size_t midlo = mid, midhi = mid+1;
#endif
if (2 * treeidx < numsplitters)
{
key_type prevkey = rec_buildtree(samples, lo, midlo, 2 * treeidx + 0, rec_prevkey, iter);
key_type xorSplit = prevkey ^ mykey;
DBG(debug_splitter, " lcp: " << toHex(prevkey) << " XOR " << toHex(mykey) << " = "
<< toHex(xorSplit) << " - " << count_high_zero_bits(xorSplit) << " bits = "
<< count_high_zero_bits(xorSplit) / 8 << " chars lcp");
splitter_lcp[iter++] = (count_high_zero_bits(xorSplit) / 8)
| ((mykey & 0xFF) ? 0 : 0x80);
return rec_buildtree(samples, midhi, hi, 2 * treeidx + 1, mykey, iter);
}
else
{
key_type xorSplit = rec_prevkey ^ mykey;
DBG(debug_splitter, " lcp: " << toHex(rec_prevkey) << " XOR " << toHex(mykey) << " = "
<< toHex(xorSplit) << " - " << count_high_zero_bits(xorSplit) << " bits = "
<< count_high_zero_bits(xorSplit) / 8 << " chars lcp");
splitter_lcp[iter++] = (count_high_zero_bits(xorSplit) / 8)
| ((mykey & 0xFF) ? 0 : 0x80);
return mykey;
}
}
};
struct TreeBuilder
{
key_type* m_tree;
unsigned char* m_lcp_iter;
key_type* m_samples;
TreeBuilder(SplitterTree& st, key_type* samples, size_t samplesize)
: m_tree( st.splitter_tree ),
m_lcp_iter( st.splitter_lcp ),
m_samples( samples )
{
key_type sentinel = 0;
recurse(samples, samples + samplesize, 1, sentinel);
assert(m_lcp_iter == st.splitter_lcp + numsplitters);
st.splitter_lcp[0] &= 0x80;
}
ptrdiff_t snum(key_type* s) const
{
return (ptrdiff_t)(s - m_samples);
}
key_type recurse(key_type* lo, key_type* hi, unsigned int treeidx,
key_type& rec_prevkey)
{
DBG(debug_splitter, "rec_buildtree(" << snum(lo) << "," << snum(hi)
<< ", treeidx=" << treeidx << ")");
key_type* mid = lo + (ptrdiff_t)(hi - lo) / 2;
DBG(debug_splitter, "tree[" << treeidx << "] = samples[" << snum(mid) << "] = "
<< toHex(*mid));
key_type mykey = m_tree[treeidx] = *mid;
#if 1
key_type* midlo = mid;
while (lo < midlo && *(midlo-1) == mykey) midlo--;
key_type* midhi = mid;
while (midhi+1 < hi && *midhi == mykey) midhi++;
if (midhi - midlo > 1)
DBG(0, "key range = [" << snum(midlo) << "," << snum(midhi) << ")");
#else
key_type *midlo = mid, *midhi = mid+1;
#endif
if (2 * treeidx < numsplitters)
{
key_type prevkey = recurse(lo, midlo, 2 * treeidx + 0, rec_prevkey);
key_type xorSplit = prevkey ^ mykey;
DBG(debug_splitter, " lcp: " << toHex(prevkey) << " XOR " << toHex(mykey) << " = "
<< toHex(xorSplit) << " - " << count_high_zero_bits(xorSplit) << " bits = "
<< count_high_zero_bits(xorSplit) / 8 << " chars lcp");
*m_lcp_iter++ = (count_high_zero_bits(xorSplit) / 8)
| ((mykey & 0xFF) ? 0 : 0x80);
return recurse(midhi, hi, 2 * treeidx + 1, mykey);
}
else
{
key_type xorSplit = rec_prevkey ^ mykey;
DBG(debug_splitter, " lcp: " << toHex(rec_prevkey) << " XOR " << toHex(mykey) << " = "
<< toHex(xorSplit) << " - " << count_high_zero_bits(xorSplit) << " bits = "
<< count_high_zero_bits(xorSplit) / 8 << " chars lcp");
*m_lcp_iter++ = (count_high_zero_bits(xorSplit) / 8)
| ((mykey & 0xFF) ? 0 : 0x80);
return mykey;
}
}
};
template <template <size_t> class Classify>
static void sort(string* strings, size_t n, size_t depth)
{
if (n < g_samplesort_smallsort)
{
g_rs_steps++;
g_timer.change(TM_SMALLSORT);
bingmann_radix_sort::msd_CI5(strings, n, depth);
g_timer.change(TM_GENERAL);
return;
}
g_ss_steps++;
g_timer.change(TM_MAKE_SAMPLE);
const size_t samplesize = oversample_factor * numsplitters;
static key_type samples[ samplesize ];
LCGRandom rng(&strings);
for (unsigned int i = 0; i < samplesize; ++i)
{
samples[i] = get_char<key_type>(strings[ rng() % n ], depth);
}
std::sort(samples, samples + samplesize);
g_timer.change(TM_MAKE_SPLITTER);
#if 0
SplitterTree tree;
{
key_type sentinel = 0;
size_t iter = 0;
tree.rec_buildtree(samples, 0, samplesize, 1, sentinel, iter);
tree.splitter_lcp[0] = 0;
assert(iter == numsplitters);
}
#else
SplitterTree tree;
TreeBuilder(tree, samples, samplesize);
#endif
#if 0
key_type splitter_tree[numsplitters+1];
unsigned char splitter_lcp[numsplitters];
{
unsigned int treelvl[treebits];
int idx = 1;
for (unsigned int i = treebits; i > 0; ) {
treelvl[--i] = idx;
idx <<= 1;
}
key_type prevsplitter;
DBG(debug_splitter, "splitter:");
splitter_lcp[0] = 0;
for (size_t i = 0, j = oversample_factor/2; i < numsplitters; ++i)
{
const key_type& splitter = samples[j];
int l = __builtin_ctz(i+1);
DBG(debug_splitter, "splitter[" << i << "] on level " << l
<< " = tree[" << treelvl[l] << "] = key " << toHex(splitter));
splitter_tree[ treelvl[l]++ ] = splitter;
if (i != 0) {
key_type xorSplit = prevsplitter ^ splitter;
DBG1(debug_splitter, " XOR -> " << toHex(xorSplit) << " - ");
DBG3(debug_splitter, count_high_zero_bits(xorSplit) << " bits = "
<< count_high_zero_bits(xorSplit) / 8 << " chars lcp");
splitter_lcp[i] = (count_high_zero_bits(xorSplit) / 8)
| ((splitter & 0xFF) ? 0 : 0x80);
}
prevsplitter = splitter;
j += oversample_factor;
}
}
for(size_t i = 0; i < numsplitters; ++i)
{
DBG(1, "splitter_tree[" << i+1 << "] = " << tree.splitter_tree[i+1] << " -? " << splitter_tree[i+1]);
assert(tree.splitter_tree[i+1] == splitter_tree[i+1]);
DBG(1, "splitter_lcp[" << i << "] = " << int(tree.splitter_lcp[i]) << " -? " << int(splitter_lcp[i]));
assert(tree.splitter_lcp[i] == splitter_lcp[i]);
}
#endif
g_timer.change(TM_CLASSIFY);
uint16_t* bktcache = new uint16_t[n];
static const size_t bktnum = 2*numsplitters+1;
for (size_t si = 0; si < n; ++si)
{
key_type key = get_char<key_type>(strings[si], depth);
unsigned int b = Classify<treebits>::find_bkt(key, tree.splitter_tree+1);
assert(b < bktnum);
bktcache[si] = b;
}
size_t* bktsize = new size_t[bktnum];
memset(bktsize, 0, bktnum * sizeof(size_t));
for (size_t si = 0; si < n; ++si)
++bktsize[ bktcache[si] ];
if (debug_bucketsize)
{
DBG1(1, "bktsize: ");
for (size_t i = 0; i < bktnum; ++i)
{
DBG2(1, bktsize[i] << " ");
}
DBG3(1, "");
}
g_timer.change(TM_PREFIXSUM);
size_t bktindex[bktnum];
bktindex[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned int i=1; i < bktnum; ++i) {
bktindex[i] = bktindex[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
assert(bktindex[bktnum-1] == n);
g_timer.change(TM_PERMUTE);
for (size_t i=0, j; i < n - last_bkt_size; )
{
string perm = strings[i];
uint16_t permbkt = bktcache[i];
while ( (j = --bktindex[ permbkt ]) > i )
{
std::swap(perm, strings[j]);
std::swap(permbkt, bktcache[j]);
}
strings[i] = perm;
i += bktsize[ permbkt ];
}
delete [] bktcache;
g_timer.change(TM_GENERAL);
size_t i = 0, bsum = 0;
while (i < bktnum-1)
{
if (bktsize[i] > 1)
{
DBG(debug_recursion, "Recurse[" << depth << "]: < bkt " << bsum << " size " << bktsize[i] << " lcp " << int(tree.splitter_lcp[i/2] & 0x7F));
sort<Classify>(strings+bsum, bktsize[i], depth + (tree.splitter_lcp[i/2] & 0x7F));
}
bsum += bktsize[i++];
if (bktsize[i] > 1)
{
if ( tree.splitter_lcp[i/2] & 0x80 ) {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " is done!");
}
else {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " lcp keydepth!");
sort<Classify>(strings+bsum, bktsize[i], depth + sizeof(key_type));
}
}
bsum += bktsize[i++];
}
if (bktsize[i] > 0)
{
DBG(debug_recursion, "Recurse[" << depth << "]: > bkt " << bsum << " size " << bktsize[i] << " no lcp");
sort<Classify>(strings+bsum, bktsize[i], depth);
}
bsum += bktsize[i++];
assert(i == bktnum && bsum == n);
delete [] bktsize;
}
};
void bingmann_sample_sortBTCE3(string* strings, size_t n)
{
sample_sort_pre();
g_stats >> "numsplitters" << numsplitters
>> "splitter_treebits" << treebits;
SampleSortBTCE3::sort<ClassifySimple>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBTCE3, "bingmann/sample_sortBTCE3",
"bingmann/sample_sortBTCE3 (adapt binary tree equal, bkt cache)")
void bingmann_sample_sortBTCE3A(string* strings, size_t n)
{
sample_sort_pre();
g_stats >> "numsplitters" << numsplitters
>> "splitter_treebits" << treebits;
SampleSortBTCE3::sort<ClassifyAssembler>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBTCE3A, "bingmann/sample_sortBTCE3A",
"bingmann/sample_sortBTCE3A (adapt binary tree equal, asm CMOV, bkt cache)")
void bingmann_sample_sortBTCE3U(string* strings, size_t n)
{
sample_sort_pre();
g_stats >> "numsplitters" << numsplitters
>> "splitter_treebits" << treebits;
SampleSortBTCE3::sort<ClassifyUnroll>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortBTCE3U, "bingmann/sample_sortBTCE3U",
"bingmann/sample_sortBTCE3U (adapt binary tree equal unroll, asm CMOV, bkt cache)")
}