<tb@panthema.net>
<http://www.gnu.org/licenses/>
namespace bingmann_sample_sortRBTCE {
using namespace bingmann_sample_sort;
static const bool debug_subtree = false;
static const bool debug_splitter_subtree = false;
struct SplitterTree
{
#if 0
static const size_t numsplitters2 = 16;
#else
static const size_t numsplitters2 = ( l2cache - sizeof(size_t) ) / (2 * sizeof(size_t));
#endif
static const size_t treebits = logfloor_<numsplitters2>::value;
static const size_t numsplitters = (1 << treebits) - 1;
static const size_t bktnum = 2 * numsplitters + 1;
typedef std::pair<key_type, size_t> samplepair_type;
key_type splitter_tree[numsplitters+1];
unsigned char splitter_lcp[numsplitters];
unsigned char splitter_subtree[numsplitters];
struct Builder
{
key_type* m_tree;
unsigned char* m_lcp_iter;
unsigned char* m_subtree_iter;
samplepair_type* m_samples;
std::vector<SplitterTree*>& m_treelist;
string* m_strings;
size_t m_depth;
inline Builder(SplitterTree& st, samplepair_type* samples, samplepair_type* samples_end,
std::vector<SplitterTree*>& treelist, string* strings, size_t depth)
: m_tree( st.splitter_tree ),
m_lcp_iter( st.splitter_lcp ),
m_subtree_iter( st.splitter_subtree ),
m_samples( samples ),
m_treelist( treelist ),
m_strings( strings ),
m_depth( depth )
{
size_t treenum = treelist.size()-1;
DBG(debug_splitter, "Builder for subtree[" << treenum << "] at depth " << depth);
std::fill(st.splitter_subtree, st.splitter_subtree + numsplitters, 0);
key_type sentinel = 0;
recurse(samples, samples_end, 1, sentinel);
assert(m_lcp_iter == st.splitter_lcp + numsplitters);
assert(m_subtree_iter == st.splitter_subtree + numsplitters);
st.splitter_lcp[0] &= 0x80;
DBG(debug_splitter, "done building subtree[" << treenum << "] at depth " << depth);
}
inline ptrdiff_t snum(samplepair_type* s) const
{
return (ptrdiff_t)(s - m_samples);
}
inline void keynode(key_type& prevkey, key_type& mykey,
samplepair_type* midlo, samplepair_type* midhi)
{
key_type xorSplit = prevkey ^ mykey;
DBG(debug_splitter, " lcp: " << toHex(prevkey) << " XOR " << toHex(mykey) << " = "
<< toHex(xorSplit) << " - " << count_high_zero_bits(xorSplit) << " bits = "
<< count_high_zero_bits(xorSplit) / 8 << " chars lcp");
*m_lcp_iter++ = (count_high_zero_bits(xorSplit) / 8)
| ((mykey & 0xFF) ? 0 : 0x80);
DBG(debug_splitter, "key range = [" << snum(midlo) << "," << snum(midhi) << ") - " << midhi - midlo);
if ( midhi - midlo >= (ptrdiff_t)(numsplitters/2)
&& (mykey & 0xFF) != 0
&& m_treelist.size() < 255 )
{
DBG(debug_subtree, "creating subtree for equal range of key "
<< toHex(mykey) << " with " << (midhi - midlo) << " samples");
for (samplepair_type* s = midlo; s < midhi; ++s)
{
size_t p = s->second;
s->first = get_char<key_type>(m_strings[p], m_depth + sizeof(key_type));
}
std::sort(midlo, midhi);
*m_subtree_iter = m_treelist.size();
m_treelist.push_back( new SplitterTree() );
SplitterTree::Builder(*m_treelist.back(), midlo, midhi,
m_treelist, m_strings, m_depth + sizeof(key_type));
DBG(debug_splitter_subtree, "done creating subtree " << int(*m_subtree_iter));
}
++m_subtree_iter;
}
key_type recurse(samplepair_type* lo, samplepair_type* hi, unsigned int treeidx,
key_type& rec_prevkey)
{
DBG(debug_splitter, "buildtree(" << snum(lo) << "," << snum(hi)
<< ", treeidx=" << treeidx << ")");
samplepair_type* mid = lo + (ptrdiff_t)(hi - lo) / 2;
DBG(debug_splitter, "tree[" << treeidx << "] = samples[" << snum(mid) << "] = "
<< toHex(mid->first));
key_type mykey = m_tree[treeidx] = mid->first;
samplepair_type* midlo = mid;
while (lo < midlo && (midlo-1)->first == mykey) midlo--;
samplepair_type* midhi = mid;
while (midhi+1 < hi && midhi->first == mykey) midhi++;
DBG(debug_splitter, "key range = [" << snum(midlo) << "," << snum(midhi) << ")");
if (2 * treeidx < numsplitters)
{
key_type prevkey = recurse(lo, midlo, 2 * treeidx + 0, rec_prevkey);
keynode(prevkey, mykey, midlo, midhi);
return recurse(midhi, hi, 2 * treeidx + 1, mykey);
}
else
{
keynode(rec_prevkey, mykey, midlo, midhi);
return mykey;
}
}
};
static inline std::string binary(uint16_t v) {
char binstr[17];
binstr[16] = 0;
for (int i = 0; i < 16; i++) {
binstr[15-i] = (v & 1) ? '1' : '0';
v /= 2;
}
return binstr;
}
static inline unsigned int
treeid_to_bkt(unsigned int id, size_t treebits, size_t numsplitters)
{
assert(id > 0);
static const int bitmask = numsplitters;
int hi = treebits-32 + count_high_zero_bits<uint32_t>(id);
unsigned int bkt = ((id << (hi+1)) & bitmask) | (1 << hi);
return bkt;
}
inline unsigned int
find_bkt_tree_equal(const key_type& key)
{
unsigned int i = 1;
while ( i <= numsplitters )
{
if (key == splitter_tree[i])
return 2 * treeid_to_bkt(i,treebits,numsplitters) - 1;
else if (key < splitter_tree[i])
i = 2*i + 0;
else
i = 2*i + 1;
}
i -= numsplitters+1;
return 2 * i;
}
inline unsigned int
find_bkt_tree_asmequal(const key_type& key)
{
unsigned int i;
asm("mov $1, %%rax \n"
"1: \n"
"cmpq (%[splitter_tree],%%rax,8), %[key] \n"
"je 2f \n"
"lea (%%rax,%%rax), %%rax \n"
"lea 1(%%rax), %%rcx \n"
"cmova %%rcx, %%rax \n"
"cmp %[numsplitters1], %%rax \n"
"jb 1b \n"
"sub %[numsplitters1], %%rax \n"
"lea (%%rax,%%rax), %%rax \n"
"jmp 3f \n"
"2: \n"
"bsr %%rax, %%rdx \n"
"mov %[treebits], %%rcx \n"
"sub %%rdx, %%rcx \n"
"shl %%cl, %%rax \n"
"and %[numsplitters], %%rax \n"
"lea -1(%%rcx), %%rcx \n"
"mov $1, %%rdx \n"
"shl %%cl, %%rdx \n"
"or %%rdx, %%rax \n"
"lea -1(%%rax,%%rax), %%rax \n"
"3: \n"
: "=&a" (i)
: [key] "r" (key), [splitter_tree] "r" (splitter_tree),
[numsplitters1] "g" (numsplitters+1),
[treebits] "g" (treebits),
[numsplitters] "g" (numsplitters)
: "rcx", "rdx");
return i;
}
std::vector<uint16_t> bktcache;
size_t bktsize[bktnum], bktindex[bktnum];
inline void calc_bktsize_prefixsum(uint16_t* bktcache, size_t n)
{
memset(bktsize, 0, bktnum * sizeof(size_t));
for (uint16_t* b = bktcache; b != bktcache + n; ++b)
++bktsize[ *b ];
bktindex[0] = 0;
for (unsigned int i=1; i < bktnum; ++i) {
bktindex[i] = bktindex[i-1] + bktsize[i-1];
}
assert(bktindex[bktnum-1] + bktsize[bktnum-1] == n);
}
inline void recursive_permute(string* strings, size_t n, uint16_t* bktcache, string* sorted, std::vector<SplitterTree*>& treelist)
{
DBG(debug_recursion, "permuting " << n << " strings @ " << strings);
for (size_t i=0; i < n; ++i)
sorted[ bktindex[ bktcache[i] ]++ ] = strings[i];
memcpy(strings, sorted, n * sizeof(string));
{
std::vector<uint16_t> bktcache_delete;
bktcache_delete.swap(this->bktcache);
}
size_t i = 0, bsum = 0;
while (i < bktnum-1)
{
bsum += bktsize[i++];
if (splitter_subtree[i/2])
{
DBG(debug_subtree, "recursively permuting subtree " << int(splitter_subtree[i/2]) << " @ bkt " << i);
assert( splitter_subtree[i/2] < treelist.size() );
SplitterTree& t = *treelist[ splitter_subtree[i/2] ];
assert( bktsize[i] == t.bktcache.size() );
t.recursive_permute(strings + bsum, bktsize[i], t.bktcache.data(), sorted, treelist);
}
bsum += bktsize[i++];
}
bsum += bktsize[i++];
assert(i == bktnum && bsum == n);
}
template <unsigned int (SplitterTree::*find_bkt)(const key_type&)>
inline void recursive_sort(string* strings, size_t n, std::vector<SplitterTree*>& treelist, size_t depth)
{
DBG(debug_recursion, "sorting " << n << " strings @ " << strings << " in depth " << depth);
size_t i = 0, bsum = 0;
while (i < bktnum-1)
{
if (bktsize[i] > 1)
{
DBG(debug_recursion, "Recurse[" << depth << "]: < bkt " << bsum << " size " << bktsize[i] << " lcp " << int(splitter_lcp[i/2] & 0x7F));
sort<find_bkt>(strings+bsum, bktsize[i], depth + (splitter_lcp[i/2] & 0x7F));
}
bsum += bktsize[i++];
if (bktsize[i] > 1)
{
if ( splitter_lcp[i/2] & 0x80 ) {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " is done!");
}
else if ( splitter_subtree[i/2] )
{
assert( splitter_subtree[i/2] < treelist.size() );
SplitterTree& t = *treelist[ splitter_subtree[i/2] ];
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " lcp keydepth, into subtree " << int(splitter_subtree[i/2]) << "!");
t.recursive_sort<find_bkt>(strings + bsum, bktsize[i], treelist, depth + sizeof(key_type));
}
else {
DBG(debug_recursion, "Recurse[" << depth << "]: = bkt " << bsum << " size " << bktsize[i] << " lcp keydepth!");
sort<find_bkt>(strings+bsum, bktsize[i], depth + sizeof(key_type));
}
}
bsum += bktsize[i++];
}
if (bktsize[i] > 0)
{
DBG(debug_recursion, "Recurse[" << depth << "]: > bkt " << bsum << " size " << bktsize[i] << " no lcp");
sort<find_bkt>(strings+bsum, bktsize[i], depth);
}
bsum += bktsize[i++];
assert(i == bktnum && bsum == n);
}
template <unsigned int (SplitterTree::*find_bkt)(const key_type&)>
static void sort(string* strings, size_t n, size_t depth)
{
if (n < g_samplesort_smallsort)
{
g_rs_steps++;
g_timer.change(TM_SMALLSORT);
bingmann_radix_sort::msd_CI5(strings, n, depth);
g_timer.change(TM_GENERAL);
return;
}
g_ss_steps++;
g_timer.change(TM_MAKE_SAMPLE);
const size_t samplesize = oversample_factor * numsplitters;
static samplepair_type samples[ samplesize ];
LCGRandom rng(&strings);
for (unsigned int i = 0; i < samplesize; ++i)
{
size_t p = rng() % n;
samples[i] = samplepair_type( get_char<key_type>(strings[p],depth), p );
}
std::sort(samples, samples + samplesize);
g_timer.change(TM_MAKE_SPLITTER);
std::vector<SplitterTree*> treelist;
treelist.push_back( new SplitterTree() );
SplitterTree::Builder(*treelist.back(), samples, samples + samplesize,
treelist, strings, depth);
g_timer.change(TM_CLASSIFY);
uint16_t* bktcache = new uint16_t[n];
for (size_t si = 0; si < n; ++si)
{
unsigned int t = 0, d = 0;
key_type key = get_char<key_type>(strings[si], depth);
unsigned int b = (treelist[t]->*find_bkt)(key);
assert(b < bktnum);
bktcache[si] = b;
while ( (b & 1) && (t = treelist[t]->splitter_subtree[b/2]) != 0 )
{
d += sizeof(key_type);
key = get_char<key_type>(strings[si], depth + d);
b = (treelist[t]->*find_bkt)(key);
assert(b < bktnum);
treelist[t]->bktcache.push_back(b);
}
}
g_timer.change(TM_PREFIXSUM);
treelist[0]->calc_bktsize_prefixsum(bktcache, n);
for (unsigned int ti = 1; ti < treelist.size(); ++ti)
{
SplitterTree& t = *treelist[ti];
t.calc_bktsize_prefixsum(t.bktcache.data(), t.bktcache.size());
}
g_timer.change(TM_PERMUTE);
string* sorted = new string[n];
treelist[0]->recursive_permute(strings, n, bktcache, sorted, treelist);
delete [] sorted;
delete [] bktcache;
g_timer.change(TM_GENERAL);
treelist[0]->recursive_sort<find_bkt>(strings, n, treelist, depth);
}
};
void bingmann_sample_sortRBTCE(string* strings, size_t n)
{
sample_sort_pre();
SplitterTree::sort<&SplitterTree::find_bkt_tree_equal>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortRBTCE, "bingmann/sample_sortRBTCE",
"bingmann/sample_sortRBTCE (adapt binary tree equal, bkt cache)")
void bingmann_sample_sortRBTCEA(string* strings, size_t n)
{
sample_sort_pre();
SplitterTree::sort<&SplitterTree::find_bkt_tree_asmequal>(strings,n,0);
sample_sort_post();
}
CONTESTANT_REGISTER(bingmann_sample_sortRBTCEA, "bingmann/sample_sortRBTCEA",
"bingmann/sample_sortRBTCEA (adapt binary tree equal, asm CMOV, bkt cache)")
}