<tb@panthema.net>
<http://www.gnu.org/licenses/>
namespace bingmann_radix_sort {
static const size_t g_inssort_threshold = 64;
typedef unsigned char* string;
static inline void
insertion_sort(string* str_begin, string* str_end, size_t depth)
{
for (string* i = str_begin + 1; i != str_end; ++i) {
string* j = i;
string tmp = *i;
while (j > str_begin) {
string s = *(j-1)+depth;
string t = tmp+depth;
while (*s == *t && *s != 0) ++s, ++t;
if (*s <= *t) break;
*j = *(j-1);
--j;
}
*j = tmp;
}
}
static void
msd_CE(string* strings, size_t n, size_t depth)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings, n, depth);
size_t bktsize[256] = { 0 };
for (size_t i=0; i < n; ++i)
++bktsize[strings[i][depth]];
string* sorted = (string*) malloc(n * sizeof(string));
size_t bktindex[256];
bktindex[0] = 0;
for (size_t i=1; i < 256; ++i)
bktindex[i] = bktindex[i-1]+bktsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[ bktindex[strings[i][depth]]++ ] = strings[i];
memcpy(strings, sorted, n*sizeof(string));
free(sorted);
size_t bsum = bktsize[0];
for (size_t i=1; i < 256; ++i) {
if (bktsize[i] == 0) continue;
msd_CE(strings+bsum, bktsize[i], depth+1);
bsum += bktsize[i];
}
}
void bingmann_msd_CE(string* strings, size_t n) { return msd_CE(strings,n,0); }
CONTESTANT_REGISTER(bingmann_msd_CE, "bingmann/msd_CE",
"bingmann/msd_CE (rantala CE original)")
static void
msd_CE2(string* strings, size_t n, size_t depth)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings, n, depth);
size_t bkt[256+1] = {0};
for (size_t i=0; i < n; ++i)
++bkt[strings[i][depth]];
string* sorted = (string*)malloc(n*sizeof(string));
for (size_t i=1; i <= 256; ++i)
bkt[i] += bkt[i-1];
for (size_t i=0; i < n; ++i)
sorted[ --bkt[strings[i][depth]] ] = strings[i];
memcpy(strings, sorted, n*sizeof(string));
free(sorted);
for (size_t i=1; i < 256; ++i) {
if (bkt[i] == bkt[i+1]) continue;
msd_CE2(strings+bkt[i], bkt[i+1]-bkt[i], depth+1);
}
}
void bingmann_msd_CE2(string* strings, size_t n) { return msd_CE2(strings,n,0); }
CONTESTANT_REGISTER(bingmann_msd_CE2, "bingmann/msd_CE2",
"bingmann/msd_CE2 (CE with reused prefix sum)")
static void
msd_CE3(string* str_begin, string* str_end, size_t depth)
{
if (str_begin + g_inssort_threshold > str_end)
return insertion_sort(str_begin, str_end, depth);
size_t bkt[256+1] = { 0 };
for (string* str = str_begin; str != str_end; ++str)
++bkt[(*str)[depth]];
string* sorted = (string*)malloc((str_end - str_begin) * sizeof(string));
for (size_t i=1; i <= 256; ++i)
bkt[i] += bkt[i-1];
for (string* str = str_begin; str != str_end; ++str)
sorted[ --bkt[(*str)[depth]] ] = *str;
memcpy(str_begin, sorted, (str_end - str_begin) * sizeof(string));
free(sorted);
for (size_t i=1; i < 256; ++i) {
if (bkt[i] == bkt[i+1]) continue;
msd_CE3(str_begin+bkt[i], str_begin+bkt[i+1], depth+1);
}
}
void bingmann_msd_CE3(string* strings, size_t n) { return msd_CE3(strings,strings+n,0); }
CONTESTANT_REGISTER(bingmann_msd_CE3, "bingmann/msd_CE3",
"bingmann/msd_CE3 (CE2 with iterators)")
template <typename BucketType>
struct distblock {
string ptr;
BucketType bkt;
};
template <typename BucketsizeType>
static void msd_CI(string* strings, size_t n, size_t depth)
{
if (n < g_inssort_threshold) {
inssort::inssort(strings, n, depth);
return;
}
BucketsizeType bktsize[256] = {0};
string restrict oracle = (string) malloc(n);
for (size_t i=0; i < n; ++i)
oracle[i] = strings[i][depth];
for (size_t i=0; i < n; ++i)
++bktsize[oracle[i]];
static size_t bktindex[256];
bktindex[0] = bktsize[0];
BucketsizeType last_bkt_size = bktsize[0];
for (unsigned i=1; i < 256; ++i) {
bktindex[i] = bktindex[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
for (size_t i=0; i < n-last_bkt_size; ) {
distblock<uint8_t> tmp = { strings[i], oracle[i] };
while (1) {
if (--bktindex[tmp.bkt] <= i)
break;
size_t backup_idx = bktindex[tmp.bkt];
distblock<uint8_t> tmp2 = { strings[backup_idx], oracle[backup_idx] };
strings[backup_idx] = tmp.ptr;
oracle[backup_idx] = tmp.bkt;
tmp = tmp2;
}
strings[i] = tmp.ptr;
i += bktsize[tmp.bkt];
}
free(oracle);
size_t bsum = bktsize[0];
for (size_t i=1; i < 256; ++i) {
if (bktsize[i] == 0) continue;
msd_CI<BucketsizeType>(strings+bsum, bktsize[i], depth+1);
bsum += bktsize[i];
}
}
void bingmann_msd_CI(string* strings, size_t n) { msd_CI<size_t>(strings, n, 0); }
CONTESTANT_REGISTER(bingmann_msd_CI, "bingmann/msd_CI",
"bingmann/msd_CI (rantala CI original with oracle)")
template <typename BucketsizeType>
static void msd_CI2(string* strings, size_t n, size_t depth)
{
if (n < g_inssort_threshold) {
inssort::inssort(strings, n, depth);
return;
}
BucketsizeType bktsize[256] = {0};
for (size_t i=0; i < n; ++i)
++bktsize[ strings[i][depth] ];
size_t bktindex[256];
bktindex[0] = bktsize[0];
BucketsizeType last_bkt_size = bktsize[0];
for (unsigned i=1; i < 256; ++i) {
bktindex[i] = bktindex[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
for (size_t i=0; i < n-last_bkt_size; ) {
distblock<uint8_t> tmp = { strings[i], strings[i][depth] };
while (1) {
if (--bktindex[tmp.bkt] <= i)
break;
size_t backup_idx = bktindex[tmp.bkt];
distblock<uint8_t> tmp2 = { strings[backup_idx], strings[backup_idx][depth] };
strings[backup_idx] = tmp.ptr;
tmp = tmp2;
}
strings[i] = tmp.ptr;
i += bktsize[tmp.bkt];
}
size_t bsum = bktsize[0];
for (size_t i=1; i < 256; ++i) {
if (bktsize[i] == 0) continue;
msd_CI2<BucketsizeType>(strings+bsum, bktsize[i], depth+1);
bsum += bktsize[i];
}
}
void bingmann_msd_CI2(string* strings, size_t n) { msd_CI2<size_t>(strings, n, 0); }
CONTESTANT_REGISTER(bingmann_msd_CI2, "bingmann/msd_CI2",
"bingmann/msd_CI2 (CI without oracle)")
static void
msd_CI3(string* strings, size_t n, size_t depth)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings, n, depth);
size_t bktsize[256] = { 0 };
for (size_t i=0; i < n; ++i)
++bktsize[ strings[i][depth] ];
size_t bktindex[256];
bktindex[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned i=1; i < 256; ++i) {
bktindex[i] = bktindex[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
for (size_t i=0, j; i < n-last_bkt_size; )
{
while ( (j = --bktindex[ strings[i][depth] ]) > i )
{
std::swap(strings[i], strings[j]);
}
i += bktsize[ strings[i][depth] ];
}
size_t bsum = bktsize[0];
for (size_t i=1; i < 256; ++i) {
if (bktsize[i] == 0) continue;
msd_CI3(strings+bsum, bktsize[i], depth+1);
bsum += bktsize[i];
}
}
void bingmann_msd_CI3(string* strings, size_t n) { msd_CI3(strings, n, 0); }
CONTESTANT_REGISTER(bingmann_msd_CI3, "bingmann/msd_CI3",
"bingmann/msd_CI3 (CI2 with swap operations)")
static void
msd_CI4(string* strings, size_t n, size_t depth)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings, n, depth);
size_t bktsize[256] = { 0 };
for (size_t i=0; i < n; ++i)
++bktsize[ strings[i][depth] ];
size_t bkt[256];
bkt[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned i=1; i < 256; ++i) {
bkt[i] = bkt[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
for (size_t i=0, j; i < n-last_bkt_size; )
{
string perm = strings[i];
while ( (j = --bkt[ perm[depth] ]) > i )
{
std::swap(perm, strings[j]);
}
strings[i] = perm;
i += bktsize[ perm[depth] ];
}
size_t bsum = bktsize[0];
for (size_t i=1; i < 256; ++i) {
if (bktsize[i] == 0) continue;
msd_CI4(strings+bsum, bktsize[i], depth+1);
bsum += bktsize[i];
}
}
void bingmann_msd_CI4(string* strings, size_t n) { msd_CI4(strings, n, 0); }
CONTESTANT_REGISTER(bingmann_msd_CI4, "bingmann/msd_CI4",
"bingmann/msd_CI4 (CI3 with swap cache)")
static inline size_t*
msd_CI5_bktsize(string* strings, size_t n, size_t depth)
{
uint8_t* charcache = new uint8_t[n];
for (size_t i=0; i < n; ++i)
charcache[i] = strings[i][depth];
size_t* bktsize = new size_t[256];
memset(bktsize, 0, 256 * sizeof(size_t));
for (size_t i=0; i < n; ++i)
++bktsize[ charcache[i] ];
size_t bkt[256];
bkt[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned i=1; i < 256; ++i) {
bkt[i] = bkt[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
for (size_t i=0, j; i < n-last_bkt_size; )
{
string perm = strings[i];
uint8_t permch = charcache[i];
while ( (j = --bkt[ permch ]) > i )
{
std::swap(perm, strings[j]);
std::swap(permch, charcache[j]);
}
strings[i] = perm;
i += bktsize[ permch ];
}
delete [] charcache;
return bktsize;
}
void
msd_CI5(string* strings, size_t n, size_t depth)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings, n, depth);
size_t* bktsize = msd_CI5_bktsize(strings, n, depth);
size_t bsum = bktsize[0];
for (size_t i=1; i < 256; ++i) {
if (bktsize[i] == 0) continue;
msd_CI5(strings+bsum, bktsize[i], depth+1);
bsum += bktsize[i];
}
delete [] bktsize;
}
void bingmann_msd_CI5(string* strings, size_t n) { msd_CI5(strings, n, 0); }
CONTESTANT_REGISTER(bingmann_msd_CI5, "bingmann/msd_CI5",
"bingmann/msd_CI5 (CI4 with charcache)")
static inline size_t*
msd_CI5_16bit_bktsize(string* strings, size_t n, size_t depth)
{
static const size_t RADIX = 0x10000;
using namespace stringtools;
uint16_t* charcache = new uint16_t[n];
for (size_t i=0; i < n; ++i)
charcache[i] = get_char<uint16_t>(strings[i], depth);
size_t* bktsize = new size_t[RADIX];
memset(bktsize, 0, RADIX * sizeof(size_t));
for (size_t i=0; i < n; ++i)
++bktsize[ charcache[i] ];
size_t bkt[RADIX];
bkt[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned i=1; i < RADIX; ++i) {
bkt[i] = bkt[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
for (size_t i=0, j; i < n-last_bkt_size; )
{
string perm = strings[i];
uint16_t permch = charcache[i];
while ( (j = --bkt[ permch ]) > i )
{
std::swap(perm, strings[j]);
std::swap(permch, charcache[j]);
}
strings[i] = perm;
i += bktsize[ permch ];
}
delete [] charcache;
return bktsize;
}
static void
msd_CI5_16bit(string* strings, size_t n, size_t depth)
{
if (n < 0x10000)
return msd_CI5(strings, n, depth);
size_t* bktsize = msd_CI5_16bit_bktsize(strings, n, depth);
size_t bsum = bktsize[0];
for (size_t i=1; i < 0x10000; ++i) {
if (bktsize[i] == 0) continue;
if (i & 0xFF)
msd_CI5_16bit(strings+bsum, bktsize[i], depth+2);
bsum += bktsize[i];
}
delete [] bktsize;
}
void bingmann_msd_CI5_16bit(string* strings, size_t n) { msd_CI5_16bit(strings, n, 0); }
CONTESTANT_REGISTER(bingmann_msd_CI5_16bit, "bingmann/msd_CI5_16bit",
"bingmann/msd_CI5_16bit (CI5 with 16-bit radix)")
struct RadixStep_CE_nr
{
string* str;
size_t bkt[256+1];
size_t idx;
RadixStep_CE_nr(string* strings, size_t n, size_t depth)
{
memset(bkt, 0, sizeof(bkt));
for (size_t i=0; i < n; ++i)
++bkt[ strings[i][depth] ];
string* sorted = (string*)malloc(n * sizeof(string));
for (unsigned i=1; i <= 256; ++i)
bkt[i] += bkt[i-1];
for (size_t i=0; i < n; ++i)
sorted[ --bkt[strings[i][depth]] ] = strings[i];
memcpy(strings, sorted, n * sizeof(string));
free(sorted);
str = strings;
idx = 0;
}
};
static void
bingmann_msd_CE_nr(string* strings, size_t n)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings,n,0);
typedef RadixStep_CE_nr RadixStep;
std::stack< RadixStep, std::vector<RadixStep> > radixstack;
radixstack.push( RadixStep(strings,n,0) );
while ( radixstack.size() )
{
RadixStep& rs = radixstack.top();
while ( ++rs.idx < 256 )
{
if (rs.bkt[rs.idx] == rs.bkt[rs.idx+1])
;
else if (rs.bkt[rs.idx+1] < rs.bkt[rs.idx] + g_inssort_threshold)
{
inssort::inssort(rs.str + rs.bkt[ rs.idx ],
rs.bkt[ rs.idx+1 ] - rs.bkt[ rs.idx ],
radixstack.size());
}
else
{
radixstack.push( RadixStep(rs.str + rs.bkt[ rs.idx ],
rs.bkt[ rs.idx+1 ] - rs.bkt[ rs.idx ],
radixstack.size()) );
break;
}
}
if (radixstack.top().idx == 256) {
radixstack.pop();
}
}
}
CONTESTANT_REGISTER(bingmann_msd_CE_nr, "bingmann/msd_CE_nr",
"bingmann/msd_CE_nr (CE non-recursive)")
struct RadixStep_CE_nr2
{
string* str;
size_t bktsize[256];
size_t idx;
RadixStep_CE_nr2(string* strings, size_t n, size_t depth)
{
memset(bktsize, 0, sizeof(bktsize));
for (size_t i=0; i < n; ++i)
++bktsize[ strings[i][depth] ];
size_t bkt[256];
bkt[0] = 0;
for (size_t i=1; i < 256; ++i)
bkt[i] = bkt[i-1] + bktsize[i-1];
string* sorted = (string*)malloc(n * sizeof(string));
for (size_t i=0; i < n; ++i)
sorted[ bkt[strings[i][depth]]++ ] = strings[i];
memcpy(strings, sorted, n * sizeof(string));
free(sorted);
str = strings + bktsize[0];
idx = 0;
}
};
static void
bingmann_msd_CE_nr2(string* strings, size_t n)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings,n,0);
typedef RadixStep_CE_nr2 RadixStep;
std::stack< RadixStep, std::vector<RadixStep> > radixstack;
radixstack.push( RadixStep(strings,n,0) );
while ( radixstack.size() )
{
while ( radixstack.top().idx < 255 )
{
RadixStep& rs = radixstack.top();
++rs.idx;
if (rs.bktsize[rs.idx] == 0)
;
else if (rs.bktsize[rs.idx] < g_inssort_threshold)
{
inssort::inssort(rs.str, rs.bktsize[rs.idx], radixstack.size());
rs.str += rs.bktsize[ rs.idx ];
}
else
{
rs.str += rs.bktsize[rs.idx];
radixstack.push( RadixStep(rs.str - rs.bktsize[rs.idx], rs.bktsize[rs.idx], radixstack.size()) );
}
}
radixstack.pop();
}
}
CONTESTANT_REGISTER(bingmann_msd_CE_nr2, "bingmann/msd_CE_nr2",
"bingmann/msd_CE_nr2 (CE non-recursive)")
struct RadixStep_CI_nr
{
string* str;
size_t bkt[256+1];
size_t idx;
RadixStep_CI_nr(string* strings, size_t n, size_t depth)
{
size_t bktsize[256] = { 0 };
for (size_t i=0; i < n; ++i)
++bktsize[ strings[i][depth] ];
bkt[0] = bktsize[0];
size_t last_bkt_size = bktsize[0], last_bkt_num = 0;
for (unsigned i=1; i < 256; ++i) {
bkt[i] = bkt[i-1] + bktsize[i];
if (bktsize[i]) {
last_bkt_size = bktsize[i];
last_bkt_num = i;
}
}
bkt[256] = bkt[255];
assert(bkt[256] == n);
for (size_t i=0, j; i < n-last_bkt_size; )
{
string perm = strings[i];
if (bkt[ perm[depth] ] == i)
{
i += bktsize[ perm[depth] ];
continue;
}
while ( (j = --bkt[ perm[depth] ]) > i )
{
std::swap(perm, strings[j]);
}
assert(j == i);
strings[i] = perm;
i += bktsize[ perm[depth] ];
}
bkt[last_bkt_num] = n-last_bkt_size;
str = strings;
idx = 0;
}
};
static void
bingmann_msd_CI_nr(string* strings, size_t n)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings,n,0);
typedef RadixStep_CI_nr RadixStep;
std::stack< RadixStep, std::vector<RadixStep> > radixstack;
radixstack.push( RadixStep(strings,n,0) );
while ( radixstack.size() )
{
RadixStep& rs = radixstack.top();
while ( ++rs.idx < 256 )
{
if (rs.bkt[rs.idx] == rs.bkt[rs.idx+1])
;
else if (rs.bkt[rs.idx+1] < rs.bkt[rs.idx] + g_inssort_threshold)
{
inssort::inssort(rs.str + rs.bkt[ rs.idx ],
rs.bkt[ rs.idx+1 ] - rs.bkt[ rs.idx ],
radixstack.size());
}
else
{
radixstack.push( RadixStep(rs.str + rs.bkt[ rs.idx ],
rs.bkt[ rs.idx+1 ] - rs.bkt[ rs.idx ],
radixstack.size()) );
break;
}
}
if (radixstack.top().idx == 256) {
radixstack.pop();
}
}
}
CONTESTANT_REGISTER(bingmann_msd_CI_nr, "bingmann/msd_CI_nr",
"bingmann/msd_CI_nr (CI non-recursive)")
struct RadixStep_CI_nr2
{
string* str;
size_t idx;
size_t bktsize[256];
RadixStep_CI_nr2(string* strings, size_t n, size_t depth)
{
memset(bktsize,0,sizeof(bktsize));
for (size_t i=0; i < n; ++i)
++bktsize[ strings[i][depth] ];
size_t bkt[256];
bkt[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned i=1; i < 256; ++i) {
bkt[i] = bkt[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
for (size_t i=0, j; i < n-last_bkt_size; )
{
string perm = strings[i];
while ( (j = --bkt[ perm[depth] ]) > i )
{
std::swap(perm, strings[j]);
}
strings[i] = perm;
i += bktsize[ perm[depth] ];
}
str = strings + bktsize[0];
idx = 0;
}
};
static void
bingmann_msd_CI_nr2(string* strings, size_t n)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings,n,0);
typedef RadixStep_CI_nr2 RadixStep;
std::stack< RadixStep, std::vector<RadixStep> > radixstack;
radixstack.push( RadixStep(strings,n,0) );
while ( radixstack.size() )
{
while ( radixstack.top().idx < 255 )
{
RadixStep& rs = radixstack.top();
++rs.idx;
if (rs.bktsize[rs.idx] == 0)
;
else if (rs.bktsize[rs.idx] < g_inssort_threshold)
{
inssort::inssort(rs.str, rs.bktsize[rs.idx], radixstack.size());
rs.str += rs.bktsize[ rs.idx ];
}
else
{
rs.str += rs.bktsize[rs.idx];
radixstack.push( RadixStep(rs.str - rs.bktsize[rs.idx], rs.bktsize[rs.idx], radixstack.size()) );
}
}
radixstack.pop();
}
}
CONTESTANT_REGISTER(bingmann_msd_CI_nr2, "bingmann/msd_CI_nr2",
"bingmann/msd_CI_nr2 (CI non-recursive)")
struct RadixStep_CI_nr3
{
string* str;
size_t idx;
size_t bktsize[256];
RadixStep_CI_nr3(string* strings, size_t n, size_t depth, uint8_t* charcache)
{
for (size_t i=0; i < n; ++i)
charcache[i] = strings[i][depth];
memset(bktsize,0,sizeof(bktsize));
for (size_t i=0; i < n; ++i)
++bktsize[ charcache[i] ];
size_t bkt[256];
bkt[0] = bktsize[0];
size_t last_bkt_size = bktsize[0];
for (unsigned i=1; i < 256; ++i) {
bkt[i] = bkt[i-1] + bktsize[i];
if (bktsize[i]) last_bkt_size = bktsize[i];
}
for (size_t i=0, j; i < n-last_bkt_size; )
{
string perm = strings[i];
uint8_t permch = charcache[i];
while ( (j = --bkt[ permch ]) > i )
{
std::swap(perm, strings[j]);
std::swap(permch, charcache[j]);
}
strings[i] = perm;
i += bktsize[ permch ];
}
str = strings + bktsize[0];
idx = 0;
}
};
static void
bingmann_msd_CI_nr3(string* strings, size_t n)
{
if (n < g_inssort_threshold)
return inssort::inssort(strings,n,0);
typedef RadixStep_CI_nr3 RadixStep;
uint8_t* charcache = new uint8_t[n];
std::stack< RadixStep, std::vector<RadixStep> > radixstack;
radixstack.push( RadixStep(strings,n,0,charcache) );
while ( radixstack.size() )
{
while ( radixstack.top().idx < 255 )
{
RadixStep& rs = radixstack.top();
++rs.idx;
if (rs.bktsize[rs.idx] == 0)
;
else if (rs.bktsize[rs.idx] < g_inssort_threshold)
{
inssort::inssort(rs.str, rs.bktsize[rs.idx], radixstack.size());
rs.str += rs.bktsize[ rs.idx ];
}
else
{
rs.str += rs.bktsize[rs.idx];
radixstack.push( RadixStep(rs.str - rs.bktsize[rs.idx], rs.bktsize[rs.idx],
radixstack.size(), charcache) );
}
}
radixstack.pop();
}
delete [] charcache;
}
CONTESTANT_REGISTER(bingmann_msd_CI_nr3, "bingmann/msd_CI_nr3",
"bingmann/msd_CI_nr3 (CI non-recursive, charcache)")
}