<tt.rantala@gmail.com>
namespace rantala {
template <typename BucketType>
struct distblock {
unsigned char* ptr;
BucketType bucket;
};
template <typename BucketsizeType>
static void
msd_ci(unsigned char** strings, size_t n, size_t depth)
{
if (n < 32) {
insertion_sort(strings, n, depth);
return;
}
BucketsizeType bucketsize[256] = {0};
unsigned char* restrict oracle =
(unsigned char*) malloc(n);
for (size_t i=0; i < n; ++i)
oracle[i] = strings[i][depth];
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i]];
static size_t bucketindex[256];
bucketindex[0] = bucketsize[0];
BucketsizeType last_bucket_size = bucketsize[0];
for (unsigned i=1; i < 256; ++i) {
bucketindex[i] = bucketindex[i-1] + bucketsize[i];
if (bucketsize[i]) last_bucket_size = bucketsize[i];
}
for (size_t i=0; i < n-last_bucket_size; ) {
distblock<uint8_t> tmp = { strings[i], oracle[i] };
while (1) {
if (--bucketindex[tmp.bucket] <= i)
break;
size_t backup_idx = bucketindex[tmp.bucket];
distblock<uint8_t> tmp2 = { strings[backup_idx], oracle[backup_idx] };
strings[backup_idx] = tmp.ptr;
oracle[backup_idx] = tmp.bucket;
tmp = tmp2;
}
strings[i] = tmp.ptr;
i += bucketsize[tmp.bucket];
}
free(oracle);
size_t bsum = bucketsize[0];
for (size_t i=1; i < 256; ++i) {
if (bucketsize[i] == 0) continue;
msd_ci<BucketsizeType>(strings+bsum, bucketsize[i], depth+1);
bsum += bucketsize[i];
}
}
static void
msd_ci_adaptive(unsigned char** strings, size_t n, size_t depth)
{
if (n < 0x10000) {
msd_ci<uint16_t>(strings, n, depth);
return;
}
uint16_t* restrict oracle =
(uint16_t*) malloc(n*sizeof(uint16_t));
for (size_t i=0; i < n; ++i)
oracle[i] = get_char<uint16_t>(strings[i], depth);
size_t* restrict bucketsize = (size_t*)
calloc(0x10000, sizeof(size_t));
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i]];
static size_t bucketindex[0x10000];
bucketindex[0] = bucketsize[0];
size_t last_bucket_size = bucketsize[0];
for (unsigned i=1; i < 0x10000; ++i) {
bucketindex[i] = bucketindex[i-1] + bucketsize[i];
if (bucketsize[i]) last_bucket_size = bucketsize[i];
}
for (size_t i=0; i < n-last_bucket_size; ) {
distblock<uint16_t> tmp = { strings[i], oracle[i] };
while (1) {
if (--bucketindex[tmp.bucket] <= i)
break;
size_t backup_idx = bucketindex[tmp.bucket];
distblock<uint16_t> tmp2 = { strings[backup_idx], oracle[backup_idx] };
strings[backup_idx] = tmp.ptr;
oracle[backup_idx] = tmp.bucket;
tmp = tmp2;
}
strings[i] = tmp.ptr;
i += bucketsize[tmp.bucket];
}
free(oracle);
size_t bsum = bucketsize[0];
for (size_t i=1; i < 0x10000; ++i) {
if (bucketsize[i] == 0) continue;
if (i & 0xFF) msd_ci_adaptive(strings+bsum,
bucketsize[i], depth+2);
bsum += bucketsize[i];
}
free(bucketsize);
}
void msd_ci(unsigned char** strings, size_t n)
{ msd_ci<size_t>(strings, n, 0); }
void msd_ci_adaptive(unsigned char** strings, size_t n)
{ msd_ci_adaptive(strings, n, 0); }
CONTESTANT_REGISTER(msd_ci,
"rantala/msd_CI",
"msd_CI")
CONTESTANT_REGISTER(msd_ci_adaptive,
"rantala/msd_CI_adaptive",
"msd_CI: adaptive")
}