<tt.rantala@gmail.com>
namespace rantala {
void
msd_CE0(unsigned char** strings, size_t n, size_t depth)
{
if (n < 32) {
insertion_sort(strings, n, depth);
return;
}
size_t bucketsize[256] = {0};
for (size_t i=0; i < n; ++i)
++bucketsize[strings[i][depth]];
unsigned char** sorted = (unsigned char**)
malloc(n*sizeof(unsigned char*));
static size_t bucketindex[256];
bucketindex[0] = 0;
for (size_t i=1; i < 256; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[strings[i][depth]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
free(sorted);
size_t bsum = bucketsize[0];
for (size_t i=1; i < 256; ++i) {
if (bucketsize[i] == 0) continue;
msd_CE0(strings+bsum, bucketsize[i], depth+1);
bsum += bucketsize[i];
}
}
void msd_CE0(unsigned char** strings, size_t n)
{ msd_CE0(strings, n, 0); }
CONTESTANT_REGISTER(msd_CE0,
"rantala/msd_CE0",
"msd_CE0: baseline")
void
msd_CE1(unsigned char** strings, size_t n, size_t depth)
{
if (n < 32) {
insertion_sort(strings, n, depth);
return;
}
size_t bucketsize[256] = {0};
unsigned char* restrict oracle =
(unsigned char*) malloc(n);
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i] = strings[i][depth]];
unsigned char** restrict sorted = (unsigned char**)
malloc(n*sizeof(unsigned char*));
size_t bucketindex[256];
bucketindex[0] = 0;
for (size_t i=1; i < 256; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[oracle[i]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
free(sorted);
free(oracle);
size_t bsum = bucketsize[0];
for (unsigned short i=1; i < 256; ++i) {
if (bucketsize[i] == 0) continue;
msd_CE1(strings+bsum, bucketsize[i], depth+1);
bsum += bucketsize[i];
}
}
void msd_CE1(unsigned char** strings, size_t n)
{ msd_CE1(strings, n, 0); }
CONTESTANT_REGISTER(msd_CE1,
"rantala/msd_CE1",
"msd_CE1: oracle")
void
msd_CE2(unsigned char** strings, size_t n, size_t depth)
{
if (n < 32) {
insertion_sort(strings, n, depth);
return;
}
size_t bucketsize[256] = {0};
unsigned char* restrict oracle =
(unsigned char*) malloc(n);
for (size_t i=0; i < n; ++i)
oracle[i] = strings[i][depth];
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i]];
unsigned char** restrict sorted = (unsigned char**)
malloc(n*sizeof(unsigned char*));
size_t bucketindex[256];
bucketindex[0] = 0;
for (size_t i=1; i < 256; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[oracle[i]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
free(sorted);
free(oracle);
size_t bsum = bucketsize[0];
for (size_t i=1; i < 256; ++i) {
if (bucketsize[i] == 0) continue;
msd_CE2(strings+bsum, bucketsize[i], depth+1);
bsum += bucketsize[i];
}
}
void msd_CE2(unsigned char** strings, size_t n)
{ msd_CE2(strings, n, 0); }
CONTESTANT_REGISTER(msd_CE2,
"rantala/msd_CE2",
"msd_CE2: oracle+loop fission")
void
msd_CE2_16bit(unsigned char** strings, size_t n, size_t depth)
{
if (n < 32) {
insertion_sort(strings, n, depth);
return;
}
uint16_t bucketsize[256] = {0};
unsigned char* restrict oracle =
(unsigned char*) malloc(n);
for (size_t i=0; i < n; ++i)
oracle[i] = strings[i][depth];
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i]];
unsigned char** restrict sorted = (unsigned char**)
malloc(n*sizeof(unsigned char*));
uint16_t bucketindex[256];
bucketindex[0] = 0;
for (size_t i=1; i < 256; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[oracle[i]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
free(sorted);
free(oracle);
size_t bsum = bucketsize[0];
for (size_t i=1; i < 256; ++i) {
if (bucketsize[i] == 0) continue;
msd_CE2_16bit(strings+bsum, bucketsize[i], depth+1);
bsum += bucketsize[i];
}
}
void
msd_CE3(unsigned char** strings, size_t n, size_t depth)
{
if (n < 0x10000) {
msd_CE2(strings, n, depth);
return;
}
uint16_t* restrict oracle =
(uint16_t*) malloc(n*sizeof(uint16_t));
for (size_t i=0; i < n; ++i)
oracle[i] = get_char<uint16_t>(strings[i], depth);
size_t* restrict bucketsize = (size_t*)
calloc(0x10000, sizeof(size_t));
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i]];
unsigned char** sorted = (unsigned char**)
malloc(n*sizeof(unsigned char*));
static size_t bucketindex[0x10000];
bucketindex[0] = 0;
for (size_t i=1; i < 0x10000; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[oracle[i]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
free(sorted);
free(oracle);
size_t bsum = bucketsize[0];
for (size_t i=1; i < 0x10000; ++i) {
if (bucketsize[i] == 0) continue;
if (i & 0xFF) msd_CE3(strings+bsum,
bucketsize[i], depth+2);
bsum += bucketsize[i];
}
free(bucketsize);
}
void msd_CE3(unsigned char** strings, size_t n)
{ msd_CE3(strings, n, 0); }
CONTESTANT_REGISTER(msd_CE3,
"rantala/msd_CE3",
"msd_CE3: oracle+loop fission+adaptive")
static void
msd_CE4(unsigned char** strings, size_t n, size_t depth)
{
if (n < 0x10000) {
msd_CE2_16bit(strings, n, depth);
return;
}
uint16_t* restrict oracle =
(uint16_t*) malloc(n*sizeof(uint16_t));
for (size_t i=0; i < n; ++i)
oracle[i] = get_char<uint16_t>(strings[i], depth);
size_t* restrict bucketsize = (size_t*)
calloc(0x10000, sizeof(size_t));
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i]];
unsigned char** sorted = (unsigned char**)
malloc(n*sizeof(unsigned char*));
static size_t bucketindex[0x10000];
bucketindex[0] = 0;
for (size_t i=1; i < 0x10000; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[oracle[i]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
free(sorted);
free(oracle);
size_t bsum = bucketsize[0];
for (size_t i=1; i < 0x10000; ++i) {
if (bucketsize[i] == 0) continue;
if (i & 0xFF) msd_CE4(strings+bsum,
bucketsize[i], depth+2);
bsum += bucketsize[i];
}
free(bucketsize);
}
void msd_CE4(unsigned char** strings, size_t n)
{ msd_CE4(strings, n, 0); }
CONTESTANT_REGISTER(msd_CE4,
"rantala/msd_CE4",
"msd_CE4: oracle+loop fission+adaptive+16bit counter")
static void
msd_CE2_16bit_5(unsigned char** strings, size_t n, size_t depth,
unsigned char* oracle, unsigned char** sorted)
{
if (n < 32) {
insertion_sort(strings, n, depth);
return;
}
uint16_t bucketsize[256] = {0};
for (size_t i=0; i < n; ++i)
oracle[i] = strings[i][depth];
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i]];
uint16_t bucketindex[256];
bucketindex[0] = 0;
for (size_t i=1; i < 256; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[oracle[i]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
size_t bsum = bucketsize[0];
for (size_t i=1; i < 256; ++i) {
if (bucketsize[i] == 0) continue;
msd_CE2_16bit_5(strings+bsum, bucketsize[i], depth+1,
oracle, sorted);
bsum += bucketsize[i];
}
}
static void
msd_CE5(unsigned char** strings, size_t n, size_t depth,
uint16_t* oracle, unsigned char** sorted)
{
if (n < 0x10000) {
msd_CE2_16bit_5(strings, n, depth,
(unsigned char*)oracle, sorted);
return;
}
for (size_t i=0; i < n; ++i)
oracle[i] = get_char<uint16_t>(strings[i], depth);
size_t* restrict bucketsize = (size_t*)
calloc(0x10000, sizeof(size_t));
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i]];
static size_t bucketindex[0x10000];
bucketindex[0] = 0;
for (size_t i=1; i < 0x10000; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[oracle[i]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
size_t bsum = bucketsize[0];
for (size_t i=1; i < 0x10000; ++i) {
if (bucketsize[i] == 0) continue;
if (i & 0xFF) msd_CE5(strings+bsum, bucketsize[i],
depth+2, oracle, sorted);
bsum += bucketsize[i];
}
free(bucketsize);
}
void msd_CE5(unsigned char** strings, size_t n)
{
uint16_t* restrict oracle = (uint16_t*)
malloc(n*sizeof(uint16_t));
unsigned char** sorted = (unsigned char**)
malloc(n*sizeof(unsigned char*));
msd_CE5(strings, n, 0, oracle, sorted);
free(oracle);
free(sorted);
}
CONTESTANT_REGISTER(msd_CE5,
"rantala/msd_CE5",
"msd_CE5: oracle+loop fission+adaptive+16bit counter+prealloc")
static void
msd_CE6(unsigned char** strings, size_t n, size_t depth,
uint16_t* oracle, unsigned char** sorted)
{
if (n < 0x10000) {
msd_CE2_16bit_5(strings, n, depth, (unsigned char*)oracle, sorted);
return;
}
{
size_t i;
for (i=0; i < n-n%2; i+=2) {
unsigned char* str1 = strings[i];
unsigned char* str2 = strings[i+1];
uint16_t ch1 = get_char<uint16_t>(str1, depth);
uint16_t ch2 = get_char<uint16_t>(str2, depth);
oracle[i] = ch1;
oracle[i+1] = ch2;
}
for (; i < n; ++i)
oracle[i] = get_char<uint16_t>(strings[i], depth);
}
size_t* restrict bucketsize = (size_t*)
calloc(0x10000, sizeof(size_t));
for (size_t i=0; i < n; ++i)
++bucketsize[oracle[i]];
static size_t bucketindex[0x10000];
bucketindex[0] = 0;
for (size_t i=1; i < 0x10000; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[oracle[i]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
size_t bsum = bucketsize[0];
for (size_t i=1; i < 0x10000; ++i) {
if (bucketsize[i] == 0) continue;
if (i & 0xFF) msd_CE6(strings+bsum, bucketsize[i],
depth+2, oracle, sorted);
bsum += bucketsize[i];
}
free(bucketsize);
}
void msd_CE6(unsigned char** strings, size_t n)
{
uint16_t* restrict oracle = (uint16_t*)
malloc(n*sizeof(uint16_t));
unsigned char** sorted = (unsigned char**)
malloc(n*sizeof(unsigned char*));
msd_CE6(strings, n, 0, oracle, sorted);
free(oracle);
free(sorted);
}
CONTESTANT_REGISTER(msd_CE6,
"rantala/msd_CE6",
"msd_CE6: oracle+loop fission+adaptive+16bit counter+prealloc+unroll")
static void
msd_CE7_(unsigned char** strings, size_t n, size_t depth,
uint16_t* oracle, unsigned char** sorted)
{
if (n < 0x10000) {
msd_CE2_16bit_5(strings, n, depth, (unsigned char*)oracle, sorted);
return;
}
{
size_t i;
for (i=0; i < n-n%2; i+=2) {
unsigned char* str1 = strings[i];
unsigned char* str2 = strings[i+1];
uint16_t ch1 = get_char<uint16_t>(str1, depth);
uint16_t ch2 = get_char<uint16_t>(str2, depth);
oracle[i ] = ch1;
oracle[i+1] = ch2;
}
for (; i < n; ++i)
oracle[i] = get_char<uint16_t>(strings[i], depth);
}
size_t* restrict bucketsize = (size_t*)
calloc(0x10000, sizeof(size_t));
int is_sorted = 1;
{
size_t i;
uint16_t prev_ch = oracle[0];
++bucketsize[prev_ch];
for (i=1; i < n; ++i) {
uint16_t ch = oracle[i];
++bucketsize[ch];
if (ch > prev_ch) {
is_sorted = 0;
++i;
break;
}
prev_ch = ch;
}
for (; i < n; ++i)
++bucketsize[oracle[i]];
}
if (is_sorted)
goto in_order;
static size_t bucketindex[0x10000];
bucketindex[0] = 0;
for (size_t i=1; i < 0x10000; ++i)
bucketindex[i] = bucketindex[i-1]+bucketsize[i-1];
for (size_t i=0; i < n; ++i)
sorted[bucketindex[oracle[i]]++] = strings[i];
memcpy(strings, sorted, n*sizeof(unsigned char*));
in_order:
size_t bsum = bucketsize[0];
for (size_t i=1; i < 0x10000; ++i) {
if (bucketsize[i] == 0) continue;
if (i & 0xFF) msd_CE7_(strings+bsum, bucketsize[i],
depth+2, oracle, sorted);
bsum += bucketsize[i];
}
free(bucketsize);
}
void msd_CE7(unsigned char** strings, size_t n)
{
uint16_t* restrict oracle = (uint16_t*)
malloc(n*sizeof(uint16_t));
unsigned char** sorted = (unsigned char**)
malloc(n*sizeof(unsigned char*));
msd_CE7_(strings, n, 0, oracle, sorted);
free(oracle);
free(sorted);
}
CONTESTANT_REGISTER(msd_CE7,
"rantala/msd_CE7",
"msd_CE7: oracle+loop fission+adaptive+16bit counter+prealloc+unroll+sortedness")
}