http://dx.doi.org/10.1093/ietfec/e90-a.2.457
<tt.rantala@gmail.com>
namespace ng_cradix_rantala {
typedef size_t UINT;
typedef unsigned char BYTE, *LPBYTE, **LPPBYTE;
typedef unsigned char STR, *LPSTR, **LPPSTR, **STRPARR;
static const UINT AS = 256;
static const UINT BS = 4;
static const UINT AL = 0;
static const UINT AH = 255;
static const UINT IC = 20;
static const UINT KBC = 128;
static const UINT SS = 4096;
#define push(a, k, n, b) _sp->sa=a, _sp->sk=k, _sp->sn=n, (_sp++)->sb=b
#define pop(a, k, n, b) a=(--_sp)->sa, k=_sp->sk, n=_sp->sn, b=_sp->sb
#define stackempty() (_sp<=stack)
#define splittable(c) c > 0 && count[c] > IC
struct Stack {
LPSTR* sa; LPBYTE sk;
int sn, sb;
} stack[SS], *_sp=stack;
static void
insertion_sort(unsigned char** strings, int n, size_t depth)
{
for (unsigned char** i = strings + 1; --n > 0; ++i) {
unsigned char** j = i;
unsigned char* tmp = *i;
while (j > strings) {
unsigned char* s = *(j-1)+depth;
unsigned char* t = tmp+depth;
while (*s == *t && *s) {
++s;
++t;
}
if (*s <= *t) break;
*j = *(j-1);
--j;
}
*j = tmp;
}
}
static
void FillKeyBuffer(LPPSTR a, LPBYTE kb, UINT* count, UINT n, UINT d)
{
for (size_t i=0; i<n; ++i) {
const unsigned char* str = a[i];
unsigned j=0;
for (; j<BS; ++j) {
unsigned char c = str[d+j];
kb[BS*i+j] = c;
if (c==0) break;
}
if (j<BS) kb[BS*i+j] = 0;
}
for (size_t i=0; i<n; ++i) ++count[kb[BS*i]];
}
static
void RDFK(LPPSTR* GrpKP, LPPSTR a, UINT n, LPPSTR ta,
UINT* count, UINT d)
{
LPPSTR ak, tc; UINT i, *cptr, gs; unsigned char c=0;
for (i=0; i<n-n%32; i+=32) {
unsigned char cache[32];
for (unsigned j=0; j<32; ++j) cache[j] = a[i+j][d];
for (unsigned j=0; j<32; ++j) ++count[cache[j]];
}
for (; i<n; i++) count[a[i][d]]++;
cptr=&count[AL]; while (*cptr<1) cptr++;
if (*cptr<n) gs=n;
else { c=(cptr-&count[AL])+AL; gs=0; }
if (!gs) {
if (splittable(c)) push(a, 0, n, d+1);
else if (n>1 && c>0) insertion_sort(a, n, d);
count[c]=0; return;
}
GrpKP[AL]=a;
for (ak=a, i=AL; i<AH; i++) GrpKP[i+1]=ak+=count[i];
memcpy(ta, a, sizeof(LPSTR)*n);
for (i=0, tc=ta; i<n; i++, tc++) {
*GrpKP[ta[i][d]]=*tc; GrpKP[ta[i][d]]++;
}
for (ak=a, i=AL; i<AH; i++) {
if (splittable(i)) push(ak, 0, count[i], d+1);
else if (count[i]>1 && i>0) insertion_sort(ak, count[i], d);
ak+=count[i]; count[i]=0;
}
}
void cradix_rantala(LPPSTR a, UINT n)
{
UINT kbsd, kbsd1, i, j, stage, d, MEMSIZE;
UINT *cptr, gs, count[AS];
LPSTR tj, tk, ax, tl, kb, ss, tt, GrpKB[AS];
LPPSTR GrpKP[AS], ak, ta, tc, t;
if (sizeof(LPPSTR)>sizeof(unsigned char)*BS)
MEMSIZE=sizeof(LPPSTR);
else
MEMSIZE=sizeof(unsigned char)*BS;
ta = (LPPSTR)malloc(n * MEMSIZE);
tk = (LPBYTE)malloc(n * sizeof(unsigned char) * BS);
tj=tk;
push(a, tk, n, 0); for (i=AL; i<AH; i++) count[i]=0;
while (!stackempty()) {
pop(a, tk, n, stage);
if (tk) {
if ((d=stage%BS)!=0)
for (i=0, tl=tk; i<n; i++, tl+=(BS-d))
count[*tl]++;
else {
if (n>KBC)
FillKeyBuffer(a, tk, count, n, stage);
else {
RDFK(GrpKP, a, n, ta, count, stage);
continue;
}
}
cptr=&count[AL];
while (*cptr<1) cptr++;
if (*cptr<n) gs=n; else gs=0;
kbsd=BS-d, kbsd1=kbsd-1;
GrpKP[AL]=a; GrpKB[AL]=tk;
for (ak=a, ax=tk, i=AL; i<AH; i++) {
GrpKP[i+1]=ak+=count[i];
GrpKB[i+1]=ax+=count[i]*kbsd1;
}
memcpy(ta, a, sizeof(LPSTR)*gs);
for (i=0, ax=tk, tc=ta; i<gs;
i++, ax+=kbsd, tc++) {
*GrpKP[*ax]=*tc; GrpKP[*ax]++;
}
memcpy(ta, tk, sizeof(unsigned char)*n*kbsd);
for (i=0, kb=(LPBYTE)ta; i<n; i++, *t+=kbsd1) {
t=&GrpKB[*kb]; ss=*t; tt=kb+1;
for (j=0; j<kbsd1; j++)
{ *ss=*tt; ss++; tt++; }
kb+=kbsd;
}
for (ak=a, ax=tk, i=AL; i<AH; i++) {
if (splittable(i))
{ push(ak, ax, count[i], stage+1); }
else if (count[i]>1 && i>0)
insertion_sort(ak, count[i], stage);
ak+=count[i]; ax+=count[i]*(kbsd1);
count[i]=0;
}
}
else RDFK(GrpKP, a, n, ta, count, stage);
}
free((void*)ta);
free((void*)tj);
}
CONTESTANT_REGISTER(cradix_rantala,
"ng/rantala_cradix",
"CRadix by Waihong Ng and Katsuhiko Kakehi modified by Rantala")
#undef push
#undef pop
#undef stackempty
#undef splittable
}