<tt.rantala@gmail.com>
#include <stdlib.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <boost/array.hpp>
#include "tools/debug.h"
#include "tools/get_char.h"
#include "tools/vector_malloc.h"
#include "tools/vector_realloc.h"
#include "tools/vector_block.h"
#include "tools/vector_bagwell.h"
#include "tools/vector_brodnik.h"
#include "../tools/contest.h"
#include "../sequential/bs-mkqs.h"
namespace rantala_burstsort2 {
using namespace rantala;
template <typename CharT, typename BucketT>
struct TrieNode
{
std::vector<void*> _buckets;
TrieNode() {}
TrieNode* get_node(unsigned index) const
{
assert(is_trie(index));
return reinterpret_cast<TrieNode*>(
static_cast<char*>(_buckets[index])-1);
}
BucketT* get_bucket(unsigned index)
{
if (index < _buckets.size()) {
BucketT* b = static_cast<BucketT*>(_buckets[index]);
if (not b) { _buckets[index] = b = new BucketT; }
return b;
} else {
_buckets.resize(index+1);
BucketT* b = new BucketT;
_buckets[index] = b;
return b;
}
}
bool is_trie(unsigned index) const
{
BOOST_STATIC_ASSERT(sizeof(size_t)==sizeof(void*));
if (index < _buckets.size()) {return size_t(_buckets[index])&1;}
return false;
}
void extend(unsigned size)
{
if (size > _buckets.size()) { _buckets.resize(size); }
}
};
static inline void make_trie(void*& ptr)
{
BOOST_STATIC_ASSERT(sizeof(size_t)==sizeof(void*));
assert(ptr);
ptr = (void*)(size_t(ptr) | 1);
}
template <typename CharT>
struct BurstSimple
{
template <typename BucketT>
TrieNode<CharT, BucketT>*
operator()(const BucketT& bucket, size_t depth) const
{
TrieNode<CharT, BucketT>* new_node = new TrieNode<CharT, BucketT>;
const unsigned bucket_size = bucket.size();
unsigned i=0;
for (; i < bucket_size-bucket_size%64; i+=64) {
boost::array<CharT, 64> cache;
boost::array<unsigned char*, 64> strings;
for (unsigned j=0; j < 64; ++j) {
strings[j] = bucket[i+j];
cache[j] = get_char<CharT>(strings[j], depth);
}
for (unsigned j=0; j < 64; ++j) {
const CharT ch = cache[j];
BucketT* sub_bucket = new_node->get_bucket(ch);
assert(sub_bucket);
sub_bucket->push_back(strings[j]);
}
}
for (; i < bucket_size; ++i) {
unsigned char* ptr = bucket[i];
const CharT ch = get_char<CharT>(ptr, depth);
BucketT* sub_bucket = new_node->get_bucket(ch);
assert(sub_bucket);
sub_bucket->push_back(ptr);
}
return new_node;
}
};
template <typename CharT>
struct BurstRecursive
{
template <typename BucketT>
TrieNode<CharT, BucketT>*
operator()(const BucketT& bucket, size_t depth) const
{
TrieNode<CharT, BucketT>* new_node
= BurstSimple<CharT>()(bucket, depth);
const size_t threshold = std::max(size_t(100), bucket.size()/2);
for (unsigned i=0; i < new_node->_buckets.size(); ++i) {
BucketT* sub_bucket = static_cast<BucketT*>(
new_node->_buckets[i]);
if (not sub_bucket) continue;
if (not is_end(i) and sub_bucket->size() > threshold) {
new_node->_buckets[i] =
BurstRecursive<CharT>()(*sub_bucket,
depth+sizeof(CharT));
delete sub_bucket;
make_trie(new_node->_buckets[i]);
}
}
return new_node;
}
};
template <typename CharT, typename BucketT>
static TrieNode<CharT, BucketT>*
random_sample(unsigned char** strings, size_t n)
{
const size_t sample_size = n/8192;
debug()<<__PRETTY_FUNCTION__<<" sampling "<<sample_size<<" strings\n";
size_t max_nodes = (sizeof(CharT) == 1) ? 5000 : 2000;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
for (size_t i=0; i < sample_size; ++i) {
unsigned char* str = strings[size_t(drand48()*n)];
size_t depth = 0;
TrieNode<CharT, BucketT>* node = root;
while (true) {
CharT c = get_char<CharT>(str, depth);
if (is_end(c)) break;
depth += sizeof(CharT);
node->extend(c+1);
if (not node->is_trie(c)) {
node->_buckets[c] = new TrieNode<CharT, BucketT>;
make_trie(node->_buckets[c]);
if (--max_nodes==0) goto finish;
}
node = node->get_node(c);
assert(node);
}
}
finish:
return root;
}
template <typename CharT, typename BucketT>
static TrieNode<CharT, BucketT>*
pseudo_sample(unsigned char** strings, size_t n)
{
debug()<<__func__<<"(): sampling "<<n/8192<<" strings ...\n";
size_t max_nodes = (sizeof(CharT) == 1) ? 5000 : 2000;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
for (size_t i=0; i < n; i += 8192) {
unsigned char* str = strings[i];
size_t depth = 0;
TrieNode<CharT, BucketT>* node = root;
while (true) {
CharT c = get_char<CharT>(str, depth);
if (is_end(c)) break;
depth += sizeof(CharT);
node->extend(c+1);
if (not node->is_trie(c)) {
node->_buckets[c] = new TrieNode<CharT, BucketT>;
make_trie(node->_buckets[c]);
if (--max_nodes==0) goto finish;
}
node = node->get_node(c);
assert(node);
}
}
finish:
return root;
}
template <unsigned Threshold, typename BucketT,
typename BurstImpl, typename CharT>
static inline void
insert(TrieNode<CharT, BucketT>* root, unsigned char** strings, size_t n)
{
for (size_t i=0; i < n; ++i) {
unsigned char* str = strings[i];
size_t depth = 0;
CharT c = get_char<CharT>(str, 0);
TrieNode<CharT, BucketT>* node = root;
while (node->is_trie(c)) {
assert(not is_end(c));
node = node->get_node(c);
depth += sizeof(CharT);
c = get_char<CharT>(str, depth);
}
BucketT* bucket = node->get_bucket(c);
assert(bucket);
bucket->push_back(str);
if (is_end(c)) continue;
if (bucket->size() > Threshold) {
node->_buckets[c] = BurstImpl()(*bucket,
depth+sizeof(CharT));
make_trie(node->_buckets[c]);
delete bucket;
}
}
}
static inline void
copy(const std::vector<unsigned char*>& bucket, unsigned char** dst)
{
std::copy(bucket.begin(), bucket.end(), dst);
}
template <typename SmallSortT, typename BucketT, typename CharT>
static unsigned char**
traverse(TrieNode<CharT, BucketT>* node,
unsigned char** dst,
size_t depth,
SmallSortT small_sort)
{
for (unsigned i=0; i < node->_buckets.size(); ++i) {
if (node->is_trie(i)) {
dst = traverse(node->get_node(i),
dst, depth+sizeof(CharT), small_sort);
} else {
BucketT* bucket =
static_cast<BucketT*>(node->_buckets[i]);
if (not bucket) continue;
size_t bsize = bucket->size();
copy(*bucket, dst);
if (not is_end(i)) small_sort(dst, bsize, depth);
dst += bsize;
delete bucket;
}
}
delete node;
return dst;
}
#define SmallSort bs_mkqs::ssort2
void burstsort2_vector(unsigned char** strings, size_t n)
{
typedef unsigned char CharT;
typedef std::vector<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
insert<8192, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_brodnik(unsigned char** strings, size_t n)
{
typedef unsigned char CharT;
typedef vector_brodnik<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
insert<16384, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_bagwell(unsigned char** strings, size_t n)
{
typedef unsigned char CharT;
typedef vector_bagwell<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
insert<16384, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_vector_block(unsigned char** strings, size_t n)
{
typedef unsigned char CharT;
typedef vector_block<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
insert<16384, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_superalphabet_vector(unsigned char** strings, size_t n)
{
typedef uint16_t CharT;
typedef std::vector<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
insert<32768, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_superalphabet_brodnik(unsigned char** strings, size_t n)
{
typedef uint16_t CharT;
typedef vector_brodnik<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
insert<32768, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_superalphabet_bagwell(unsigned char** strings, size_t n)
{
typedef uint16_t CharT;
typedef vector_bagwell<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
insert<32768, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_superalphabet_vector_block(unsigned char** strings, size_t n)
{
typedef uint16_t CharT;
typedef vector_block<unsigned char*, 128> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
insert<32768, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_sampling_vector(unsigned char** strings, size_t n)
{
typedef unsigned char CharT;
typedef std::vector<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = pseudo_sample<CharT, BucketT>(strings, n);
insert<8192, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_sampling_brodnik(unsigned char** strings, size_t n)
{
typedef unsigned char CharT;
typedef vector_brodnik<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = pseudo_sample<CharT, BucketT>(strings, n);
insert<16384, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_sampling_bagwell(unsigned char** strings, size_t n)
{
typedef unsigned char CharT;
typedef vector_bagwell<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = pseudo_sample<CharT, BucketT>(strings, n);
insert<16384, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_sampling_vector_block(unsigned char** strings, size_t n)
{
typedef unsigned char CharT;
typedef vector_block<unsigned char*, 128> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = pseudo_sample<CharT, BucketT>(strings, n);
insert<16384, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_sampling_superalphabet_vector(unsigned char** strings, size_t n)
{
typedef uint16_t CharT;
typedef std::vector<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = pseudo_sample<CharT, BucketT>(strings, n);
insert<16384, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_sampling_superalphabet_brodnik(unsigned char** strings, size_t n)
{
typedef uint16_t CharT;
typedef vector_brodnik<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = pseudo_sample<CharT, BucketT>(strings, n);
insert<32768, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_sampling_superalphabet_bagwell(unsigned char** strings, size_t n)
{
typedef uint16_t CharT;
typedef vector_bagwell<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = pseudo_sample<CharT, BucketT>(strings, n);
insert<32768, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
void burstsort2_sampling_superalphabet_vector_block(unsigned char** strings, size_t n)
{
typedef uint16_t CharT;
typedef vector_block<unsigned char*, 128> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TrieNode<CharT, BucketT>* root = pseudo_sample<CharT, BucketT>(strings, n);
insert<32768, BucketT, BurstImpl>(root, strings, n);
traverse(root, strings, 0, SmallSort);
}
CONTESTANT_REGISTER(burstsort2_vector,
"rantala/burstsort2_vector",
"burstsort2 with std::vector bucket type")
CONTESTANT_REGISTER(burstsort2_brodnik,
"rantala/burstsort2_brodnik",
"burstsort2 with vector_brodnik bucket type")
CONTESTANT_REGISTER(burstsort2_bagwell,
"rantala/burstsort2_bagwell",
"burstsort2 with vector_bagwell bucket type")
CONTESTANT_REGISTER(burstsort2_vector_block,
"rantala/burstsort2_vector_block",
"burstsort2 with vector_block bucket type")
CONTESTANT_REGISTER(burstsort2_superalphabet_vector,
"rantala/burstsort2_superalphabet_vector",
"burstsort2 superalphabet with std::vector bucket type")
CONTESTANT_REGISTER(burstsort2_superalphabet_brodnik,
"rantala/burstsort2_superalphabet_brodnik",
"burstsort2 superalphabet with vector_brodnik bucket type")
CONTESTANT_REGISTER(burstsort2_superalphabet_bagwell,
"rantala/burstsort2_superalphabet_bagwell",
"burstsort2 superalphabet with vector_bagwell bucket type")
CONTESTANT_REGISTER(burstsort2_superalphabet_vector_block,
"rantala/burstsort2_superalphabet_vector_block",
"burstsort2 superalphabet with vector_block bucket type")
CONTESTANT_REGISTER(burstsort2_sampling_vector,
"rantala/burstsort2_sampling_vector",
"burstsort2 sampling with std::vector bucket type")
CONTESTANT_REGISTER(burstsort2_sampling_brodnik,
"rantala/burstsort2_sampling_brodnik",
"burstsort2 sampling with vector_brodnik bucket type")
CONTESTANT_REGISTER(burstsort2_sampling_bagwell,
"rantala/burstsort2_sampling_bagwell",
"burstsort2 sampling with vector_bagwell bucket type")
CONTESTANT_REGISTER(burstsort2_sampling_vector_block,
"rantala/burstsort2_sampling_vector_block",
"burstsort2 sampling with vector_block bucket type")
CONTESTANT_REGISTER(burstsort2_sampling_superalphabet_vector,
"rantala/burstsort2_sampling_superalphabet_vector",
"burstsort2 sampling superalphabet with std::vector bucket type")
CONTESTANT_REGISTER(burstsort2_sampling_superalphabet_brodnik,
"rantala/burstsort2_sampling_superalphabet_brodnik",
"burstsort2 sampling superalphabet with vector_brodnik bucket type")
CONTESTANT_REGISTER(burstsort2_sampling_superalphabet_bagwell,
"rantala/burstsort2_sampling_superalphabet_bagwell",
"burstsort2 sampling superalphabet with vector_bagwell bucket type")
CONTESTANT_REGISTER(burstsort2_sampling_superalphabet_vector_block,
"rantala/burstsort2_sampling_superalphabet_vector_block",
"burstsort2 sampling superalphabet with vector_block bucket type")
#undef SmallSort
}