<tb@panthema.net>
<http://www.gnu.org/licenses/>
namespace skew {
using namespace tuples;
using namespace stxxl;
template <typename offset_type>
class skew
{
public:
static const bool debug = true;
static const bool debug_input = false;
static const bool debug_lexname = false;
static const bool debug_isa = false;
static const bool debug_merge = false;
static const bool debug_mcreate = false;
static const bool debug_S012 = false;
static const bool debug_merge_result = false;
static const bool debug_merge_lcp = false;
static const bool debug_lcp_expand = false;
static const bool debug_lcp_build = false;
static const bool debug_rmq = false;
typedef tuples::pair<offset_type, offset_type> skew_pair_type;
typedef tuples::triple<offset_type, offset_type, offset_type> skew_triple_type;
typedef tuples::quad<offset_type, offset_type, offset_type, offset_type> skew_quad_type;
typedef tuples::quint<offset_type, offset_type, offset_type, offset_type, offset_type> skew_quint_type;
static const size_t vector_block_size = ::block_size;
typedef typename stxxl::VECTOR_GENERATOR<offset_type, 8, 2, vector_block_size>::result offset_array_type;
typedef stream::vector_iterator2stream <typename offset_array_type::iterator> offset_array_it_rg;
#if LCP_CALC
typedef typename stxxl::VECTOR_GENERATOR<skew_pair_type, 8, 2, vector_block_size>::result skew_pair_array_type;
typedef stream::vector_iterator2stream <typename skew_pair_array_type::iterator> skew_pair_array_it_rg;
typedef typename stxxl::VECTOR_GENERATOR<uint8_t, 8, 2, vector_block_size>::result uint8_array_type;
typedef stream::vector_iterator2stream <typename uint8_array_type::iterator> uint8_array_it_rg;
#endif
struct less_mod0
{
typedef skew_quint_type value_type;
bool operator() (const value_type& a, const value_type& b) const
{
if (a.second == b.second) {
return a.fourth < b.fourth;
}
else {
return a.second < b.second;
}
}
static value_type min_value() { return value_type::min_value(); }
static value_type max_value() { return value_type::max_value(); }
};
typedef tuples::pair_less1st<skew_pair_type> less_pair_1st;
typedef tuples::pair_less2nd<skew_pair_type> less_pair_2nd;
typedef tuples::triple_less1st<skew_triple_type> less_triple_1st;
struct less_quad_2nd
{
typedef skew_quad_type value_type;
bool operator() (const skew_quad_type& a, const skew_quad_type& b) const
{
return a.second < b.second;
}
static value_type min_value() { return value_type::min_value(); }
static value_type max_value() { return value_type::max_value(); }
};
struct less_quint_2nd
{
typedef skew_quint_type value_type;
bool operator() (const value_type& a, const value_type& b) const
{
return a.second < b.second;
}
static value_type min_value() { return value_type::min_value(); }
static value_type max_value() { return value_type::max_value(); }
};
struct less_skew
{
typedef skew_pair_type value_type;
bool operator() (const value_type& a, const value_type& b) const
{
if ((a.first & 1) == (b.first & 1)) {
return a.first < b.first;
}
else
{
return (a.first & 1) < (b.first & 1);
}
}
static value_type min_value() { return value_type::min_value(); }
static value_type max_value() { return value_type::max_value(); }
};
template <typename alphabet_type>
struct less_quad
{
typedef typename tuples::quad<offset_type, alphabet_type, alphabet_type, alphabet_type> value_type;
bool operator() (const value_type& a, const value_type& b) const
{
if (a.second == b.second) {
if (a.third == b.third) {
return a.fourth < b.fourth;
}
else {
return a.third < b.third;
}
}
else {
return a.second < b.second;
}
}
static value_type min_value() { return value_type::min_value(); }
static value_type max_value() { return value_type::max_value(); }
};
template <class quad_type>
static inline bool quad_eq(const quad_type& a, const quad_type& b)
{
return (a.second == b.second) && (a.third == b.third) && (a.fourth == b.fourth);
}
template <class quad_type>
static inline uint8_t quad_lcp(const quad_type& a, const quad_type& b)
{
if (a.second != b.second) return 0;
if (a.third != b.third) return 1;
if (a.fourth != b.fourth) return 2;
return 3;
}
template <class Input>
class naming
{
public:
typedef typename Input::value_type quad_type;
typedef skew_pair_type value_type;
private:
Input& A;
#if LCP_CALC
uint8_array_type& lcp_names;
#endif
bool & unique;
offset_type lexname;
quad_type prev;
skew_pair_type result;
public:
naming(Input& A_,
#if LCP_CALC
uint8_array_type& lcp_names_,
#endif
bool & unique_)
: A(A_),
#if LCP_CALC
lcp_names(lcp_names_),
#endif
unique(unique_),
lexname(0)
{
assert(!A.empty());
unique = true;
prev = *A;
result.first = prev.first;
result.second = lexname;
DBG(debug_lexname, "lexname[" << lexname << "]: " << prev << " and sentinel lcp 0");
#if LCP_CALC
lcp_names.push_back(0);
#endif
}
const value_type& operator*() const
{
return result;
}
naming& operator++()
{
assert(!A.empty());
++A;
if (A.empty()) return *this;
quad_type curr = *A;
#if !LCP_CALC
if (!quad_eq(prev, curr)) {
++lexname;
}
#else
uint8_t lcp;
if ( (lcp = quad_lcp(prev, curr)) != 3 ) {
++lexname;
}
#endif
else {
if (!A.empty() && curr.second != offset_type(0)) {
unique = false;
}
}
#if !LCP_CALC
DBG(debug_lexname, "lexname[" << lexname << "]: " << curr << " and prev " << prev);
#else
DBG(debug_lexname, "lexname[" << lexname << "]: " << curr << " and prev " << prev << " have lcp " << int(lcp));
lcp_names.push_back(lcp);
#endif
result.first = curr.first;
result.second = lexname;
prev = curr;
return *this;
}
bool empty() const
{
return A.empty();
}
};
template<class InputA, class InputB, const size_t add_alphabet = 0>
class make_pairs
{
public:
typedef skew_pair_type value_type;
private:
InputA& A;
InputB& B;
value_type result;
public:
make_pairs(InputA& a, InputB& b)
: A(a), B(b)
{
assert(!A.empty()); assert(!B.empty());
if (!empty()) {
result = value_type(*A, *B + add_alphabet);
}
}
const value_type & operator*() const
{
return result;
}
make_pairs & operator++()
{
assert(!A.empty()); assert(!B.empty());
++A; ++B;
if (!A.empty() && !B.empty()) {
result = value_type(*A, *B + add_alphabet);
}
return *this;
}
bool empty() const
{
return (A.empty() || B.empty());
}
};
template <class Input, typename alphabet_type, const size_t add_alphabet = 0>
class make_quads
{
public:
typedef tuples::quad<offset_type, alphabet_type, alphabet_type, alphabet_type> value_type;
private:
Input & A;
value_type current;
offset_type counter;
unsigned int z3z;
bool finished;
offset_array_type & backup;
public:
make_quads(Input & data_in_, offset_array_type & backup_)
: A(data_in_),
current(0,0,0,0),
counter(0),
z3z(0),
finished(false),
backup(backup_)
{
assert(!A.empty());
current.first = counter;
current.second = (*A).second + add_alphabet;
++A;
if (!A.empty()) {
current.third = (*A).second + add_alphabet;
++A;
}
else {
current.third = 0;
current.fourth = 0;
}
if (!A.empty()) {
current.fourth = (*A).second + add_alphabet;
}
else {
current.fourth = 0;
}
}
const value_type & operator*() const
{
return current;
}
make_quads & operator++()
{
assert(!A.empty() || !finished);
if (current.second != offset_type(0)) {
DBG(debug_input, "Input[" << counter << "] = " << current.second - add_alphabet);
backup.push_back(current.second);
}
if (++z3z == 3) z3z = 0;
current.first = ++counter;
current.second = current.third;
current.third = current.fourth;
if (!A.empty())
++A;
if (!A.empty()) {
current.fourth = (*A).second + add_alphabet;
}
else {
current.fourth = 0;
}
if ((current.second == offset_type(0)) && (z3z != 1)) {
finished = true;
}
return *this;
}
bool empty() const
{
return (A.empty() && finished);
}
};
template <class Input>
class extract_mod12
{
public:
typedef typename Input::value_type value_type;
private:
Input & A;
offset_type counter;
offset_type output_counter;
value_type result;
public:
extract_mod12(Input & A_)
: A(A_),
counter(0),
output_counter(0)
{
assert(!A.empty());
++A, ++counter;
if (!A.empty()) {
result = *A;
result.set_1(output_counter);
}
}
const value_type & operator*() const
{
return result;
}
extract_mod12 & operator++()
{
assert(!A.empty());
++A, ++counter, ++output_counter;
if (!A.empty() && (counter % 3) == 0) {
++A, ++counter;
}
if (!A.empty()) {
result = *A;
result.set_1(output_counter);
}
return *this;
}
bool empty() const
{
return A.empty();
}
};
#if LCP_CALC
class RMQ
{
private:
static const size_t memsize_compute = ram_use / 5;
static const size_t slabsize = memsize_compute / 4 / sizeof(offset_type);
typedef tuples::pair<offset_type,offset_type> RMQLeft;
typedef tuples::triple<offset_type,offset_type,offset_type> RMQRight;
typedef tuples::pair<offset_type,offset_type> RMQAnswer;
template <typename RMQTuple>
struct RMQTupleOrder
{
typedef RMQTuple value_type;
inline bool operator()(const value_type& a, const value_type& b) const
{
return (a.first / slabsize) < (b.first / slabsize);
}
static value_type min_value() { return value_type::min_value(); }
static value_type max_value() { return value_type::max_value(); }
};
typedef stxxl::sorter<RMQLeft, RMQTupleOrder<RMQLeft>, block_size> st_RMQLeft_type;
typedef stxxl::sorter<RMQRight, RMQTupleOrder<RMQRight>, block_size> st_RMQRight_type;
typedef stxxl::sorter<RMQAnswer, tuples::pair_less1st<RMQAnswer>, block_size> st_RMQAnswer_type;
private:
st_RMQLeft_type st_RMQLeft;
st_RMQRight_type st_RMQRight;
std::auto_ptr<st_RMQAnswer_type> st_RMQAnswer;
RMQAnswer current;
bool computed, done;
public:
RMQ(size_t memsize_input)
: st_RMQLeft(RMQTupleOrder<RMQLeft>(), memsize_input / 2),
st_RMQRight(RMQTupleOrder<RMQRight>(), memsize_input / 2),
computed(false)
{
}
void query(const offset_type& left, const offset_type& right, const offset_type& target)
{
if ( (left / slabsize) == (right / slabsize) )
{
st_RMQRight.push( RMQRight(right,left,target) );
}
else
{
st_RMQLeft.push( RMQLeft(left,target) );
st_RMQRight.push( RMQRight(right,left,target) );
}
}
template <typename DataStream>
void compute(DataStream& stream)
{
assert(!computed); computed = true;
st_RMQLeft.sort( memsize_compute / 4 );
st_RMQRight.sort( memsize_compute / 4 );
st_RMQAnswer = std::auto_ptr<st_RMQAnswer_type>
( new st_RMQAnswer_type(tuples::pair_less1st<RMQAnswer>(), memsize_compute / 4) );
size_t start = 0;
offset_type* buffer = new offset_type[slabsize];
RMQ_Stack<offset_type,offset_type> spanrmq;
for (size_t thisslab = 0; !stream.empty(); ++thisslab)
{
size_t thissize = 0;
assert( start / slabsize == thisslab );
while ( thissize < slabsize && !stream.empty() )
{
buffer[thissize++] = *stream;
++stream;
}
RMQ_succinct<offset_type, uint32_t> slabrmq(buffer, thissize);
while ( !st_RMQLeft.empty() && (st_RMQLeft->first / slabsize) == thisslab )
{
DBG(debug_rmq,"RMQ left in slab: (" << st_RMQLeft->first << ",inf) target " << st_RMQLeft->second);
offset_type result = slabrmq.query( st_RMQLeft->first - start, thissize-1 );
assert( result < offset_type(thissize) );
result = buffer[result];
DBG(debug_rmq,"RMQ result " << result);
st_RMQAnswer->push( RMQAnswer(st_RMQLeft->second, result) );
++st_RMQLeft;
}
while ( !st_RMQRight.empty() && (st_RMQRight->first / slabsize) == thisslab )
{
if ( (st_RMQRight->second / slabsize) == thisslab )
{
DBG(debug_rmq,"RMQ pair in slab: (" << st_RMQRight->second << "," << st_RMQRight->first << ") target " << st_RMQRight->third);
offset_type result;
if ( st_RMQRight->first < offset_type(start + thissize) ) {
result = slabrmq.query( st_RMQRight->second - start, st_RMQRight->first - start );
assert( result < offset_type(thissize) );
result = buffer[result];
}
else {
result = 0;
}
DBG(debug_rmq,"RMQ result: " << result);
st_RMQAnswer->push( RMQAnswer(st_RMQRight->third, result) );
++st_RMQRight;
}
else
{
DBG(debug_rmq,"RMQ right in slab: (-inf," << st_RMQRight->first << ") target " << st_RMQRight->third);
offset_type result;
if ( st_RMQRight->first < offset_type(start + thissize) ) {
result = slabrmq.query( 0, st_RMQRight->first - start );
assert( result < offset_type(thissize) );
result = buffer[result];
}
else {
result = 0;
}
assert( (st_RMQRight->second / slabsize) < thisslab );
if ( (st_RMQRight->second / slabsize) < thisslab-1 ) {
const offset_type& min2 = spanrmq.query( (st_RMQRight->second / slabsize)+1, thisslab-1 );
result = std::min(result, min2);
}
DBG(debug_rmq,"RMQ result: " << result);
st_RMQAnswer->push( RMQAnswer(st_RMQRight->third, result) );
++st_RMQRight;
}
}
{
offset_type gmin = slabrmq.query(0,thissize-1);
gmin = buffer[gmin];
DBG(debug_rmq,"Saving slab global minimum " << gmin);
spanrmq.set( thisslab, gmin );
}
start += thissize;
}
delete [] buffer;
st_RMQLeft.finish_clear();
st_RMQRight.finish_clear();
st_RMQAnswer->finish();
}
void start_output(size_t memsize_output)
{
st_RMQAnswer->sort(memsize_output);
done = false;
operator++();
}
typedef RMQAnswer value_type;
const value_type & operator*() const
{
return current;
}
const value_type* operator->() const
{
return ¤t;
}
RMQ& operator++()
{
if (st_RMQAnswer->empty()) {
done = true;
return *this;
}
current = *(*st_RMQAnswer);
++(*st_RMQAnswer);
if (!st_RMQAnswer->empty() && (*st_RMQAnswer)->first == current.first) {
current.second = std::min(current.second, (*st_RMQAnswer)->second);
++(*st_RMQAnswer);
}
return *this;
}
bool empty() const
{
return done;
}
};
#endif
template <class Mod0, class Mod1, class Mod2>
class merge_sa
{
public:
typedef offset_type value_type;
private:
Mod0 & A;
Mod1 & B;
Mod2 & C;
skew_quint_type s0;
skew_quad_type s1;
skew_quint_type s2;
int selected;
bool exists[3];
offset_type index;
offset_type merge_result;
#if LCP_CALC
int prev_selected;
skew_quint_type p0;
skew_quad_type p1;
skew_quint_type p2;
uint8_array_type& lcp_base;
RMQ& lcp_rmq;
skew_pair_array_type& lcp_rmq_ranks;
#endif
<index, character, character+1, ~SA12+1, ~SA12+2>
<index, ~SA12 name, character, ~SA12+1 name>
<index ,~SA12 name, character, character+1, ~SA12+2 name>
bool cmp_mod12()
{
return s1.get_2() < s2.get_2();
}
bool cmp_mod02()
{
if (s0.get_2() == s2.get_3()) {
if (s0.get_3() == s2.get_4()) {
return s0.get_5() < s2.get_5();
}
else {
return s0.get_3() < s2.get_4();
}
}
else {
return s0.get_2() < s2.get_3();
}
}
bool cmp_mod01()
{
if (s0.get_2() == s1.get_3()) {
return s0.get_4() < s1.get_4();
}
else {
return s0.get_2() < s1.get_3();
}
}
void get012()
{
DBG1(debug_merge, "0" << s0 << " 1" << s1 << " 2" << s2 << " -> ");
if (cmp_mod01()) {
if (cmp_mod02()) {
selected = 0;
merge_result = s0.get_1();
}
else {
selected = 2;
merge_result = s2.get_1();
}
}
else {
if (cmp_mod12()) {
selected = 1;
merge_result = s1.get_1();
}
else {
selected = 2;
merge_result = s2.get_1();
}
}
DBG3(debug_merge, "selected " << selected << " - index " << merge_result);
}
void get01()
{
DBG1(debug_merge, "0" << s0 << " 1" << s1 << " -> ");
if (cmp_mod01()) {
selected = 0;
merge_result = s0.get_1();
}
else {
selected = 1;
merge_result = s1.get_1();
}
DBG3(debug_merge, "selected " << selected << " - index " << merge_result);
}
void get12()
{
DBG1(debug_merge, "1" << s1 << " 2" << s2 << " -> ");
if (cmp_mod12()) {
selected = 1;
merge_result = s1.get_1();
}
else {
selected = 2;
merge_result = s2.get_1();
}
DBG3(debug_merge, "selected " << selected << " - index " << merge_result);
}
void get02()
{
DBG1(debug_merge, "0" << s0 << " 2" << s2 << " -> ");
if (cmp_mod02()) {
selected = 0;
merge_result = s0.get_1();
}
else {
selected = 2;
merge_result = s2.get_1();
}
DBG3(debug_merge, "selected " << selected << " - index " << merge_result);
}
void solve()
{
#if LCP_CALC
prev_selected = selected;
#endif
if (exists[0]) {
if (exists[1]) {
if (exists[2]) {
get012();
}
else {
get01();
}
}
else {
if (exists[2]) {
get02();
}
else {
selected = 0;
merge_result = s0.get_1();
DBG(debug_merge, "get0(): 0" << s0 << " -> selected " << selected << " - index " << merge_result);
}
}
}
else {
if (exists[1]) {
if (exists[2]) {
get12();
}
else {
selected = 1;
merge_result = s1.get_1();
DBG(debug_merge, "get1(): 1" << s1 << " -> selected " << selected << " - index " << merge_result);
}
}
else {
if (exists[2]) {
selected = 2;
merge_result = s2.get_1();
DBG(debug_merge, "get2(): 2" << s2 << " -> selected " << selected << " - index " << merge_result);
}
else {
assert(false);
}
}
}
DBG(debug_merge_result, merge_result);
#if LCP_CALC
if (prev_selected < 0)
{
lcp_base.push_back(0);
}
else if (prev_selected == 0 && selected == 0)
{
calc_lcp_char2(p0.get_2(), s0.get_2(),
p0.get_3(), s0.get_3(),
p0.get_5(), s0.get_5());
}
else if (prev_selected == 0 && selected == 1)
{
calc_lcp_char1(p0.get_2(), s1.get_3(),
p0.get_4(), s1.get_4());
}
else if (prev_selected == 0 && selected == 2)
{
calc_lcp_char2(p0.get_2(), s2.get_3(),
p0.get_3(), s2.get_4(),
p0.get_5(), s2.get_5());
}
else if (prev_selected == 1 && selected == 0)
{
calc_lcp_char1(p1.get_3(), s0.get_2(),
p1.get_4(), s0.get_4());
}
else if (prev_selected == 1 && selected == 1)
{
calc_lcp_char0(p1.get_2(), s1.get_2());
}
else if (prev_selected == 1 && selected == 2)
{
calc_lcp_char0(p1.get_2(), s2.get_2());
}
else if (prev_selected == 2 && selected == 0)
{
calc_lcp_char2(p2.get_3(), s0.get_2(),
p2.get_4(), s0.get_3(),
p2.get_5(), s0.get_5());
}
else if (prev_selected == 2 && selected == 1)
{
calc_lcp_char0(p2.get_2(), s1.get_2());
}
else if (prev_selected == 2 && selected == 2)
{
calc_lcp_char0(p2.get_2(), s2.get_2());
}
else {
assert(!"Impossible case");
}
if (selected == 0)
p0 = s0;
else if (selected == 1)
p1 = s1;
else if (selected == 2)
p2 = s2;
else
assert(!"Impossible case");
#endif
}
#if LCP_CALC
void save_rmq(const offset_type& rankA, const offset_type& rankB)
{
if (rankA == offset_type(0)) return;
assert(rankA > offset_type(0)); assert(rankB > offset_type(0));
lcp_rmq.query(rankA-1+1, rankB-1, index);
lcp_rmq_ranks.push_back( skew_pair_type(rankA-1, rankB-1) );
}
void calc_lcp_char0(const offset_type& rankA, const offset_type& rankB)
{
DBG(debug_merge_lcp, "lcp[" << index << "] = 0 + rmq_lcp(" << rankA-1 << "+1," << rankB-1 << ")");
assert(rankB > offset_type(0));
lcp_base.push_back(0);
save_rmq(rankA, rankB);
}
void calc_lcp_char1(const offset_type& chA0, const offset_type& chB0,
const offset_type& rankA, const offset_type& rankB)
{
if (chA0 != chB0) {
DBG(debug_merge_lcp, "lcp[" << index << "] = 0");
lcp_base.push_back(0);
}
else {
DBG(debug_merge_lcp, "lcp[" << index << "] = 1 + rmq_lcp(" << rankA-1 << "+1," << rankB-1 << ")");
assert(rankB > offset_type(0));
lcp_base.push_back(1);
save_rmq(rankA, rankB);
}
}
void calc_lcp_char2(const offset_type& chA0, const offset_type& chB0,
const offset_type& chA1, const offset_type& chB1,
const offset_type& rankA, const offset_type& rankB)
{
if (chA0 != chB0) {
DBG(debug_merge_lcp, "lcp[" << index << "] = 0");
lcp_base.push_back(0);
}
else if (chA1 != chB1) {
DBG(debug_merge_lcp, "lcp[" << index << "] = 1");
lcp_base.push_back(1);
}
else {
DBG(debug_merge_lcp, "lcp[" << index << "] = 2 + rmq_lcp(" << rankA-1 << "+1," << rankB-1 << ")");
assert(rankB > offset_type(0));
lcp_base.push_back(2);
save_rmq(rankA, rankB);
}
}
#endif
public:
bool empty() const
{
return (A.empty() && B.empty() && C.empty());
}
merge_sa(Mod0 & x1, Mod1 & x2, Mod2 & x3
#if LCP_CALC
, uint8_array_type& lcp_base_, RMQ& lcp_rmq_, skew_pair_array_type& lcp_rmq_ranks_
#endif
)
: A(x1), B(x2), C(x3), selected(-1), index(0)
#if LCP_CALC
, lcp_base(lcp_base_), lcp_rmq(lcp_rmq_), lcp_rmq_ranks(lcp_rmq_ranks_)
#endif
{
assert(!A.empty());
assert(!B.empty());
assert(!C.empty());
exists[0] = true;
exists[1] = true;
exists[2] = true;
s0 = *A;
s1 = *B;
s2 = *C;
solve();
}
const value_type & operator* () const
{
return merge_result;
}
merge_sa & operator++ ()
{
if (selected == 0) {
assert(!A.empty());
++A;
if (!A.empty())
s0 = *A;
else
exists[0] = false;
}
else if (selected == 1) {
assert(!B.empty());
++B;
if (!B.empty())
s1 = *B;
else
exists[1] = false;
}
else {
assert(!C.empty());
assert(selected == 2);
++C;
if (!C.empty())
s2 = *C;
else
exists[2] = false;
}
++index;
if (!empty()) solve();
return *this;
}
};
static inline size_t subp_size(size_t n)
{
return (n / 3) * 2 + ((n % 3) == 2);
}
@param
@param
@param
@param
@param
template <class S, class Mod1, class Mod2,
const int block_size, const size_t memsize>
class build_sa
{
public:
typedef offset_type value_type;
static const size_t add_rank = 1;
private:
typedef typename stream::use_push < skew_quad_type > mod1_push_type;
typedef typename stream::runs_creator < mod1_push_type, less_quad_2nd, block_size, RC > mod1_runs_type;
typedef typename mod1_runs_type::sorted_runs_type sorted_mod1_runs_type;
typedef typename stream::runs_merger < sorted_mod1_runs_type, less_quad_2nd > mod1_rm_type;
typedef typename stream::use_push < skew_quint_type > mod2_push_type;
typedef typename stream::runs_creator < mod2_push_type, less_quint_2nd, block_size, RC > mod2_runs_type;
typedef typename mod2_runs_type::sorted_runs_type sorted_mod2_runs_type;
typedef typename stream::runs_merger < sorted_mod2_runs_type, less_quint_2nd > mod2_rm_type;
typedef typename stream::use_push < skew_quint_type > mod0_push_type;
typedef typename stream::runs_creator < mod0_push_type, less_mod0, block_size, RC > mod0_runs_type;
typedef typename mod0_runs_type::sorted_runs_type sorted_mod0_runs_type;
typedef typename stream::runs_merger < sorted_mod0_runs_type, less_mod0 > mod0_rm_type;
typedef merge_sa < mod0_rm_type, mod1_rm_type, mod2_rm_type > merge_sa_type;
less_mod0 c0;
less_quad_2nd c1;
less_quint_2nd c2;
mod1_rm_type *mod1_result;
mod2_rm_type *mod2_result;
mod0_rm_type *mod0_result;
merge_sa_type *vmerge_sa;
S & source;
Mod1 & mod_1;
Mod2 & mod_2;
offset_type t[3];
offset_type old_t2;
offset_type old_mod2;
bool exists[3];
offset_type mod_one;
offset_type mod_two;
size_t index;
bool ready;
value_type result;
public:
build_sa(S & source_, Mod1 & mod_1_, Mod2 & mod_2_, size_t a_size
#if LCP_CALC
, uint8_array_type& lcp_base, RMQ& lcp_rmq1, skew_pair_array_type& lcp_rmq1_ranks
#endif
)
: source(source_), mod_1(mod_1_), mod_2(mod_2_), index(0), ready(false)
{
assert(!source_.empty());
DBGMEM("creating mod0,1,2");
mod0_runs_type mod0_runs(c0, memsize / 3);
mod1_runs_type mod1_runs(c1, memsize / 3);
mod2_runs_type mod2_runs(c2, memsize / 3);
while (!source.empty())
{
exists[0] = false;
exists[1] = false;
exists[2] = false;
if (!source.empty()) {
t[0] = *source;
++source;
exists[0] = true;
}
if (!source.empty()) {
assert(!mod_1.empty());
t[1] = *source;
++source;
mod_one = *mod_1 + add_rank;
++mod_1;
exists[1] = true;
}
if (!source.empty()) {
assert(!mod_2.empty());
t[2] = *source;
++source;
mod_two = *mod_2 + add_rank;
++mod_2;
exists[2] = true;
}
assert(t[0] != offset_type(0));
assert(t[1] != offset_type(0));
assert(t[2] != offset_type(0));
if (exists[2]) {
DBG(debug_mcreate, "nomiss");
DBG(debug_mcreate, "Mod0 : (" << index << "|" << t[0] << "," << t[1] << " - " << mod_one << "," << mod_two << ")");
DBG(debug_mcreate, "Mod1 : (" << (index + 1) << "|" << t[1] << " - " << mod_one << "," << mod_two << ")");
mod0_runs.push(skew_quint_type(index, t[0], t[1], mod_one, mod_two));
mod1_runs.push(skew_quad_type(index + 1, mod_one, t[1], mod_two));
if (index != offset_type(0)) {
DBG(debug_mcreate, "Mod2 : (" << (index - 1) << "|" << old_t2 << "," << t[0] << " - " << old_mod2 << "," << mod_one << ")");
mod2_runs.push(skew_quint_type((index - 1), old_mod2, old_t2, t[0], mod_one));
}
}
else if (exists[1]) {
DBG(debug_mcreate, "nolast");
DBG(debug_mcreate, "Mod0 : (" << index << "|" << t[0] << "," << t[1] << " - " << mod_one << "," << 0 << ")");
DBG(debug_mcreate, "Mod1 : (" << (index + 1) << "|" << t[1] << " - " << mod_one << "," << 0 << ")");
mod0_runs.push(skew_quint_type(index, t[0], t[1], mod_one, 0));
mod1_runs.push(skew_quad_type(index + 1, mod_one, t[1], 0));
if (index != offset_type(0)) {
DBG(debug_mcreate, "Mod2 : (" << (index - 1) << "|" << old_t2 << "," << t[0] << " - " << old_mod2 << "," << mod_one << ")");
mod2_runs.push(skew_quint_type((index - 1), old_mod2, old_t2, t[0], mod_one));
}
}
else {
assert(exists[0]);
DBG(debug_mcreate, "twomiss");
DBG(debug_mcreate, "Mod0 : (" << index << "|" << t[0] << "," << 0 << " - " << 0 << "," << 0 << ")");
mod0_runs.push(skew_quint_type(index, t[0], 0, 0, 0));
if (index != offset_type(0)) {
DBG(debug_mcreate, "Mod2 : (" << (index - 1) << "|" << old_t2 << "," << t[0] << " - " << old_mod2 << "," << 0 << ")");
mod2_runs.push(skew_quint_type((index - 1), old_mod2, old_t2, t[0], 0));
}
}
old_mod2 = mod_two;
old_t2 = t[2];
index += 3;
}
if ((a_size % 3) == 0) {
if (index != offset_type(0)) {
DBG(debug_mcreate, "Mod2 : (" << (index - 1) << "|" << old_t2 << "," << 0 << " - " << old_mod2 << "," << 0 << ")");
mod2_runs.push(skew_quint_type((index - 1), old_mod2, old_t2, 0, 0));
}
}
if (debug_S012)
{
offset_type i = 0;
for(mod0_rm_type mod0_result(mod0_runs.result(), less_mod0(), memsize / 3);
!mod0_result.empty(); ++mod0_result, ++i)
DBG(debug_S012, "S0[" << i << "]: " << *mod0_result);
i = 0;
for(mod1_rm_type mod1_result(mod1_runs.result(), less_quad_2nd(), memsize / 3);
!mod1_result.empty(); ++mod1_result, ++i)
DBG(debug_S012, "S1[" << i << "]: " << *mod1_result);
i = 0;
for(mod2_rm_type mod2_result(mod2_runs.result(), less_quint_2nd(), memsize / 3);
!mod2_result.empty(); ++mod2_result, ++i)
DBG(debug_S012, "S2[" << i << "]: " << *mod2_result);
}
mod0_runs.deallocate();
mod1_runs.deallocate();
mod2_runs.deallocate();
mod0_result = new mod0_rm_type(mod0_runs.result(), less_mod0(), memsize / 3);
mod1_result = new mod1_rm_type(mod1_runs.result(), less_quad_2nd(), memsize / 3);
mod2_result = new mod2_rm_type(mod2_runs.result(), less_quint_2nd(), memsize / 3);
#if !LCP_CALC
vmerge_sa = new merge_sa_type(*mod0_result, *mod1_result, *mod2_result);
#else
vmerge_sa = new merge_sa_type(*mod0_result, *mod1_result, *mod2_result, lcp_base, lcp_rmq1, lcp_rmq1_ranks);
#endif
result = *(*vmerge_sa);
}
const value_type & operator*() const
{
return result;
}
build_sa & operator++()
{
assert(vmerge_sa != 0 && !vmerge_sa->empty());
++(*vmerge_sa);
if (!vmerge_sa->empty()) {
result = *(*vmerge_sa);
}
else {
assert(vmerge_sa->empty());
ready = true;
assert(vmerge_sa != NULL);
delete vmerge_sa; vmerge_sa = NULL;
assert(mod0_result != NULL && mod1_result != NULL && mod2_result != NULL);
delete mod0_result; mod0_result = NULL;
delete mod1_result; mod1_result = NULL;
delete mod2_result; mod2_result = NULL;
}
return *this;
}
bool empty() const
{
return ready;
}
};
#if LCP_CALC
class build_lcp
{
public:
typedef offset_type value_type;
private:
std::auto_ptr<offset_array_type> lcp_base;
offset_array_it_rg lcp_baseiter;
std::auto_ptr<RMQ> lcp_rmq;
offset_type index;
value_type result;
void calc_next_lcp()
{
if (empty()) return;
result = *lcp_baseiter;
DBG(debug_lcp_build, "lcp[" << index << "] = " << result);
if (!lcp_rmq->empty())
{
if ((*lcp_rmq)->first == index)
{
result += (*lcp_rmq)->second;
DBG(debug_lcp_build, "lcp[" << index << "] += " << (*lcp_rmq)->second << " -> " << result);
++(*lcp_rmq);
}
assert((*lcp_rmq).empty() || (*lcp_rmq)->first > index);
}
}
public:
build_lcp(offset_array_type* _lcp_base, RMQ* _lcp_rmq, uint8_array_type& lcp_names)
: lcp_base(_lcp_base),
lcp_baseiter(lcp_base->begin(), lcp_base->end()),
lcp_rmq(_lcp_rmq),
index(0)
{
DBGMEM("computing RMQs on LCP_N");
uint8_array_it_rg lcp_names_stream(lcp_names.begin(), lcp_names.end());
lcp_rmq->compute(lcp_names_stream);
DBGMEM("done computing RMQs on LCP_N");
}
void start_output(size_t memsize)
{
lcp_rmq->start_output(memsize);
calc_next_lcp();
}
const value_type & operator*() const
{
return result;
}
build_lcp& operator++ ()
{
++lcp_baseiter, ++index;
calc_next_lcp();
return *this;
}
bool empty() const
{
return lcp_baseiter.empty();
}
};
#endif
@param
@param
@param
template <class Input, const int block_size = ::block_size>
class algorithm
{
#if LCP_CALC
static const int memdivA = 5;
#else
static const int memdivA = 4;
#endif
public:
typedef offset_type value_type;
typedef typename Input::value_type alphabet_type;
private:
typedef tuples::counter <offset_type> counter_stream_type;
typedef make_pairs <counter_stream_type, Input> make_pairs_input_type;
typedef make_quads <make_pairs_input_type, offset_type, 1> make_quads_input_type;
typedef extract_mod12 <make_quads_input_type> mod12_quads_input_type;
typedef typename stream::sort<mod12_quads_input_type, less_quad<offset_type>, block_size> sort_mod12_input_type;
typedef naming <sort_mod12_input_type> naming_input_type;
typedef typename stream::runs_creator <naming_input_type, less_skew, block_size> rc_order_type;
typedef typename rc_order_type::sorted_runs_type sr_order_type;
typedef typename stream::runs_merger <sr_order_type, less_skew> rm_order_type;
typedef make_quads <rm_order_type, offset_type, 1> make_quads_loop_type;
typedef extract_mod12 <make_quads_loop_type> mod12_quads_loop_type;
typedef typename stream::sort <mod12_quads_loop_type, less_quad<offset_type>, block_size> sort_loop_type;
typedef naming <sort_loop_type> naming_loop_type;
typedef typename stream::runs_creator <naming_loop_type, less_skew, block_size> rc_loop_type;
typedef build_sa <offset_array_it_rg, offset_array_it_rg, offset_array_it_rg, block_size, ram_use * 3/memdivA> build_sa_type;
typedef make_pairs < counter_stream_type, build_sa_type > prepare_isa_type;
typedef typename stream::use_push < skew_pair_type > isa1_push_type;
typedef typename stream::runs_creator < isa1_push_type, less_pair_2nd, block_size, RC > isa1_rc_type;
typedef typename isa1_rc_type::sorted_runs_type isa1_sr_type;
typedef typename stream::runs_merger < isa1_sr_type, less_pair_2nd > isa1_rm_type;
typedef typename stream::use_push < skew_pair_type > isa2_push_type;
typedef typename stream::runs_creator < isa2_push_type, less_pair_2nd, block_size, RC > isa2_rc_type;
typedef typename isa2_rc_type::sorted_runs_type isa2_sr_type;
typedef typename stream::runs_merger < isa2_sr_type, less_pair_2nd > isa2_rm_type;
typedef stream::choose <isa1_rm_type,1> isa1_first_type;
typedef stream::choose <isa2_rm_type,1> isa2_first_type;
typedef build_sa < offset_array_it_rg, isa1_first_type, isa2_first_type, block_size, ram_use * 3/memdivA> build_sa_loop_type;
typedef make_pairs < counter_stream_type, build_sa_loop_type > prepare_isa_loop_type;
typedef typename stream::use_push < skew_pair_type > isa_loop_push_type;
typedef typename stream::runs_creator < isa_loop_push_type, less_pair_2nd, block_size, RC > isa_loop_rc_type;
#if !LCP_CALC
build_sa_loop_type *out_sa;
#else
offset_array_it_rg* out_sa;
offset_array_type* out_sa_array;
build_lcp* out_lcp;
#endif
bool finished;
template <class Stream>
static inline void print(Stream& s)
{
while (!s.empty())
{
std::cout << *s << "\n";
++s;
}
}
protected:
#if LCP_CALC
static build_lcp* expand_build_lcp(build_lcp* prev_lcp, offset_array_type* prev_sa, isa1_sr_type &isa1_sr, isa2_sr_type& isa2_sr,
uint8_array_type& lcp_names,
uint8_array_type* lcp_base8, RMQ* lcp_rmq1_ptr, skew_pair_array_type* lcp_rmq1_ranks )
{
DBGMEM("computing RMQs on LCP_R");
RMQ& lcp_rmq1 = *lcp_rmq1_ptr;
prev_lcp->start_output(ram_use / 2);
lcp_rmq1.compute(*prev_lcp);
delete prev_lcp;
skew_pair_array_it_rg rmq1_ranks(lcp_rmq1_ranks->begin(), lcp_rmq1_ranks->end());
offset_array_type* lcp_next = new offset_array_type;
lcp_rmq1.start_output(ram_use / 3);
RMQ* lcp_rmq2 = new RMQ(ram_use / 3);
stxxl::sorter<skew_triple_type,less_triple_1st,block_size> SAqueries (less_triple_1st(), ram_use / 3);
{
uint8_array_it_rg lcp_base8_stream(lcp_base8->begin(), lcp_base8->end());
for (offset_type index = 0; !lcp_base8_stream.empty(); ++index, ++lcp_base8_stream)
{
offset_type lcp = *lcp_base8_stream;
if ( !lcp_rmq1.empty() && lcp_rmq1->first == index )
{
assert( !rmq1_ranks.empty() );
if (lcp_rmq1->second == offset_type(0))
{
DBG(debug_lcp_expand, "next_lcp[" << index << "] = " << lcp << " + direct RMQ on ranks " << *rmq1_ranks);
assert(rmq1_ranks->first <= rmq1_ranks->second);
lcp_rmq2->query( rmq1_ranks->first+1, rmq1_ranks->second, index );
}
else
{
lcp += 3 * lcp_rmq1->second;
DBG(debug_lcp_expand, "next_lcp[" << index << "] = " << lcp << " + far RMQ on ISA[SA[" << *rmq1_ranks << "] + " << lcp_rmq1->second << "]");
SAqueries.push( skew_triple_type(rmq1_ranks->first, lcp_rmq1->second, index) );
SAqueries.push( skew_triple_type(rmq1_ranks->second, lcp_rmq1->second, index) );
}
++lcp_rmq1, ++rmq1_ranks;
}
else
{
DBG(debug_lcp_expand, "next_lcp[" << index << "] = " << lcp);
}
lcp_next->push_back(lcp);
}
delete lcp_base8;
delete lcp_rmq1_ptr;
delete lcp_rmq1_ranks;
}
SAqueries.sort(ram_use / 3);
if (SAqueries.size())
{
stxxl::sorter<skew_pair_type,less_pair_1st,block_size> ISAqueries (less_pair_1st(), ram_use / 3);
offset_array_it_rg sa(prev_sa->begin(), prev_sa->end());
for (offset_type index = 0; !sa.empty() && !SAqueries.empty(); ++index, ++sa)
{
DBG(debug_lcp_expand, "SA[" << index << "] = " << *sa);
while ( !SAqueries.empty() && SAqueries->first == index )
{
DBG(debug_lcp_expand, "Matching SA[" << index << "] query with " << *sa << " + " << SAqueries->second << " for target " << SAqueries->third);
ISAqueries.push( skew_pair_type(*sa + SAqueries->second, SAqueries->third) );
++SAqueries;
}
}
assert( SAqueries.empty() );
SAqueries.finish_clear();
delete prev_sa;
stxxl::sorter<skew_pair_type,less_pair_1st,block_size> ISAanswers (less_pair_1st(), ram_use * 2/9);
ISAqueries.sort(ram_use * 2/9);
for (isa1_rm_type isa1_rm(isa1_sr, less_pair_2nd(), ram_use * 2/9);
!isa1_rm.empty() && !ISAqueries.empty(); ++isa1_rm)
{
DBG(debug_lcp_expand, "ISA1: " << *isa1_rm);
while ( !ISAqueries.empty() && ISAqueries->first == isa1_rm->second )
{
DBG(debug_lcp_expand, "Matching ISA[" << ISAqueries->first << "] query for target " << ISAqueries->second << " with " << isa1_rm->first);
ISAanswers.push( skew_pair_type(ISAqueries->second, isa1_rm->first) );
++ISAqueries;
}
}
for (isa2_rm_type isa2_rm(isa2_sr, less_pair_2nd(), ram_use * 2/9);
!isa2_rm.empty() && !ISAqueries.empty(); ++isa2_rm)
{
DBG(debug_lcp_expand, "ISA2: " << *isa2_rm);
while ( !ISAqueries.empty() && ISAqueries->first == isa2_rm->second )
{
DBG(debug_lcp_expand, "Matching ISA[" << ISAqueries->first << "] query for target " << ISAqueries->second << " with " << isa2_rm->first);
ISAanswers.push( skew_pair_type(ISAqueries->second, isa2_rm->first) );
++ISAqueries;
}
}
while ( !ISAqueries.empty() )
{
DBG(debug_lcp_expand, "Out-of-bounds ISA[" << ISAqueries->first << "] query for target " << ISAqueries->second << " with ");
++ISAqueries;
}
assert( ISAqueries.empty() );
ISAqueries.finish_clear();
ISAanswers.sort(ram_use / 3);
while ( !ISAanswers.empty() )
{
skew_pair_type p1 = *ISAanswers; ++ISAanswers;
if ( ISAanswers.empty() || ISAanswers->first != p1.first )
{
DBG(debug_lcp_expand, "Discarding out-of-bounds RMQ(" << p1.second << "+1,?) -> target " << p1.first);
continue;
}
skew_pair_type p2 = *ISAanswers; ++ISAanswers;
assert( p1.first == p2.first );
if (p1.second > p2.second) std::swap(p1.second, p2.second);
DBG(debug_lcp_expand, "Matching for RMQ(" << p1.second << "+1," << p2.second << ") -> target " << p1.first);
lcp_rmq2->query( p1.second+1, p2.second, p1.first );
}
}
DBGMEM("done");
return new build_lcp(lcp_next, lcp_rmq2, lcp_names);
}
#endif
public:
algorithm(Input & data_in)
: finished(false)
{
typename std::vector<offset_array_type *> vectors;
#if LCP_CALC
typename std::vector<uint8_array_type *> lcp_name_vectors;
#endif
<index,*,*,*>
vectors.push_back(new offset_array_type);
#if LCP_CALC
lcp_name_vectors.push_back(new uint8_array_type);
#endif
DBG(debug, "Lexnaming input in depth 0");
counter_stream_type dummy;
make_pairs_input_type pairs_input(dummy, data_in);
make_quads_input_type quads_input(pairs_input, *vectors.back());
mod12_quads_input_type mod12_quads_input(quads_input);
sort_mod12_input_type sort_mod12_input(mod12_quads_input, less_quad<offset_type>(), ram_use, ram_use / 2);
bool unique = false;
#if !LCP_CALC
naming_input_type names_input(sort_mod12_input, unique);
#else
naming_input_type names_input(sort_mod12_input, *lcp_name_vectors.back(), unique);
#endif
rc_order_type *resume_order_rc = new rc_order_type(names_input, less_skew(), ram_use / 2);
rm_order_type *resume_order_rm = new rm_order_type(resume_order_rc->result(), less_skew(), ram_use / 2);
rc_loop_type *loop_order_rc = NULL;
DBGMEM("First naming done");
size_t depth = 0;
while (!unique)
{
DBG(debug, "Lexnaming " << resume_order_rm->size() << " triples in depth " << ++depth);
vectors.push_back(new offset_array_type);
#if LCP_CALC
lcp_name_vectors.push_back(new uint8_array_type);
#endif
make_quads_loop_type quads_loop(*resume_order_rm, *vectors.back());
mod12_quads_loop_type mod12_quads_loop(quads_loop);
sort_loop_type sort_quads_loop(mod12_quads_loop, less_quad<offset_type>(), ram_use / 2);
if (resume_order_rc != NULL) {
delete resume_order_rc; resume_order_rc = NULL;
}
if (loop_order_rc != NULL) {
delete loop_order_rc; loop_order_rc = NULL;
}
#if !LCP_CALC
naming_loop_type name_quads_loop(sort_quads_loop, unique);
#else
naming_loop_type name_quads_loop(sort_quads_loop, *lcp_name_vectors.back(), unique);
#endif
assert(resume_order_rm != NULL);
delete resume_order_rm; resume_order_rm = NULL;
loop_order_rc = new rc_loop_type(name_quads_loop, less_skew(), ram_use / 2);
resume_order_rm = new rm_order_type(loop_order_rc->result(), less_skew(), ram_use / 2);
DBGMEM("Next naming level done");
if (unique)
{
vectors.push_back( new offset_array_type(resume_order_rm->size()) );
stream::choose<rm_order_type,2> merger(*resume_order_rm);
stream::materialize(merger, vectors.back()->begin(), vectors.back()->end());
}
}
DBG(debug, "Found unique lexnames.");
g_statscache >> "maxdepth" << depth;
if (loop_order_rc != NULL) {
delete loop_order_rc; loop_order_rc = NULL;
}
if (resume_order_rc != NULL) {
delete resume_order_rc; resume_order_rc = NULL;
}
if (resume_order_rm != NULL) {
delete resume_order_rm; resume_order_rm = NULL;
}
DBGMEM("Unique names found.");
#if SKEW_DEBUG
for (size_t i = 0; i < vectors.size(); ++i)
{
offset_array_type & test = *vectors[i];
for (size_t e = 0; e < test.size(); ++e)
std::cout << test[e] << " ";
if ((test.size() & 1) == 1)
std::cout << " Break: " << (test.size() >> 1) + 1;
else
std::cout << " Break: " << (test.size() >> 1);
std::cout << " Nextbreak: " << subp_size(test.size());
std::cout << " Size: " << test.size();
std::cout << std::endl;
}
#endif
<index, character, character+1, ~SA12+1, ~SA12+2>
<index, ~SA12 name, character, ~SA12+1 name>
<index, ~SA12 name, character, character+1, ~SA12+2 name>
assert(vectors.size() > 2);
isa1_sr_type isa1_sr;
isa2_sr_type isa2_sr;
#if LCP_CALC
offset_array_type* prev_sa;
build_lcp* prev_lcp;
#endif
{
DBG(debug, "Merging from base case in depth " << depth);
offset_array_type& base_source = *vectors.back();
offset_array_type& prev_array = *vectors[vectors.size() - 2];
typename offset_array_type::iterator start_mod2 = base_source.begin();
start_mod2 = start_mod2 + ((prev_array.size() / 3) + ((prev_array.size() % 3) == 2));
size_t special = (subp_size(prev_array.size()) != base_source.size()) ? 1 : 0;
offset_array_it_rg source(prev_array.begin(), prev_array.end());
offset_array_it_rg mod1(base_source.begin(), start_mod2);
offset_array_it_rg mod2(start_mod2 + special, base_source.end());
#if !LCP_CALC
build_sa_type get_sa(source, mod1, mod2, prev_array.size());
#else
uint8_array_type lcp_base8;
RMQ* lcp_rmq = new RMQ(ram_use / memdivA);
skew_pair_array_type lcp_rmq1_ranks;
build_sa_type get_sa(source, mod1, mod2, prev_array.size(), lcp_base8, *lcp_rmq, lcp_rmq1_ranks);
#endif
counter_stream_type isa_index;
prepare_isa_type isa_tuples(isa_index, get_sa);
isa1_rc_type isa1_rc(less_pair_2nd(), ram_use / memdivA/2);
isa2_rc_type isa2_rc(less_pair_2nd(), ram_use / memdivA/2);
offset_array_type *last_array = vectors[vectors.size() - 3];
special = (prev_array.size() != subp_size(last_array->size()));
offset_type mod2_pos = (subp_size(last_array->size()) >> 1) + (subp_size(last_array->size()) & 1) + special;
DBG(0, "mod2_pos: " << mod2_pos << " special:" << special << " minus2: " << last_array->size() << " prev2: " << prev_array.size());
#if LCP_CALC
prev_sa = new offset_array_type;
#endif
while (!isa_tuples.empty()) {
const skew_pair_type& tmp = *isa_tuples;
#if LCP_CALC
prev_sa->push_back(tmp.second);
#endif
if (tmp.second < mod2_pos) {
#if !LCP_CALC
if (tmp.second + special < mod2_pos)
#endif
isa1_rc.push(tmp);
}
else {
isa2_rc.push(tmp);
}
++isa_tuples;
}
isa1_rc.deallocate();
isa2_rc.deallocate();
#if LCP_CALC
assert(base_source.size() == lcp_name_vectors.back()->size());
offset_array_type* lcp_base = new offset_array_type(lcp_base8.size());
uint8_array_it_rg lcp_base8_iter(lcp_base8.begin(), lcp_base8.end());
stream::materialize(lcp_base8_iter, lcp_base->begin(), lcp_base->end());
prev_lcp = new build_lcp(lcp_base, lcp_rmq, *lcp_name_vectors.back());
delete lcp_name_vectors.back();
lcp_name_vectors.pop_back();
#endif
isa1_sr = isa1_rc.result();
isa2_sr = isa2_rc.result();
if (debug_isa)
{
for (isa1_rm_type isa1_rm(isa1_sr, less_pair_2nd(), ram_use / 8);
!isa1_rm.empty(); ++isa1_rm)
DBG(debug_isa, "ISA1: " << *isa1_rm);
for (isa2_rm_type isa2_rm(isa2_sr, less_pair_2nd(), ram_use / 8);
!isa2_rm.empty(); ++isa2_rm)
DBG(debug_isa, "ISA2: " << *isa2_rm);
}
}
delete vectors.back();
vectors.pop_back();
#if SKEW_DEBUG
for(size_t i = 0; i < vectors.size(); ++i) {
offset_array_type& test = *vectors[i];
for(size_t e = 0; e < test.size(); ++e)
std::cout << test[e] << " ";
std::cout.flush();
if ((test.size() & 1) == 1)
std::cout << " Break: " << (test.size() >> 1) + 1;
else
std::cout << " Break: " << (test.size() >> 1);
std::cout << " Nextbreak: " << subp_size(test.size());
std::cout << " Size: " << test.size();
std::cout << std::endl;
}
#endif
offset_array_type *prev_array = vectors[vectors.size() - 2];
while (vectors.size() > 2)
{
DBG(debug, "Exiting recursion depth " << --depth);
DBGMEM("Loop start");
isa1_rm_type isa1_rm(isa1_sr, less_pair_2nd(), ram_use / memdivA/2);
isa2_rm_type isa2_rm(isa2_sr, less_pair_2nd(), ram_use / memdivA/2);
isa1_first_type isa1_first(isa1_rm);
isa2_first_type isa2_first(isa2_rm);
prev_array = vectors[vectors.size() - 2];
offset_array_it_rg source(prev_array->begin(), prev_array->end());
#if !LCP_CALC
build_sa_loop_type get_sa(source, isa1_first, isa2_first, prev_array->size());
#else
uint8_array_type* lcp_base8 = new uint8_array_type;
RMQ* lcp_rmq1 = new RMQ(ram_use / memdivA);
skew_pair_array_type* lcp_rmq1_ranks = new skew_pair_array_type;
build_sa_loop_type get_sa(source, isa1_first, isa2_first, prev_array->size(),
*lcp_base8, *lcp_rmq1, *lcp_rmq1_ranks);
offset_array_type* suffixarray = new offset_array_type();
#endif
isa1_rm.deallocate(); isa2_rm.deallocate();
#if !LCP_CALC
isa1_sr->clear(); isa2_sr->clear();
#endif
counter_stream_type isa_loop_index;
prepare_isa_loop_type isa_loop_tuples(isa_loop_index, get_sa);
isa_loop_rc_type isa1_loop_rc(less_pair_2nd(), ram_use / memdivA/2);
isa_loop_rc_type isa2_loop_rc(less_pair_2nd(), ram_use / memdivA/2);
offset_array_type* last_array = vectors[vectors.size() - 3];
size_t special = (prev_array->size() != subp_size(last_array->size())) ? 1 : 0;
offset_type mod2_loop_pos = (subp_size(last_array->size()) >> 1) + (subp_size(last_array->size()) & 1) + special;
DBG(0, "Mod2_Pos: " << mod2_loop_pos << " Special:" << special << " Minus2: " << last_array->size() << " Prev2: " << prev_array->size() << " Vsize:" << vectors.size() );
while (!isa_loop_tuples.empty()) {
const skew_pair_type& tmp = *isa_loop_tuples;
#if LCP_CALC
suffixarray->push_back(tmp.second);
#endif
if (tmp.second < mod2_loop_pos) {
#if !LCP_CALC
if (tmp.second + special < mod2_loop_pos)
#endif
isa1_loop_rc.push(tmp);
}
else {
isa2_loop_rc.push(tmp);
}
++isa_loop_tuples;
}
isa1_rm.deallocate();
isa2_rm.deallocate();
isa1_loop_rc.deallocate();
isa2_loop_rc.deallocate();
#if LCP_CALC
assert( lcp_base8->size() == suffixarray->size() );
assert( vectors.back()->size() == lcp_name_vectors.back()->size() );
prev_lcp = expand_build_lcp(prev_lcp, prev_sa, isa1_sr, isa2_sr,
*lcp_name_vectors.back(),
lcp_base8, lcp_rmq1, lcp_rmq1_ranks);
delete lcp_name_vectors.back();
lcp_name_vectors.pop_back();
prev_sa = suffixarray;
#endif
isa1_sr = isa1_loop_rc.result();
isa2_sr = isa2_loop_rc.result();
if (debug_isa)
{
for (isa1_rm_type isa1_rm(isa1_sr, less_pair_2nd(), ram_use / 8);
!isa1_rm.empty(); ++isa1_rm)
DBG(debug_isa, "ISA1: " << *isa1_rm);
for (isa2_rm_type isa2_rm(isa2_sr, less_pair_2nd(), ram_use / 8);
!isa2_rm.empty(); ++isa2_rm)
DBG(debug_isa, "ISA2: " << *isa2_rm);
}
delete vectors.back();
vectors.pop_back();
}
DBG(debug, "Merging final suffix array.");
{
isa1_rm_type isa1_rm(isa1_sr, less_pair_2nd(), ram_use / memdivA/2);
isa2_rm_type isa2_rm(isa2_sr, less_pair_2nd(), ram_use / memdivA/2);
isa1_first_type isa1_first(isa1_rm);
isa2_first_type isa2_first(isa2_rm);
prev_array = vectors[vectors.size() - 2];
offset_array_it_rg source(prev_array->begin(), prev_array->end());
DBG(debug, "Final build_sa: source " << prev_array->size() << " - mod1 " << isa1_rm.size() << " - mod2 " << isa2_rm.size());
#if !LCP_CALC
out_sa = new build_sa_loop_type(source, isa1_first, isa2_first, prev_array->size());
#else
uint8_array_type* lcp_base8 = new uint8_array_type;
RMQ* lcp_rmq1 = new RMQ(ram_use / memdivA);
skew_pair_array_type* lcp_rmq1_ranks = new skew_pair_array_type;
out_sa_array = new offset_array_type(prev_array->size());
{
build_sa_loop_type get_sa(source, isa1_first, isa2_first, prev_array->size(),
*lcp_base8, *lcp_rmq1, *lcp_rmq1_ranks);
stream::materialize(get_sa, out_sa_array->begin(), out_sa_array->end());
}
isa1_rm.deallocate();
isa2_rm.deallocate();
assert( lcp_base8->size() == out_sa_array->size() );
assert( vectors.back()->size() == lcp_name_vectors.back()->size() );
prev_lcp = expand_build_lcp(prev_lcp, prev_sa, isa1_sr, isa2_sr,
*lcp_name_vectors.back(),
lcp_base8, lcp_rmq1, lcp_rmq1_ranks);
prev_lcp->start_output(ram_use / 4);
delete lcp_name_vectors.back();
lcp_name_vectors.pop_back();
out_sa = new offset_array_it_rg(out_sa_array->begin(), out_sa_array->end());
out_lcp = prev_lcp;
#endif
}
DBG(debug, "Done with final merge.");
DBGMEM("done");
}
const value_type & operator*() const
{
return *(*out_sa);
}
#if LCP_CALC
const value_type& lcp() const
{
return *(*out_lcp);
}
#endif
algorithm & operator++()
{
assert(!out_sa->empty());
++(*out_sa);
#if LCP_CALC
++(*out_lcp);
#endif
if ((out_sa != NULL) && (out_sa->empty())) {
finished = true;
delete out_sa; out_sa = NULL;
#if LCP_CALC
delete out_sa_array; out_sa_array = NULL;
delete out_lcp; out_lcp = NULL;
#endif
}
return *this;
}
bool empty() const
{
return finished;
}
};
};
template <typename StringContainer, typename SuffixArrayContainer>
struct SACA
{
std::string name()
{
#if !LCP_CALC
return "skew3";
#else
return "skew3-lcp";
#endif
}
void prepare(const StringContainer& string, SuffixArrayContainer& suffixarray, unsigned int K)
{
stxxl::STXXL_UNUSED(string);
stxxl::STXXL_UNUSED(suffixarray);
stxxl::STXXL_UNUSED(K);
}
void run(const StringContainer& string, SuffixArrayContainer& suffixarray, unsigned int K)
{
stxxl::STXXL_UNUSED(K);
typedef typename StringContainer::value_type alphabet_type;
typedef typename SuffixArrayContainer::value_type offset_type;
typedef stxxl::stream::vector_iterator2stream< typename StringContainer::const_iterator > input_type;
typedef typename skew<offset_type>::template algorithm<input_type> saca_type;
input_type input = input_type(string.begin(), string.end());
saca_type saca(input);
stxxl::stream::materialize(saca, suffixarray.begin(), suffixarray.end());
}
#if LCP_CALC
template <typename LCPArrayContainer>
void run_lcp(const StringContainer& string, SuffixArrayContainer& suffixarray, LCPArrayContainer& lcparray, unsigned int )
{
typedef typename StringContainer::value_type alphabet_type;
typedef typename SuffixArrayContainer::value_type offset_type;
typedef stxxl::stream::vector_iterator2stream< typename StringContainer::const_iterator > input_type;
typedef typename skew<offset_type>::template algorithm<input_type> saca_type;
input_type input = input_type(string.begin(), string.end());
saca_type saca(input);
stxxl::vector_bufwriter<SuffixArrayContainer> sa_writer (suffixarray.begin(), suffixarray.end());
stxxl::vector_bufwriter<LCPArrayContainer> lcp_writer (lcparray.begin(), lcparray.end());
for (size_t i = 0; !saca.empty(); ++saca, ++i)
{
sa_writer << *saca;
lcp_writer << saca.lcp();
}
}
#endif
};
}