STX B+ Tree Template Classes
0.9
|
00001 /** 00002 * \file include/stx/btree.h 00003 * Contains the main B+ tree implementation template class btree. 00004 */ 00005 00006 /* 00007 * STX B+ Tree Template Classes v0.9 00008 * Copyright (C) 2008-2013 Timo Bingmann <tb@panthema.net> 00009 * 00010 * Boost Software License - Version 1.0 - August 17th, 2003 00011 * 00012 * Permission is hereby granted, free of charge, to any person or organization 00013 * obtaining a copy of the software and accompanying documentation covered by 00014 * this license (the "Software") to use, reproduce, display, distribute, 00015 * execute, and transmit the Software, and to prepare derivative works of the 00016 * Software, and to permit third-parties to whom the Software is furnished to 00017 * do so, all subject to the following: 00018 * 00019 * The copyright notices in the Software and this entire statement, including 00020 * the above license grant, this restriction and the following disclaimer, must 00021 * be included in all copies of the Software, in whole or in part, and all 00022 * derivative works of the Software, unless such copies or derivative works are 00023 * solely in the form of machine-executable object code generated by a source 00024 * language processor. 00025 * 00026 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00027 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00028 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 00029 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 00030 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 00031 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00032 * DEALINGS IN THE SOFTWARE. 00033 */ 00034 00035 #ifndef _STX_BTREE_H_ 00036 #define _STX_BTREE_H_ 00037 00038 // *** Required Headers from the STL 00039 00040 #include <algorithm> 00041 #include <functional> 00042 #include <istream> 00043 #include <ostream> 00044 #include <memory> 00045 #include <cstddef> 00046 #include <assert.h> 00047 00048 // *** Debugging Macros 00049 00050 #ifdef BTREE_DEBUG 00051 00052 #include <iostream> 00053 00054 /// Print out debug information to std::cout if BTREE_DEBUG is defined. 00055 #define BTREE_PRINT(x) do { if (debug) (std::cout << x << std::endl); } while(0) 00056 00057 /// Assertion only if BTREE_DEBUG is defined. This is not used in verify(). 00058 #define BTREE_ASSERT(x) do { assert(x); } while(0) 00059 00060 #else 00061 00062 /// Print out debug information to std::cout if BTREE_DEBUG is defined. 00063 #define BTREE_PRINT(x) do { } while(0) 00064 00065 /// Assertion only if BTREE_DEBUG is defined. This is not used in verify(). 00066 #define BTREE_ASSERT(x) do { } while(0) 00067 00068 #endif 00069 00070 /// The maximum of a and b. Used in some compile-time formulas. 00071 #define BTREE_MAX(a,b) ((a) < (b) ? (b) : (a)) 00072 00073 #ifndef BTREE_FRIENDS 00074 /// The macro BTREE_FRIENDS can be used by outside class to access the B+ 00075 /// tree internals. This was added for wxBTreeDemo to be able to draw the 00076 /// tree. 00077 #define BTREE_FRIENDS friend class btree_friend; 00078 #endif 00079 00080 /// STX - Some Template Extensions namespace 00081 namespace stx { 00082 00083 /** Generates default traits for a B+ tree used as a set. It estimates leaf and 00084 * inner node sizes by assuming a cache line size of 256 bytes. */ 00085 template <typename _Key> 00086 struct btree_default_set_traits 00087 { 00088 /// If true, the tree will self verify it's invariants after each insert() 00089 /// or erase(). The header must have been compiled with BTREE_DEBUG defined. 00090 static const bool selfverify = false; 00091 00092 /// If true, the tree will print out debug information and a tree dump 00093 /// during insert() or erase() operation. The header must have been 00094 /// compiled with BTREE_DEBUG defined and key_type must be std::ostream 00095 /// printable. 00096 static const bool debug = false; 00097 00098 /// Number of slots in each leaf of the tree. Estimated so that each node 00099 /// has a size of about 256 bytes. 00100 static const int leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key)) ); 00101 00102 /// Number of slots in each inner node of the tree. Estimated so that each node 00103 /// has a size of about 256 bytes. 00104 static const int innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) ); 00105 00106 /// As of stx-btree-0.9, the code does linear search in find_lower() and 00107 /// find_upper() instead of binary_search, unless the node size is larger 00108 /// than this threshold. See notes at 00109 /// http://panthema.net/2013/0504-STX-B+Tree-Binary-vs-Linear-Search 00110 static const size_t binsearch_threshold = 256; 00111 }; 00112 00113 /** Generates default traits for a B+ tree used as a map. It estimates leaf and 00114 * inner node sizes by assuming a cache line size of 256 bytes. */ 00115 template <typename _Key, typename _Data> 00116 struct btree_default_map_traits 00117 { 00118 /// If true, the tree will self verify it's invariants after each insert() 00119 /// or erase(). The header must have been compiled with BTREE_DEBUG defined. 00120 static const bool selfverify = false; 00121 00122 /// If true, the tree will print out debug information and a tree dump 00123 /// during insert() or erase() operation. The header must have been 00124 /// compiled with BTREE_DEBUG defined and key_type must be std::ostream 00125 /// printable. 00126 static const bool debug = false; 00127 00128 /// Number of slots in each leaf of the tree. Estimated so that each node 00129 /// has a size of about 256 bytes. 00130 static const int leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(_Data)) ); 00131 00132 /// Number of slots in each inner node of the tree. Estimated so that each node 00133 /// has a size of about 256 bytes. 00134 static const int innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) ); 00135 00136 /// As of stx-btree-0.9, the code does linear search in find_lower() and 00137 /// find_upper() instead of binary_search, unless the node size is larger 00138 /// than this threshold. See notes at 00139 /// http://panthema.net/2013/0504-STX-B+Tree-Binary-vs-Linear-Search 00140 static const size_t binsearch_threshold = 256; 00141 }; 00142 00143 /** @brief Basic class implementing a base B+ tree data structure in memory. 00144 * 00145 * The base implementation of a memory B+ tree. It is based on the 00146 * implementation in Cormen's Introduction into Algorithms, Jan Jannink's paper 00147 * and other algorithm resources. Almost all STL-required function calls are 00148 * implemented. The asymptotic time requirements of the STL are not always 00149 * fulfilled in theory, however in practice this B+ tree performs better than a 00150 * red-black tree by using more memory. The insertion function splits the nodes 00151 * on the recursion unroll. Erase is largely based on Jannink's ideas. 00152 * 00153 * This class is specialized into btree_set, btree_multiset, btree_map and 00154 * btree_multimap using default template parameters and facade functions. 00155 */ 00156 template <typename _Key, typename _Data, 00157 typename _Value = std::pair<_Key, _Data>, 00158 typename _Compare = std::less<_Key>, 00159 typename _Traits = btree_default_map_traits<_Key, _Data>, 00160 bool _Duplicates = false, 00161 typename _Alloc = std::allocator<_Value>, 00162 bool _UsedAsSet = false > 00163 class btree 00164 { 00165 public: 00166 // *** Template Parameter Types 00167 00168 /// First template parameter: The key type of the B+ tree. This is stored 00169 /// in inner nodes and leaves 00170 typedef _Key key_type; 00171 00172 /// Second template parameter: The data type associated with each 00173 /// key. Stored in the B+ tree's leaves 00174 typedef _Data data_type; 00175 00176 /// Third template parameter: Composition pair of key and data types, this 00177 /// is required by the STL standard. The B+ tree does not store key and 00178 /// data together. If value_type == key_type then the B+ tree implements a 00179 /// set. 00180 typedef _Value value_type; 00181 00182 /// Fourth template parameter: Key comparison function object 00183 typedef _Compare key_compare; 00184 00185 /// Fifth template parameter: Traits object used to define more parameters 00186 /// of the B+ tree 00187 typedef _Traits traits; 00188 00189 /// Sixth template parameter: Allow duplicate keys in the B+ tree. Used to 00190 /// implement multiset and multimap. 00191 static const bool allow_duplicates = _Duplicates; 00192 00193 /// Seventh template parameter: STL allocator for tree nodes 00194 typedef _Alloc allocator_type; 00195 00196 /// Eighth template parameter: boolean indicator whether the btree is used 00197 /// as a set. In this case all operations on the data arrays are 00198 /// omitted. This flag is kind of hacky, but required because 00199 /// sizeof(empty_struct) = 1 due to the C standard. Without the flag, lots 00200 /// of superfluous copying would occur. 00201 static const bool used_as_set = _UsedAsSet; 00202 00203 // The macro BTREE_FRIENDS can be used by outside class to access the B+ 00204 // tree internals. This was added for wxBTreeDemo to be able to draw the 00205 // tree. 00206 BTREE_FRIENDS 00207 00208 public: 00209 // *** Constructed Types 00210 00211 /// Typedef of our own type 00212 typedef btree<key_type, data_type, value_type, key_compare, 00213 traits, allow_duplicates, allocator_type, used_as_set> btree_self; 00214 00215 /// Size type used to count keys 00216 typedef size_t size_type; 00217 00218 /// The pair of key_type and data_type, this may be different from value_type. 00219 typedef std::pair<key_type, data_type> pair_type; 00220 00221 public: 00222 // *** Static Constant Options and Values of the B+ Tree 00223 00224 /// Base B+ tree parameter: The number of key/data slots in each leaf 00225 static const unsigned short leafslotmax = traits::leafslots; 00226 00227 /// Base B+ tree parameter: The number of key slots in each inner node, 00228 /// this can differ from slots in each leaf. 00229 static const unsigned short innerslotmax = traits::innerslots; 00230 00231 /// Computed B+ tree parameter: The minimum number of key/data slots used 00232 /// in a leaf. If fewer slots are used, the leaf will be merged or slots 00233 /// shifted from it's siblings. 00234 static const unsigned short minleafslots = (leafslotmax / 2); 00235 00236 /// Computed B+ tree parameter: The minimum number of key slots used 00237 /// in an inner node. If fewer slots are used, the inner node will be 00238 /// merged or slots shifted from it's siblings. 00239 static const unsigned short mininnerslots = (innerslotmax / 2); 00240 00241 /// Debug parameter: Enables expensive and thorough checking of the B+ tree 00242 /// invariants after each insert/erase operation. 00243 static const bool selfverify = traits::selfverify; 00244 00245 /// Debug parameter: Prints out lots of debug information about how the 00246 /// algorithms change the tree. Requires the header file to be compiled 00247 /// with BTREE_DEBUG and the key type must be std::ostream printable. 00248 static const bool debug = traits::debug; 00249 00250 private: 00251 // *** Node Classes for In-Memory Nodes 00252 00253 /// The header structure of each node in-memory. This structure is extended 00254 /// by inner_node or leaf_node. 00255 struct node 00256 { 00257 /// Level in the b-tree, if level == 0 -> leaf node 00258 unsigned short level; 00259 00260 /// Number of key slotuse use, so number of valid children or data 00261 /// pointers 00262 unsigned short slotuse; 00263 00264 /// Delayed initialisation of constructed node 00265 inline void initialize(const unsigned short l) 00266 { 00267 level = l; 00268 slotuse = 0; 00269 } 00270 00271 /// True if this is a leaf node 00272 inline bool isleafnode() const 00273 { 00274 return (level == 0); 00275 } 00276 }; 00277 00278 /// Extended structure of a inner node in-memory. Contains only keys and no 00279 /// data items. 00280 struct inner_node : public node 00281 { 00282 /// Define an related allocator for the inner_node structs. 00283 typedef typename _Alloc::template rebind<inner_node>::other alloc_type; 00284 00285 /// Keys of children or data pointers 00286 key_type slotkey[innerslotmax]; 00287 00288 /// Pointers to children 00289 node* childid[innerslotmax+1]; 00290 00291 /// Set variables to initial values 00292 inline void initialize(const unsigned short l) 00293 { 00294 node::initialize(l); 00295 } 00296 00297 /// True if the node's slots are full 00298 inline bool isfull() const 00299 { 00300 return (node::slotuse == innerslotmax); 00301 } 00302 00303 /// True if few used entries, less than half full 00304 inline bool isfew() const 00305 { 00306 return (node::slotuse <= mininnerslots); 00307 } 00308 00309 /// True if node has too few entries 00310 inline bool isunderflow() const 00311 { 00312 return (node::slotuse < mininnerslots); 00313 } 00314 }; 00315 00316 /// Extended structure of a leaf node in memory. Contains pairs of keys and 00317 /// data items. Key and data slots are kept in separate arrays, because the 00318 /// key array is traversed very often compared to accessing the data items. 00319 struct leaf_node : public node 00320 { 00321 /// Define an related allocator for the leaf_node structs. 00322 typedef typename _Alloc::template rebind<leaf_node>::other alloc_type; 00323 00324 /// Double linked list pointers to traverse the leaves 00325 leaf_node *prevleaf; 00326 00327 /// Double linked list pointers to traverse the leaves 00328 leaf_node *nextleaf; 00329 00330 /// Keys of children or data pointers 00331 key_type slotkey[leafslotmax]; 00332 00333 /// Array of data 00334 data_type slotdata[used_as_set ? 1 : leafslotmax]; 00335 00336 /// Set variables to initial values 00337 inline void initialize() 00338 { 00339 node::initialize(0); 00340 prevleaf = nextleaf = NULL; 00341 } 00342 00343 /// True if the node's slots are full 00344 inline bool isfull() const 00345 { 00346 return (node::slotuse == leafslotmax); 00347 } 00348 00349 /// True if few used entries, less than half full 00350 inline bool isfew() const 00351 { 00352 return (node::slotuse <= minleafslots); 00353 } 00354 00355 /// True if node has too few entries 00356 inline bool isunderflow() const 00357 { 00358 return (node::slotuse < minleafslots); 00359 } 00360 00361 /// Set the (key,data) pair in slot. Overloaded function used by 00362 /// bulk_load(). 00363 inline void set_slot(unsigned short slot, const pair_type& value) 00364 { 00365 BTREE_ASSERT(used_as_set == false); 00366 BTREE_ASSERT(slot < node::slotuse); 00367 slotkey[slot] = value.first; 00368 slotdata[slot] = value.second; 00369 } 00370 00371 /// Set the key pair in slot. Overloaded function used by 00372 /// bulk_load(). 00373 inline void set_slot(unsigned short slot, const key_type& key) 00374 { 00375 BTREE_ASSERT(used_as_set == true); 00376 BTREE_ASSERT(slot < node::slotuse); 00377 slotkey[slot] = key; 00378 } 00379 }; 00380 00381 private: 00382 // *** Template Magic to Convert a pair or key/data types to a value_type 00383 00384 /// For sets the second pair_type is an empty struct, so the value_type 00385 /// should only be the first. 00386 template <typename value_type, typename pair_type> 00387 struct btree_pair_to_value 00388 { 00389 /// Convert a fake pair type to just the first component 00390 inline value_type operator()(pair_type& p) const { 00391 return p.first; 00392 } 00393 /// Convert a fake pair type to just the first component 00394 inline value_type operator()(const pair_type& p) const { 00395 return p.first; 00396 } 00397 }; 00398 00399 /// For maps value_type is the same as the pair_type 00400 template <typename value_type> 00401 struct btree_pair_to_value<value_type, value_type> 00402 { 00403 /// Identity "convert" a real pair type to just the first component 00404 inline value_type operator()(pair_type& p) const { 00405 return p; 00406 } 00407 /// Identity "convert" a real pair type to just the first component 00408 inline value_type operator()(const pair_type& p) const { 00409 return p; 00410 } 00411 }; 00412 00413 /// Using template specialization select the correct converter used by the 00414 /// iterators 00415 typedef btree_pair_to_value<value_type, pair_type> pair_to_value_type; 00416 00417 public: 00418 // *** Iterators and Reverse Iterators 00419 00420 class iterator; 00421 class const_iterator; 00422 class reverse_iterator; 00423 class const_reverse_iterator; 00424 00425 /// STL-like iterator object for B+ tree items. The iterator points to a 00426 /// specific slot number in a leaf. 00427 class iterator 00428 { 00429 public: 00430 // *** Types 00431 00432 /// The key type of the btree. Returned by key(). 00433 typedef typename btree::key_type key_type; 00434 00435 /// The data type of the btree. Returned by data(). 00436 typedef typename btree::data_type data_type; 00437 00438 /// The value type of the btree. Returned by operator*(). 00439 typedef typename btree::value_type value_type; 00440 00441 /// The pair type of the btree. 00442 typedef typename btree::pair_type pair_type; 00443 00444 /// Reference to the value_type. STL required. 00445 typedef value_type& reference; 00446 00447 /// Pointer to the value_type. STL required. 00448 typedef value_type* pointer; 00449 00450 /// STL-magic iterator category 00451 typedef std::bidirectional_iterator_tag iterator_category; 00452 00453 /// STL-magic 00454 typedef ptrdiff_t difference_type; 00455 00456 /// Our own type 00457 typedef iterator self; 00458 00459 private: 00460 // *** Members 00461 00462 /// The currently referenced leaf node of the tree 00463 typename btree::leaf_node* currnode; 00464 00465 /// Current key/data slot referenced 00466 unsigned short currslot; 00467 00468 /// Friendly to the const_iterator, so it may access the two data items 00469 /// directly. 00470 friend class const_iterator; 00471 00472 /// Also friendly to the reverse_iterator, so it may access the two 00473 /// data items directly. 00474 friend class reverse_iterator; 00475 00476 /// Also friendly to the const_reverse_iterator, so it may access the 00477 /// two data items directly. 00478 friend class const_reverse_iterator; 00479 00480 /// Also friendly to the base btree class, because erase_iter() needs 00481 /// to read the currnode and currslot values directly. 00482 friend class btree<key_type, data_type, value_type, key_compare, 00483 traits, allow_duplicates, allocator_type, used_as_set>; 00484 00485 /// Evil! A temporary value_type to STL-correctly deliver operator* and 00486 /// operator-> 00487 mutable value_type temp_value; 00488 00489 // The macro BTREE_FRIENDS can be used by outside class to access the B+ 00490 // tree internals. This was added for wxBTreeDemo to be able to draw the 00491 // tree. 00492 BTREE_FRIENDS 00493 00494 public: 00495 // *** Methods 00496 00497 /// Default-Constructor of a mutable iterator 00498 inline iterator() 00499 : currnode(NULL), currslot(0) 00500 { } 00501 00502 /// Initializing-Constructor of a mutable iterator 00503 inline iterator(typename btree::leaf_node *l, unsigned short s) 00504 : currnode(l), currslot(s) 00505 { } 00506 00507 /// Copy-constructor from a reverse iterator 00508 inline iterator(const reverse_iterator &it) 00509 : currnode(it.currnode), currslot(it.currslot) 00510 { } 00511 00512 /// Dereference the iterator, this is not a value_type& because key and 00513 /// value are not stored together 00514 inline reference operator*() const 00515 { 00516 temp_value = pair_to_value_type()( pair_type(key(),data()) ); 00517 return temp_value; 00518 } 00519 00520 /// Dereference the iterator. Do not use this if possible, use key() 00521 /// and data() instead. The B+ tree does not stored key and data 00522 /// together. 00523 inline pointer operator->() const 00524 { 00525 temp_value = pair_to_value_type()( pair_type(key(),data()) ); 00526 return &temp_value; 00527 } 00528 00529 /// Key of the current slot 00530 inline const key_type& key() const 00531 { 00532 return currnode->slotkey[currslot]; 00533 } 00534 00535 /// Writable reference to the current data object 00536 inline data_type& data() const 00537 { 00538 return currnode->slotdata[used_as_set ? 0 : currslot]; 00539 } 00540 00541 /// Prefix++ advance the iterator to the next slot 00542 inline self& operator++() 00543 { 00544 if (currslot + 1 < currnode->slotuse) { 00545 ++currslot; 00546 } 00547 else if (currnode->nextleaf != NULL) { 00548 currnode = currnode->nextleaf; 00549 currslot = 0; 00550 } 00551 else { 00552 // this is end() 00553 currslot = currnode->slotuse; 00554 } 00555 00556 return *this; 00557 } 00558 00559 /// Postfix++ advance the iterator to the next slot 00560 inline self operator++(int) 00561 { 00562 self tmp = *this; // copy ourselves 00563 00564 if (currslot + 1 < currnode->slotuse) { 00565 ++currslot; 00566 } 00567 else if (currnode->nextleaf != NULL) { 00568 currnode = currnode->nextleaf; 00569 currslot = 0; 00570 } 00571 else { 00572 // this is end() 00573 currslot = currnode->slotuse; 00574 } 00575 00576 return tmp; 00577 } 00578 00579 /// Prefix-- backstep the iterator to the last slot 00580 inline self& operator--() 00581 { 00582 if (currslot > 0) { 00583 --currslot; 00584 } 00585 else if (currnode->prevleaf != NULL) { 00586 currnode = currnode->prevleaf; 00587 currslot = currnode->slotuse - 1; 00588 } 00589 else { 00590 // this is begin() 00591 currslot = 0; 00592 } 00593 00594 return *this; 00595 } 00596 00597 /// Postfix-- backstep the iterator to the last slot 00598 inline self operator--(int) 00599 { 00600 self tmp = *this; // copy ourselves 00601 00602 if (currslot > 0) { 00603 --currslot; 00604 } 00605 else if (currnode->prevleaf != NULL) { 00606 currnode = currnode->prevleaf; 00607 currslot = currnode->slotuse - 1; 00608 } 00609 else { 00610 // this is begin() 00611 currslot = 0; 00612 } 00613 00614 return tmp; 00615 } 00616 00617 /// Equality of iterators 00618 inline bool operator==(const self& x) const 00619 { 00620 return (x.currnode == currnode) && (x.currslot == currslot); 00621 } 00622 00623 /// Inequality of iterators 00624 inline bool operator!=(const self& x) const 00625 { 00626 return (x.currnode != currnode) || (x.currslot != currslot); 00627 } 00628 }; 00629 00630 /// STL-like read-only iterator object for B+ tree items. The iterator 00631 /// points to a specific slot number in a leaf. 00632 class const_iterator 00633 { 00634 public: 00635 // *** Types 00636 00637 /// The key type of the btree. Returned by key(). 00638 typedef typename btree::key_type key_type; 00639 00640 /// The data type of the btree. Returned by data(). 00641 typedef typename btree::data_type data_type; 00642 00643 /// The value type of the btree. Returned by operator*(). 00644 typedef typename btree::value_type value_type; 00645 00646 /// The pair type of the btree. 00647 typedef typename btree::pair_type pair_type; 00648 00649 /// Reference to the value_type. STL required. 00650 typedef const value_type& reference; 00651 00652 /// Pointer to the value_type. STL required. 00653 typedef const value_type* pointer; 00654 00655 /// STL-magic iterator category 00656 typedef std::bidirectional_iterator_tag iterator_category; 00657 00658 /// STL-magic 00659 typedef ptrdiff_t difference_type; 00660 00661 /// Our own type 00662 typedef const_iterator self; 00663 00664 private: 00665 // *** Members 00666 00667 /// The currently referenced leaf node of the tree 00668 const typename btree::leaf_node* currnode; 00669 00670 /// Current key/data slot referenced 00671 unsigned short currslot; 00672 00673 /// Friendly to the reverse_const_iterator, so it may access the two 00674 /// data items directly 00675 friend class const_reverse_iterator; 00676 00677 /// Evil! A temporary value_type to STL-correctly deliver operator* and 00678 /// operator-> 00679 mutable value_type temp_value; 00680 00681 // The macro BTREE_FRIENDS can be used by outside class to access the B+ 00682 // tree internals. This was added for wxBTreeDemo to be able to draw the 00683 // tree. 00684 BTREE_FRIENDS 00685 00686 public: 00687 // *** Methods 00688 00689 /// Default-Constructor of a const iterator 00690 inline const_iterator() 00691 : currnode(NULL), currslot(0) 00692 { } 00693 00694 /// Initializing-Constructor of a const iterator 00695 inline const_iterator(const typename btree::leaf_node *l, unsigned short s) 00696 : currnode(l), currslot(s) 00697 { } 00698 00699 /// Copy-constructor from a mutable iterator 00700 inline const_iterator(const iterator &it) 00701 : currnode(it.currnode), currslot(it.currslot) 00702 { } 00703 00704 /// Copy-constructor from a mutable reverse iterator 00705 inline const_iterator(const reverse_iterator &it) 00706 : currnode(it.currnode), currslot(it.currslot) 00707 { } 00708 00709 /// Copy-constructor from a const reverse iterator 00710 inline const_iterator(const const_reverse_iterator &it) 00711 : currnode(it.currnode), currslot(it.currslot) 00712 { } 00713 00714 /// Dereference the iterator. Do not use this if possible, use key() 00715 /// and data() instead. The B+ tree does not stored key and data 00716 /// together. 00717 inline reference operator*() const 00718 { 00719 temp_value = pair_to_value_type()( pair_type(key(),data()) ); 00720 return temp_value; 00721 } 00722 00723 /// Dereference the iterator. Do not use this if possible, use key() 00724 /// and data() instead. The B+ tree does not stored key and data 00725 /// together. 00726 inline pointer operator->() const 00727 { 00728 temp_value = pair_to_value_type()( pair_type(key(),data()) ); 00729 return &temp_value; 00730 } 00731 00732 /// Key of the current slot 00733 inline const key_type& key() const 00734 { 00735 return currnode->slotkey[currslot]; 00736 } 00737 00738 /// Read-only reference to the current data object 00739 inline const data_type& data() const 00740 { 00741 return currnode->slotdata[used_as_set ? 0 : currslot]; 00742 } 00743 00744 /// Prefix++ advance the iterator to the next slot 00745 inline self& operator++() 00746 { 00747 if (currslot + 1 < currnode->slotuse) { 00748 ++currslot; 00749 } 00750 else if (currnode->nextleaf != NULL) { 00751 currnode = currnode->nextleaf; 00752 currslot = 0; 00753 } 00754 else { 00755 // this is end() 00756 currslot = currnode->slotuse; 00757 } 00758 00759 return *this; 00760 } 00761 00762 /// Postfix++ advance the iterator to the next slot 00763 inline self operator++(int) 00764 { 00765 self tmp = *this; // copy ourselves 00766 00767 if (currslot + 1 < currnode->slotuse) { 00768 ++currslot; 00769 } 00770 else if (currnode->nextleaf != NULL) { 00771 currnode = currnode->nextleaf; 00772 currslot = 0; 00773 } 00774 else { 00775 // this is end() 00776 currslot = currnode->slotuse; 00777 } 00778 00779 return tmp; 00780 } 00781 00782 /// Prefix-- backstep the iterator to the last slot 00783 inline self& operator--() 00784 { 00785 if (currslot > 0) { 00786 --currslot; 00787 } 00788 else if (currnode->prevleaf != NULL) { 00789 currnode = currnode->prevleaf; 00790 currslot = currnode->slotuse - 1; 00791 } 00792 else { 00793 // this is begin() 00794 currslot = 0; 00795 } 00796 00797 return *this; 00798 } 00799 00800 /// Postfix-- backstep the iterator to the last slot 00801 inline self operator--(int) 00802 { 00803 self tmp = *this; // copy ourselves 00804 00805 if (currslot > 0) { 00806 --currslot; 00807 } 00808 else if (currnode->prevleaf != NULL) { 00809 currnode = currnode->prevleaf; 00810 currslot = currnode->slotuse - 1; 00811 } 00812 else { 00813 // this is begin() 00814 currslot = 0; 00815 } 00816 00817 return tmp; 00818 } 00819 00820 /// Equality of iterators 00821 inline bool operator==(const self& x) const 00822 { 00823 return (x.currnode == currnode) && (x.currslot == currslot); 00824 } 00825 00826 /// Inequality of iterators 00827 inline bool operator!=(const self& x) const 00828 { 00829 return (x.currnode != currnode) || (x.currslot != currslot); 00830 } 00831 }; 00832 00833 /// STL-like mutable reverse iterator object for B+ tree items. The 00834 /// iterator points to a specific slot number in a leaf. 00835 class reverse_iterator 00836 { 00837 public: 00838 // *** Types 00839 00840 /// The key type of the btree. Returned by key(). 00841 typedef typename btree::key_type key_type; 00842 00843 /// The data type of the btree. Returned by data(). 00844 typedef typename btree::data_type data_type; 00845 00846 /// The value type of the btree. Returned by operator*(). 00847 typedef typename btree::value_type value_type; 00848 00849 /// The pair type of the btree. 00850 typedef typename btree::pair_type pair_type; 00851 00852 /// Reference to the value_type. STL required. 00853 typedef value_type& reference; 00854 00855 /// Pointer to the value_type. STL required. 00856 typedef value_type* pointer; 00857 00858 /// STL-magic iterator category 00859 typedef std::bidirectional_iterator_tag iterator_category; 00860 00861 /// STL-magic 00862 typedef ptrdiff_t difference_type; 00863 00864 /// Our own type 00865 typedef reverse_iterator self; 00866 00867 private: 00868 // *** Members 00869 00870 /// The currently referenced leaf node of the tree 00871 typename btree::leaf_node* currnode; 00872 00873 /// One slot past the current key/data slot referenced. 00874 unsigned short currslot; 00875 00876 /// Friendly to the const_iterator, so it may access the two data items 00877 /// directly 00878 friend class iterator; 00879 00880 /// Also friendly to the const_iterator, so it may access the two data 00881 /// items directly 00882 friend class const_iterator; 00883 00884 /// Also friendly to the const_iterator, so it may access the two data 00885 /// items directly 00886 friend class const_reverse_iterator; 00887 00888 /// Evil! A temporary value_type to STL-correctly deliver operator* and 00889 /// operator-> 00890 mutable value_type temp_value; 00891 00892 // The macro BTREE_FRIENDS can be used by outside class to access the B+ 00893 // tree internals. This was added for wxBTreeDemo to be able to draw the 00894 // tree. 00895 BTREE_FRIENDS 00896 00897 public: 00898 // *** Methods 00899 00900 /// Default-Constructor of a reverse iterator 00901 inline reverse_iterator() 00902 : currnode(NULL), currslot(0) 00903 { } 00904 00905 /// Initializing-Constructor of a mutable reverse iterator 00906 inline reverse_iterator(typename btree::leaf_node *l, unsigned short s) 00907 : currnode(l), currslot(s) 00908 { } 00909 00910 /// Copy-constructor from a mutable iterator 00911 inline reverse_iterator(const iterator &it) 00912 : currnode(it.currnode), currslot(it.currslot) 00913 { } 00914 00915 /// Dereference the iterator, this is not a value_type& because key and 00916 /// value are not stored together 00917 inline reference operator*() const 00918 { 00919 BTREE_ASSERT(currslot > 0); 00920 temp_value = pair_to_value_type()( pair_type(key(),data()) ); 00921 return temp_value; 00922 } 00923 00924 /// Dereference the iterator. Do not use this if possible, use key() 00925 /// and data() instead. The B+ tree does not stored key and data 00926 /// together. 00927 inline pointer operator->() const 00928 { 00929 BTREE_ASSERT(currslot > 0); 00930 temp_value = pair_to_value_type()( pair_type(key(),data()) ); 00931 return &temp_value; 00932 } 00933 00934 /// Key of the current slot 00935 inline const key_type& key() const 00936 { 00937 BTREE_ASSERT(currslot > 0); 00938 return currnode->slotkey[currslot - 1]; 00939 } 00940 00941 /// Writable reference to the current data object 00942 inline data_type& data() const 00943 { 00944 BTREE_ASSERT(currslot > 0); 00945 return currnode->slotdata[used_as_set ? 0 : currslot-1]; 00946 } 00947 00948 /// Prefix++ advance the iterator to the next slot 00949 inline self& operator++() 00950 { 00951 if (currslot > 1) { 00952 --currslot; 00953 } 00954 else if (currnode->prevleaf != NULL) { 00955 currnode = currnode->prevleaf; 00956 currslot = currnode->slotuse; 00957 } 00958 else { 00959 // this is begin() == rend() 00960 currslot = 0; 00961 } 00962 00963 return *this; 00964 } 00965 00966 /// Postfix++ advance the iterator to the next slot 00967 inline self operator++(int) 00968 { 00969 self tmp = *this; // copy ourselves 00970 00971 if (currslot > 1) { 00972 --currslot; 00973 } 00974 else if (currnode->prevleaf != NULL) { 00975 currnode = currnode->prevleaf; 00976 currslot = currnode->slotuse; 00977 } 00978 else { 00979 // this is begin() == rend() 00980 currslot = 0; 00981 } 00982 00983 return tmp; 00984 } 00985 00986 /// Prefix-- backstep the iterator to the last slot 00987 inline self& operator--() 00988 { 00989 if (currslot < currnode->slotuse) { 00990 ++currslot; 00991 } 00992 else if (currnode->nextleaf != NULL) { 00993 currnode = currnode->nextleaf; 00994 currslot = 1; 00995 } 00996 else { 00997 // this is end() == rbegin() 00998 currslot = currnode->slotuse; 00999 } 01000 01001 return *this; 01002 } 01003 01004 /// Postfix-- backstep the iterator to the last slot 01005 inline self operator--(int) 01006 { 01007 self tmp = *this; // copy ourselves 01008 01009 if (currslot < currnode->slotuse) { 01010 ++currslot; 01011 } 01012 else if (currnode->nextleaf != NULL) { 01013 currnode = currnode->nextleaf; 01014 currslot = 1; 01015 } 01016 else { 01017 // this is end() == rbegin() 01018 currslot = currnode->slotuse; 01019 } 01020 01021 return tmp; 01022 } 01023 01024 /// Equality of iterators 01025 inline bool operator==(const self& x) const 01026 { 01027 return (x.currnode == currnode) && (x.currslot == currslot); 01028 } 01029 01030 /// Inequality of iterators 01031 inline bool operator!=(const self& x) const 01032 { 01033 return (x.currnode != currnode) || (x.currslot != currslot); 01034 } 01035 }; 01036 01037 /// STL-like read-only reverse iterator object for B+ tree items. The 01038 /// iterator points to a specific slot number in a leaf. 01039 class const_reverse_iterator 01040 { 01041 public: 01042 // *** Types 01043 01044 /// The key type of the btree. Returned by key(). 01045 typedef typename btree::key_type key_type; 01046 01047 /// The data type of the btree. Returned by data(). 01048 typedef typename btree::data_type data_type; 01049 01050 /// The value type of the btree. Returned by operator*(). 01051 typedef typename btree::value_type value_type; 01052 01053 /// The pair type of the btree. 01054 typedef typename btree::pair_type pair_type; 01055 01056 /// Reference to the value_type. STL required. 01057 typedef const value_type& reference; 01058 01059 /// Pointer to the value_type. STL required. 01060 typedef const value_type* pointer; 01061 01062 /// STL-magic iterator category 01063 typedef std::bidirectional_iterator_tag iterator_category; 01064 01065 /// STL-magic 01066 typedef ptrdiff_t difference_type; 01067 01068 /// Our own type 01069 typedef const_reverse_iterator self; 01070 01071 private: 01072 // *** Members 01073 01074 /// The currently referenced leaf node of the tree 01075 const typename btree::leaf_node* currnode; 01076 01077 /// One slot past the current key/data slot referenced. 01078 unsigned short currslot; 01079 01080 /// Friendly to the const_iterator, so it may access the two data items 01081 /// directly. 01082 friend class reverse_iterator; 01083 01084 /// Evil! A temporary value_type to STL-correctly deliver operator* and 01085 /// operator-> 01086 mutable value_type temp_value; 01087 01088 // The macro BTREE_FRIENDS can be used by outside class to access the B+ 01089 // tree internals. This was added for wxBTreeDemo to be able to draw the 01090 // tree. 01091 BTREE_FRIENDS 01092 01093 public: 01094 // *** Methods 01095 01096 /// Default-Constructor of a const reverse iterator 01097 inline const_reverse_iterator() 01098 : currnode(NULL), currslot(0) 01099 { } 01100 01101 /// Initializing-Constructor of a const reverse iterator 01102 inline const_reverse_iterator(const typename btree::leaf_node *l, unsigned short s) 01103 : currnode(l), currslot(s) 01104 { } 01105 01106 /// Copy-constructor from a mutable iterator 01107 inline const_reverse_iterator(const iterator &it) 01108 : currnode(it.currnode), currslot(it.currslot) 01109 { } 01110 01111 /// Copy-constructor from a const iterator 01112 inline const_reverse_iterator(const const_iterator &it) 01113 : currnode(it.currnode), currslot(it.currslot) 01114 { } 01115 01116 /// Copy-constructor from a mutable reverse iterator 01117 inline const_reverse_iterator(const reverse_iterator &it) 01118 : currnode(it.currnode), currslot(it.currslot) 01119 { } 01120 01121 /// Dereference the iterator. Do not use this if possible, use key() 01122 /// and data() instead. The B+ tree does not stored key and data 01123 /// together. 01124 inline reference operator*() const 01125 { 01126 BTREE_ASSERT(currslot > 0); 01127 temp_value = pair_to_value_type()( pair_type(key(),data()) ); 01128 return temp_value; 01129 } 01130 01131 /// Dereference the iterator. Do not use this if possible, use key() 01132 /// and data() instead. The B+ tree does not stored key and data 01133 /// together. 01134 inline pointer operator->() const 01135 { 01136 BTREE_ASSERT(currslot > 0); 01137 temp_value = pair_to_value_type()( pair_type(key(),data()) ); 01138 return &temp_value; 01139 } 01140 01141 /// Key of the current slot 01142 inline const key_type& key() const 01143 { 01144 BTREE_ASSERT(currslot > 0); 01145 return currnode->slotkey[currslot - 1]; 01146 } 01147 01148 /// Read-only reference to the current data object 01149 inline const data_type& data() const 01150 { 01151 BTREE_ASSERT(currslot > 0); 01152 return currnode->slotdata[used_as_set ? 0 : currslot-1]; 01153 } 01154 01155 /// Prefix++ advance the iterator to the previous slot 01156 inline self& operator++() 01157 { 01158 if (currslot > 1) { 01159 --currslot; 01160 } 01161 else if (currnode->prevleaf != NULL) { 01162 currnode = currnode->prevleaf; 01163 currslot = currnode->slotuse; 01164 } 01165 else { 01166 // this is begin() == rend() 01167 currslot = 0; 01168 } 01169 01170 return *this; 01171 } 01172 01173 /// Postfix++ advance the iterator to the previous slot 01174 inline self operator++(int) 01175 { 01176 self tmp = *this; // copy ourselves 01177 01178 if (currslot > 1) { 01179 --currslot; 01180 } 01181 else if (currnode->prevleaf != NULL) { 01182 currnode = currnode->prevleaf; 01183 currslot = currnode->slotuse; 01184 } 01185 else { 01186 // this is begin() == rend() 01187 currslot = 0; 01188 } 01189 01190 return tmp; 01191 } 01192 01193 /// Prefix-- backstep the iterator to the next slot 01194 inline self& operator--() 01195 { 01196 if (currslot < currnode->slotuse) { 01197 ++currslot; 01198 } 01199 else if (currnode->nextleaf != NULL) { 01200 currnode = currnode->nextleaf; 01201 currslot = 1; 01202 } 01203 else { 01204 // this is end() == rbegin() 01205 currslot = currnode->slotuse; 01206 } 01207 01208 return *this; 01209 } 01210 01211 /// Postfix-- backstep the iterator to the next slot 01212 inline self operator--(int) 01213 { 01214 self tmp = *this; // copy ourselves 01215 01216 if (currslot < currnode->slotuse) { 01217 ++currslot; 01218 } 01219 else if (currnode->nextleaf != NULL) { 01220 currnode = currnode->nextleaf; 01221 currslot = 1; 01222 } 01223 else { 01224 // this is end() == rbegin() 01225 currslot = currnode->slotuse; 01226 } 01227 01228 return tmp; 01229 } 01230 01231 /// Equality of iterators 01232 inline bool operator==(const self& x) const 01233 { 01234 return (x.currnode == currnode) && (x.currslot == currslot); 01235 } 01236 01237 /// Inequality of iterators 01238 inline bool operator!=(const self& x) const 01239 { 01240 return (x.currnode != currnode) || (x.currslot != currslot); 01241 } 01242 }; 01243 01244 public: 01245 // *** Small Statistics Structure 01246 01247 /** A small struct containing basic statistics about the B+ tree. It can be 01248 * fetched using get_stats(). */ 01249 struct tree_stats 01250 { 01251 /// Number of items in the B+ tree 01252 size_type itemcount; 01253 01254 /// Number of leaves in the B+ tree 01255 size_type leaves; 01256 01257 /// Number of inner nodes in the B+ tree 01258 size_type innernodes; 01259 01260 /// Base B+ tree parameter: The number of key/data slots in each leaf 01261 static const unsigned short leafslots = btree_self::leafslotmax; 01262 01263 /// Base B+ tree parameter: The number of key slots in each inner node. 01264 static const unsigned short innerslots = btree_self::innerslotmax; 01265 01266 /// Zero initialized 01267 inline tree_stats() 01268 : itemcount(0), 01269 leaves(0), innernodes(0) 01270 { 01271 } 01272 01273 /// Return the total number of nodes 01274 inline size_type nodes() const 01275 { 01276 return innernodes + leaves; 01277 } 01278 01279 /// Return the average fill of leaves 01280 inline double avgfill_leaves() const 01281 { 01282 return static_cast<double>(itemcount) / (leaves * leafslots); 01283 } 01284 }; 01285 01286 private: 01287 // *** Tree Object Data Members 01288 01289 /// Pointer to the B+ tree's root node, either leaf or inner node 01290 node* m_root; 01291 01292 /// Pointer to first leaf in the double linked leaf chain 01293 leaf_node *m_headleaf; 01294 01295 /// Pointer to last leaf in the double linked leaf chain 01296 leaf_node *m_tailleaf; 01297 01298 /// Other small statistics about the B+ tree 01299 tree_stats m_stats; 01300 01301 /// Key comparison object. More comparison functions are generated from 01302 /// this < relation. 01303 key_compare m_key_less; 01304 01305 /// Memory allocator. 01306 allocator_type m_allocator; 01307 01308 public: 01309 // *** Constructors and Destructor 01310 01311 /// Default constructor initializing an empty B+ tree with the standard key 01312 /// comparison function 01313 explicit inline btree(const allocator_type &alloc = allocator_type()) 01314 : m_root(NULL), m_headleaf(NULL), m_tailleaf(NULL), m_allocator(alloc) 01315 { 01316 } 01317 01318 /// Constructor initializing an empty B+ tree with a special key 01319 /// comparison object 01320 explicit inline btree(const key_compare &kcf, 01321 const allocator_type &alloc = allocator_type()) 01322 : m_root(NULL), m_headleaf(NULL), m_tailleaf(NULL), 01323 m_key_less(kcf), m_allocator(alloc) 01324 { 01325 } 01326 01327 /// Constructor initializing a B+ tree with the range [first,last). The 01328 /// range need not be sorted. To create a B+ tree from a sorted range, use 01329 /// bulk_load(). 01330 template <class InputIterator> 01331 inline btree(InputIterator first, InputIterator last, 01332 const allocator_type &alloc = allocator_type()) 01333 : m_root(NULL), m_headleaf(NULL), m_tailleaf(NULL), m_allocator(alloc) 01334 { 01335 insert(first, last); 01336 } 01337 01338 /// Constructor initializing a B+ tree with the range [first,last) and a 01339 /// special key comparison object. The range need not be sorted. To create 01340 /// a B+ tree from a sorted range, use bulk_load(). 01341 template <class InputIterator> 01342 inline btree(InputIterator first, InputIterator last, const key_compare &kcf, 01343 const allocator_type &alloc = allocator_type()) 01344 : m_root(NULL), m_headleaf(NULL), m_tailleaf(NULL), 01345 m_key_less(kcf), m_allocator(alloc) 01346 { 01347 insert(first, last); 01348 } 01349 01350 /// Frees up all used B+ tree memory pages 01351 inline ~btree() 01352 { 01353 clear(); 01354 } 01355 01356 /// Fast swapping of two identical B+ tree objects. 01357 void swap(btree_self& from) 01358 { 01359 std::swap(m_root, from.m_root); 01360 std::swap(m_headleaf, from.m_headleaf); 01361 std::swap(m_tailleaf, from.m_tailleaf); 01362 std::swap(m_stats, from.m_stats); 01363 std::swap(m_key_less, from.m_key_less); 01364 std::swap(m_allocator, from.m_allocator); 01365 } 01366 01367 public: 01368 // *** Key and Value Comparison Function Objects 01369 01370 /// Function class to compare value_type objects. Required by the STL 01371 class value_compare 01372 { 01373 protected: 01374 /// Key comparison function from the template parameter 01375 key_compare key_comp; 01376 01377 /// Constructor called from btree::value_comp() 01378 inline value_compare(key_compare kc) 01379 : key_comp(kc) 01380 { } 01381 01382 /// Friendly to the btree class so it may call the constructor 01383 friend class btree<key_type, data_type, value_type, key_compare, 01384 traits, allow_duplicates, allocator_type, used_as_set>; 01385 01386 public: 01387 /// Function call "less"-operator resulting in true if x < y. 01388 inline bool operator()(const value_type& x, const value_type& y) const 01389 { 01390 return key_comp(x.first, y.first); 01391 } 01392 }; 01393 01394 /// Constant access to the key comparison object sorting the B+ tree 01395 inline key_compare key_comp() const 01396 { 01397 return m_key_less; 01398 } 01399 01400 /// Constant access to a constructed value_type comparison object. Required 01401 /// by the STL 01402 inline value_compare value_comp() const 01403 { 01404 return value_compare(m_key_less); 01405 } 01406 01407 private: 01408 // *** Convenient Key Comparison Functions Generated From key_less 01409 01410 /// True if a < b ? "constructed" from m_key_less() 01411 inline bool key_less(const key_type &a, const key_type b) const 01412 { 01413 return m_key_less(a, b); 01414 } 01415 01416 /// True if a <= b ? constructed from key_less() 01417 inline bool key_lessequal(const key_type &a, const key_type b) const 01418 { 01419 return !m_key_less(b, a); 01420 } 01421 01422 /// True if a > b ? constructed from key_less() 01423 inline bool key_greater(const key_type &a, const key_type &b) const 01424 { 01425 return m_key_less(b, a); 01426 } 01427 01428 /// True if a >= b ? constructed from key_less() 01429 inline bool key_greaterequal(const key_type &a, const key_type b) const 01430 { 01431 return !m_key_less(a, b); 01432 } 01433 01434 /// True if a == b ? constructed from key_less(). This requires the < 01435 /// relation to be a total order, otherwise the B+ tree cannot be sorted. 01436 inline bool key_equal(const key_type &a, const key_type &b) const 01437 { 01438 return !m_key_less(a, b) && !m_key_less(b, a); 01439 } 01440 01441 public: 01442 // *** Allocators 01443 01444 /// Return the base node allocator provided during construction. 01445 allocator_type get_allocator() const 01446 { 01447 return m_allocator; 01448 } 01449 01450 private: 01451 // *** Node Object Allocation and Deallocation Functions 01452 01453 /// Return an allocator for leaf_node objects 01454 typename leaf_node::alloc_type leaf_node_allocator() 01455 { 01456 return typename leaf_node::alloc_type(m_allocator); 01457 } 01458 01459 /// Return an allocator for inner_node objects 01460 typename inner_node::alloc_type inner_node_allocator() 01461 { 01462 return typename inner_node::alloc_type(m_allocator); 01463 } 01464 01465 /// Allocate and initialize a leaf node 01466 inline leaf_node* allocate_leaf() 01467 { 01468 leaf_node *n = new (leaf_node_allocator().allocate(1)) leaf_node(); 01469 n->initialize(); 01470 m_stats.leaves++; 01471 return n; 01472 } 01473 01474 /// Allocate and initialize an inner node 01475 inline inner_node* allocate_inner(unsigned short level) 01476 { 01477 inner_node *n = new (inner_node_allocator().allocate(1)) inner_node(); 01478 n->initialize(level); 01479 m_stats.innernodes++; 01480 return n; 01481 } 01482 01483 /// Correctly free either inner or leaf node, destructs all contained key 01484 /// and value objects 01485 inline void free_node(node *n) 01486 { 01487 if (n->isleafnode()) { 01488 leaf_node *ln = static_cast<leaf_node*>(n); 01489 typename leaf_node::alloc_type a(leaf_node_allocator()); 01490 a.destroy(ln); 01491 a.deallocate(ln, 1); 01492 m_stats.leaves--; 01493 } 01494 else { 01495 inner_node *in = static_cast<inner_node*>(n); 01496 typename inner_node::alloc_type a(inner_node_allocator()); 01497 a.destroy(in); 01498 a.deallocate(in, 1); 01499 m_stats.innernodes--; 01500 } 01501 } 01502 01503 /// Convenient template function for conditional copying of slotdata. This 01504 /// should be used instead of std::copy for all slotdata manipulations. 01505 template<class InputIterator, class OutputIterator> 01506 static OutputIterator data_copy (InputIterator first, InputIterator last, 01507 OutputIterator result) 01508 { 01509 if (used_as_set) return result; // no operation 01510 else return std::copy(first, last, result); 01511 } 01512 01513 /// Convenient template function for conditional copying of slotdata. This 01514 /// should be used instead of std::copy for all slotdata manipulations. 01515 template<class InputIterator, class OutputIterator> 01516 static OutputIterator data_copy_backward (InputIterator first, InputIterator last, 01517 OutputIterator result) 01518 { 01519 if (used_as_set) return result; // no operation 01520 else return std::copy_backward(first, last, result); 01521 } 01522 01523 public: 01524 // *** Fast Destruction of the B+ Tree 01525 01526 /// Frees all key/data pairs and all nodes of the tree 01527 void clear() 01528 { 01529 if (m_root) 01530 { 01531 clear_recursive(m_root); 01532 free_node(m_root); 01533 01534 m_root = NULL; 01535 m_headleaf = m_tailleaf = NULL; 01536 01537 m_stats = tree_stats(); 01538 } 01539 01540 BTREE_ASSERT(m_stats.itemcount == 0); 01541 } 01542 01543 private: 01544 /// Recursively free up nodes 01545 void clear_recursive(node *n) 01546 { 01547 if (n->isleafnode()) 01548 { 01549 leaf_node *leafnode = static_cast<leaf_node*>(n); 01550 01551 for (unsigned int slot = 0; slot < leafnode->slotuse; ++slot) 01552 { 01553 // data objects are deleted by leaf_node's destructor 01554 } 01555 } 01556 else 01557 { 01558 inner_node *innernode = static_cast<inner_node*>(n); 01559 01560 for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot) 01561 { 01562 clear_recursive(innernode->childid[slot]); 01563 free_node(innernode->childid[slot]); 01564 } 01565 } 01566 } 01567 01568 public: 01569 // *** STL Iterator Construction Functions 01570 01571 /// Constructs a read/data-write iterator that points to the first slot in 01572 /// the first leaf of the B+ tree. 01573 inline iterator begin() 01574 { 01575 return iterator(m_headleaf, 0); 01576 } 01577 01578 /// Constructs a read/data-write iterator that points to the first invalid 01579 /// slot in the last leaf of the B+ tree. 01580 inline iterator end() 01581 { 01582 return iterator(m_tailleaf, m_tailleaf ? m_tailleaf->slotuse : 0); 01583 } 01584 01585 /// Constructs a read-only constant iterator that points to the first slot 01586 /// in the first leaf of the B+ tree. 01587 inline const_iterator begin() const 01588 { 01589 return const_iterator(m_headleaf, 0); 01590 } 01591 01592 /// Constructs a read-only constant iterator that points to the first 01593 /// invalid slot in the last leaf of the B+ tree. 01594 inline const_iterator end() const 01595 { 01596 return const_iterator(m_tailleaf, m_tailleaf ? m_tailleaf->slotuse : 0); 01597 } 01598 01599 /// Constructs a read/data-write reverse iterator that points to the first 01600 /// invalid slot in the last leaf of the B+ tree. Uses STL magic. 01601 inline reverse_iterator rbegin() 01602 { 01603 return reverse_iterator(end()); 01604 } 01605 01606 /// Constructs a read/data-write reverse iterator that points to the first 01607 /// slot in the first leaf of the B+ tree. Uses STL magic. 01608 inline reverse_iterator rend() 01609 { 01610 return reverse_iterator(begin()); 01611 } 01612 01613 /// Constructs a read-only reverse iterator that points to the first 01614 /// invalid slot in the last leaf of the B+ tree. Uses STL magic. 01615 inline const_reverse_iterator rbegin() const 01616 { 01617 return const_reverse_iterator(end()); 01618 } 01619 01620 /// Constructs a read-only reverse iterator that points to the first slot 01621 /// in the first leaf of the B+ tree. Uses STL magic. 01622 inline const_reverse_iterator rend() const 01623 { 01624 return const_reverse_iterator(begin()); 01625 } 01626 01627 private: 01628 // *** B+ Tree Node Binary Search Functions 01629 01630 /// Searches for the first key in the node n greater or equal to key. Uses 01631 /// binary search with an optional linear self-verification. This is a 01632 /// template function, because the slotkey array is located at different 01633 /// places in leaf_node and inner_node. 01634 template <typename node_type> 01635 inline int find_lower(const node_type *n, const key_type& key) const 01636 { 01637 if ( 0 && sizeof(n->slotkey) > traits::binsearch_threshold ) 01638 { 01639 if (n->slotuse == 0) return 0; 01640 01641 int lo = 0, hi = n->slotuse; 01642 01643 while (lo < hi) 01644 { 01645 int mid = (lo + hi) >> 1; 01646 01647 if (key_lessequal(key, n->slotkey[mid])) { 01648 hi = mid; // key <= mid 01649 } 01650 else { 01651 lo = mid + 1; // key > mid 01652 } 01653 } 01654 01655 BTREE_PRINT("btree::find_lower: on " << n << " key " << key << " -> " << lo << " / " << hi); 01656 01657 // verify result using simple linear search 01658 if (selfverify) 01659 { 01660 int i = 0; 01661 while (i < n->slotuse && key_less(n->slotkey[i],key)) ++i; 01662 01663 BTREE_PRINT("btree::find_lower: testfind: " << i); 01664 BTREE_ASSERT(i == lo); 01665 } 01666 01667 return lo; 01668 } 01669 else // for nodes <= binsearch_threshold do linear search. 01670 { 01671 int lo = 0; 01672 while (lo < n->slotuse && key_less(n->slotkey[lo],key)) ++lo; 01673 return lo; 01674 } 01675 } 01676 01677 /// Searches for the first key in the node n greater than key. Uses binary 01678 /// search with an optional linear self-verification. This is a template 01679 /// function, because the slotkey array is located at different places in 01680 /// leaf_node and inner_node. 01681 template <typename node_type> 01682 inline int find_upper(const node_type *n, const key_type& key) const 01683 { 01684 if ( 0 && sizeof(n->slotkey) > traits::binsearch_threshold ) 01685 { 01686 if (n->slotuse == 0) return 0; 01687 01688 int lo = 0, hi = n->slotuse; 01689 01690 while (lo < hi) 01691 { 01692 int mid = (lo + hi) >> 1; 01693 01694 if (key_less(key, n->slotkey[mid])) { 01695 hi = mid; // key < mid 01696 } 01697 else { 01698 lo = mid + 1; // key >= mid 01699 } 01700 } 01701 01702 BTREE_PRINT("btree::find_upper: on " << n << " key " << key << " -> " << lo << " / " << hi); 01703 01704 // verify result using simple linear search 01705 if (selfverify) 01706 { 01707 int i = 0; 01708 while (i < n->slotuse && key_lessequal(n->slotkey[i],key)) ++i; 01709 01710 BTREE_PRINT("btree::find_upper testfind: " << i); 01711 BTREE_ASSERT(i == hi); 01712 } 01713 01714 return lo; 01715 } 01716 else // for nodes <= binsearch_threshold do linear search. 01717 { 01718 int lo = 0; 01719 while (lo < n->slotuse && key_lessequal(n->slotkey[lo],key)) ++lo; 01720 return lo; 01721 } 01722 } 01723 01724 public: 01725 // *** Access Functions to the Item Count 01726 01727 /// Return the number of key/data pairs in the B+ tree 01728 inline size_type size() const 01729 { 01730 return m_stats.itemcount; 01731 } 01732 01733 /// Returns true if there is at least one key/data pair in the B+ tree 01734 inline bool empty() const 01735 { 01736 return (size() == size_type(0)); 01737 } 01738 01739 /// Returns the largest possible size of the B+ Tree. This is just a 01740 /// function required by the STL standard, the B+ Tree can hold more items. 01741 inline size_type max_size() const 01742 { 01743 return size_type(-1); 01744 } 01745 01746 /// Return a const reference to the current statistics. 01747 inline const struct tree_stats& get_stats() const 01748 { 01749 return m_stats; 01750 } 01751 01752 public: 01753 // *** Standard Access Functions Querying the Tree by Descending to a Leaf 01754 01755 /// Non-STL function checking whether a key is in the B+ tree. The same as 01756 /// (find(k) != end()) or (count() != 0). 01757 bool exists(const key_type &key) const 01758 { 01759 const node *n = m_root; 01760 if (!n) return false; 01761 01762 while(!n->isleafnode()) 01763 { 01764 const inner_node *inner = static_cast<const inner_node*>(n); 01765 int slot = find_lower(inner, key); 01766 01767 n = inner->childid[slot]; 01768 } 01769 01770 const leaf_node *leaf = static_cast<const leaf_node*>(n); 01771 01772 int slot = find_lower(leaf, key); 01773 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])); 01774 } 01775 01776 /// Tries to locate a key in the B+ tree and returns an iterator to the 01777 /// key/data slot if found. If unsuccessful it returns end(). 01778 iterator find(const key_type &key) 01779 { 01780 node *n = m_root; 01781 if (!n) return end(); 01782 01783 while(!n->isleafnode()) 01784 { 01785 const inner_node *inner = static_cast<const inner_node*>(n); 01786 int slot = find_lower(inner, key); 01787 01788 n = inner->childid[slot]; 01789 } 01790 01791 leaf_node *leaf = static_cast<leaf_node*>(n); 01792 01793 int slot = find_lower(leaf, key); 01794 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) 01795 ? iterator(leaf, slot) : end(); 01796 } 01797 01798 /// Tries to locate a key in the B+ tree and returns an constant iterator 01799 /// to the key/data slot if found. If unsuccessful it returns end(). 01800 const_iterator find(const key_type &key) const 01801 { 01802 const node *n = m_root; 01803 if (!n) return end(); 01804 01805 while(!n->isleafnode()) 01806 { 01807 const inner_node *inner = static_cast<const inner_node*>(n); 01808 int slot = find_lower(inner, key); 01809 01810 n = inner->childid[slot]; 01811 } 01812 01813 const leaf_node *leaf = static_cast<const leaf_node*>(n); 01814 01815 int slot = find_lower(leaf, key); 01816 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) 01817 ? const_iterator(leaf, slot) : end(); 01818 } 01819 01820 /// Tries to locate a key in the B+ tree and returns the number of 01821 /// identical key entries found. 01822 size_type count(const key_type &key) const 01823 { 01824 const node *n = m_root; 01825 if (!n) return 0; 01826 01827 while(!n->isleafnode()) 01828 { 01829 const inner_node *inner = static_cast<const inner_node*>(n); 01830 int slot = find_lower(inner, key); 01831 01832 n = inner->childid[slot]; 01833 } 01834 01835 const leaf_node *leaf = static_cast<const leaf_node*>(n); 01836 01837 int slot = find_lower(leaf, key); 01838 size_type num = 0; 01839 01840 while (leaf && slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) 01841 { 01842 ++num; 01843 if (++slot >= leaf->slotuse) 01844 { 01845 leaf = leaf->nextleaf; 01846 slot = 0; 01847 } 01848 } 01849 01850 return num; 01851 } 01852 01853 /// Searches the B+ tree and returns an iterator to the first pair 01854 /// equal to or greater than key, or end() if all keys are smaller. 01855 iterator lower_bound(const key_type& key) 01856 { 01857 node *n = m_root; 01858 if (!n) return end(); 01859 01860 while(!n->isleafnode()) 01861 { 01862 const inner_node *inner = static_cast<const inner_node*>(n); 01863 int slot = find_lower(inner, key); 01864 01865 n = inner->childid[slot]; 01866 } 01867 01868 leaf_node *leaf = static_cast<leaf_node*>(n); 01869 01870 int slot = find_lower(leaf, key); 01871 return iterator(leaf, slot); 01872 } 01873 01874 /// Searches the B+ tree and returns a constant iterator to the 01875 /// first pair equal to or greater than key, or end() if all keys 01876 /// are smaller. 01877 const_iterator lower_bound(const key_type& key) const 01878 { 01879 const node *n = m_root; 01880 if (!n) return end(); 01881 01882 while(!n->isleafnode()) 01883 { 01884 const inner_node *inner = static_cast<const inner_node*>(n); 01885 int slot = find_lower(inner, key); 01886 01887 n = inner->childid[slot]; 01888 } 01889 01890 const leaf_node *leaf = static_cast<const leaf_node*>(n); 01891 01892 int slot = find_lower(leaf, key); 01893 return const_iterator(leaf, slot); 01894 } 01895 01896 /// Searches the B+ tree and returns an iterator to the first pair 01897 /// greater than key, or end() if all keys are smaller or equal. 01898 iterator upper_bound(const key_type& key) 01899 { 01900 node *n = m_root; 01901 if (!n) return end(); 01902 01903 while(!n->isleafnode()) 01904 { 01905 const inner_node *inner = static_cast<const inner_node*>(n); 01906 int slot = find_upper(inner, key); 01907 01908 n = inner->childid[slot]; 01909 } 01910 01911 leaf_node *leaf = static_cast<leaf_node*>(n); 01912 01913 int slot = find_upper(leaf, key); 01914 return iterator(leaf, slot); 01915 } 01916 01917 /// Searches the B+ tree and returns a constant iterator to the 01918 /// first pair greater than key, or end() if all keys are smaller 01919 /// or equal. 01920 const_iterator upper_bound(const key_type& key) const 01921 { 01922 const node *n = m_root; 01923 if (!n) return end(); 01924 01925 while(!n->isleafnode()) 01926 { 01927 const inner_node *inner = static_cast<const inner_node*>(n); 01928 int slot = find_upper(inner, key); 01929 01930 n = inner->childid[slot]; 01931 } 01932 01933 const leaf_node *leaf = static_cast<const leaf_node*>(n); 01934 01935 int slot = find_upper(leaf, key); 01936 return const_iterator(leaf, slot); 01937 } 01938 01939 /// Searches the B+ tree and returns both lower_bound() and upper_bound(). 01940 inline std::pair<iterator, iterator> equal_range(const key_type& key) 01941 { 01942 return std::pair<iterator, iterator>(lower_bound(key), upper_bound(key)); 01943 } 01944 01945 /// Searches the B+ tree and returns both lower_bound() and upper_bound(). 01946 inline std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const 01947 { 01948 return std::pair<const_iterator, const_iterator>(lower_bound(key), upper_bound(key)); 01949 } 01950 01951 public: 01952 // *** B+ Tree Object Comparison Functions 01953 01954 /// Equality relation of B+ trees of the same type. B+ trees of the same 01955 /// size and equal elements (both key and data) are considered 01956 /// equal. Beware of the random ordering of duplicate keys. 01957 inline bool operator==(const btree_self &other) const 01958 { 01959 return (size() == other.size()) && std::equal(begin(), end(), other.begin()); 01960 } 01961 01962 /// Inequality relation. Based on operator==. 01963 inline bool operator!=(const btree_self &other) const 01964 { 01965 return !(*this == other); 01966 } 01967 01968 /// Total ordering relation of B+ trees of the same type. It uses 01969 /// std::lexicographical_compare() for the actual comparison of elements. 01970 inline bool operator<(const btree_self &other) const 01971 { 01972 return std::lexicographical_compare(begin(), end(), other.begin(), other.end()); 01973 } 01974 01975 /// Greater relation. Based on operator<. 01976 inline bool operator>(const btree_self &other) const 01977 { 01978 return other < *this; 01979 } 01980 01981 /// Less-equal relation. Based on operator<. 01982 inline bool operator<=(const btree_self &other) const 01983 { 01984 return !(other < *this); 01985 } 01986 01987 /// Greater-equal relation. Based on operator<. 01988 inline bool operator>=(const btree_self &other) const 01989 { 01990 return !(*this < other); 01991 } 01992 01993 public: 01994 /// *** Fast Copy: Assign Operator and Copy Constructors 01995 01996 /// Assignment operator. All the key/data pairs are copied 01997 inline btree_self& operator= (const btree_self &other) 01998 { 01999 if (this != &other) 02000 { 02001 clear(); 02002 02003 m_key_less = other.key_comp(); 02004 m_allocator = other.get_allocator(); 02005 02006 if (other.size() != 0) 02007 { 02008 m_stats.leaves = m_stats.innernodes = 0; 02009 if (other.m_root) { 02010 m_root = copy_recursive(other.m_root); 02011 } 02012 m_stats = other.m_stats; 02013 } 02014 02015 if (selfverify) verify(); 02016 } 02017 return *this; 02018 } 02019 02020 /// Copy constructor. The newly initialized B+ tree object will contain a 02021 /// copy of all key/data pairs. 02022 inline btree(const btree_self &other) 02023 : m_root(NULL), m_headleaf(NULL), m_tailleaf(NULL), 02024 m_stats( other.m_stats ), 02025 m_key_less( other.key_comp() ), 02026 m_allocator( other.get_allocator() ) 02027 { 02028 if (size() > 0) 02029 { 02030 m_stats.leaves = m_stats.innernodes = 0; 02031 if (other.m_root) { 02032 m_root = copy_recursive(other.m_root); 02033 } 02034 if (selfverify) verify(); 02035 } 02036 } 02037 02038 private: 02039 /// Recursively copy nodes from another B+ tree object 02040 struct node* copy_recursive(const node *n) 02041 { 02042 if (n->isleafnode()) 02043 { 02044 const leaf_node *leaf = static_cast<const leaf_node*>(n); 02045 leaf_node *newleaf = allocate_leaf(); 02046 02047 newleaf->slotuse = leaf->slotuse; 02048 std::copy(leaf->slotkey, leaf->slotkey + leaf->slotuse, newleaf->slotkey); 02049 data_copy(leaf->slotdata, leaf->slotdata + leaf->slotuse, newleaf->slotdata); 02050 02051 if (m_headleaf == NULL) 02052 { 02053 m_headleaf = m_tailleaf = newleaf; 02054 newleaf->prevleaf = newleaf->nextleaf = NULL; 02055 } 02056 else 02057 { 02058 newleaf->prevleaf = m_tailleaf; 02059 m_tailleaf->nextleaf = newleaf; 02060 m_tailleaf = newleaf; 02061 } 02062 02063 return newleaf; 02064 } 02065 else 02066 { 02067 const inner_node *inner = static_cast<const inner_node*>(n); 02068 inner_node *newinner = allocate_inner(inner->level); 02069 02070 newinner->slotuse = inner->slotuse; 02071 std::copy(inner->slotkey, inner->slotkey + inner->slotuse, newinner->slotkey); 02072 02073 for (unsigned short slot = 0; slot <= inner->slotuse; ++slot) 02074 { 02075 newinner->childid[slot] = copy_recursive(inner->childid[slot]); 02076 } 02077 02078 return newinner; 02079 } 02080 } 02081 02082 public: 02083 // *** Public Insertion Functions 02084 02085 /// Attempt to insert a key/data pair into the B+ tree. If the tree does not 02086 /// allow duplicate keys, then the insert may fail if it is already 02087 /// present. 02088 inline std::pair<iterator, bool> insert(const pair_type& x) 02089 { 02090 return insert_start(x.first, x.second); 02091 } 02092 02093 /// Attempt to insert a key/data pair into the B+ tree. Beware that if 02094 /// key_type == data_type, then the template iterator insert() is called 02095 /// instead. If the tree does not allow duplicate keys, then the insert may 02096 /// fail if it is already present. 02097 inline std::pair<iterator, bool> insert(const key_type& key, const data_type& data) 02098 { 02099 return insert_start(key, data); 02100 } 02101 02102 /// Attempt to insert a key/data pair into the B+ tree. This function is the 02103 /// same as the other insert, however if key_type == data_type then the 02104 /// non-template function cannot be called. If the tree does not allow 02105 /// duplicate keys, then the insert may fail if it is already present. 02106 inline std::pair<iterator, bool> insert2(const key_type& key, const data_type& data) 02107 { 02108 return insert_start(key, data); 02109 } 02110 02111 /// Attempt to insert a key/data pair into the B+ tree. The iterator hint 02112 /// is currently ignored by the B+ tree insertion routine. 02113 inline iterator insert(iterator /* hint */, const pair_type &x) 02114 { 02115 return insert_start(x.first, x.second).first; 02116 } 02117 02118 /// Attempt to insert a key/data pair into the B+ tree. The iterator hint is 02119 /// currently ignored by the B+ tree insertion routine. 02120 inline iterator insert2(iterator /* hint */, const key_type& key, const data_type& data) 02121 { 02122 return insert_start(key, data).first; 02123 } 02124 02125 /// Attempt to insert the range [first,last) of value_type pairs into the 02126 /// B+ tree. Each key/data pair is inserted individually; to bulk load the 02127 /// tree, use a constructor with range. 02128 template <typename InputIterator> 02129 inline void insert(InputIterator first, InputIterator last) 02130 { 02131 InputIterator iter = first; 02132 while(iter != last) 02133 { 02134 insert(*iter); 02135 ++iter; 02136 } 02137 } 02138 02139 private: 02140 // *** Private Insertion Functions 02141 02142 /// Start the insertion descent at the current root and handle root 02143 /// splits. Returns true if the item was inserted 02144 std::pair<iterator, bool> insert_start(const key_type& key, const data_type& value) 02145 { 02146 node *newchild = NULL; 02147 key_type newkey = key_type(); 02148 02149 if (m_root == NULL) { 02150 m_root = m_headleaf = m_tailleaf = allocate_leaf(); 02151 } 02152 02153 std::pair<iterator, bool> r = insert_descend(m_root, key, value, &newkey, &newchild); 02154 02155 if (newchild) 02156 { 02157 inner_node *newroot = allocate_inner(m_root->level + 1); 02158 newroot->slotkey[0] = newkey; 02159 02160 newroot->childid[0] = m_root; 02161 newroot->childid[1] = newchild; 02162 02163 newroot->slotuse = 1; 02164 02165 m_root = newroot; 02166 } 02167 02168 // increment itemcount if the item was inserted 02169 if (r.second) ++m_stats.itemcount; 02170 02171 #ifdef BTREE_DEBUG 02172 if (debug) print(std::cout); 02173 #endif 02174 02175 if (selfverify) { 02176 verify(); 02177 BTREE_ASSERT(exists(key)); 02178 } 02179 02180 return r; 02181 } 02182 02183 /** 02184 * @brief Insert an item into the B+ tree. 02185 * 02186 * Descend down the nodes to a leaf, insert the key/data pair in a free 02187 * slot. If the node overflows, then it must be split and the new split 02188 * node inserted into the parent. Unroll / this splitting up to the root. 02189 */ 02190 std::pair<iterator, bool> insert_descend(node* n, 02191 const key_type& key, const data_type& value, 02192 key_type* splitkey, node** splitnode) 02193 { 02194 if (!n->isleafnode()) 02195 { 02196 inner_node *inner = static_cast<inner_node*>(n); 02197 02198 key_type newkey = key_type(); 02199 node *newchild = NULL; 02200 02201 int slot = find_lower(inner, key); 02202 02203 BTREE_PRINT("btree::insert_descend into " << inner->childid[slot]); 02204 02205 std::pair<iterator, bool> r = insert_descend(inner->childid[slot], 02206 key, value, &newkey, &newchild); 02207 02208 if (newchild) 02209 { 02210 BTREE_PRINT("btree::insert_descend newchild with key " << newkey << " node " << newchild << " at slot " << slot); 02211 02212 if (inner->isfull()) 02213 { 02214 split_inner_node(inner, splitkey, splitnode, slot); 02215 02216 BTREE_PRINT("btree::insert_descend done split_inner: putslot: " << slot << " putkey: " << newkey << " upkey: " << *splitkey); 02217 02218 #ifdef BTREE_DEBUG 02219 if (debug) 02220 { 02221 print_node(std::cout, inner); 02222 print_node(std::cout, *splitnode); 02223 } 02224 #endif 02225 02226 // check if insert slot is in the split sibling node 02227 BTREE_PRINT("btree::insert_descend switch: " << slot << " > " << inner->slotuse+1); 02228 02229 if (slot == inner->slotuse+1 && inner->slotuse < (*splitnode)->slotuse) 02230 { 02231 // special case when the insert slot matches the split 02232 // place between the two nodes, then the insert key 02233 // becomes the split key. 02234 02235 BTREE_ASSERT(inner->slotuse + 1 < innerslotmax); 02236 02237 inner_node *splitinner = static_cast<inner_node*>(*splitnode); 02238 02239 // move the split key and it's datum into the left node 02240 inner->slotkey[inner->slotuse] = *splitkey; 02241 inner->childid[inner->slotuse+1] = splitinner->childid[0]; 02242 inner->slotuse++; 02243 02244 // set new split key and move corresponding datum into right node 02245 splitinner->childid[0] = newchild; 02246 *splitkey = newkey; 02247 02248 return r; 02249 } 02250 else if (slot >= inner->slotuse+1) 02251 { 02252 // in case the insert slot is in the newly create split 02253 // node, we reuse the code below. 02254 02255 slot -= inner->slotuse+1; 02256 inner = static_cast<inner_node*>(*splitnode); 02257 BTREE_PRINT("btree::insert_descend switching to splitted node " << inner << " slot " << slot); 02258 } 02259 } 02260 02261 // move items and put pointer to child node into correct slot 02262 BTREE_ASSERT(slot >= 0 && slot <= inner->slotuse); 02263 02264 std::copy_backward(inner->slotkey + slot, inner->slotkey + inner->slotuse, 02265 inner->slotkey + inner->slotuse+1); 02266 std::copy_backward(inner->childid + slot, inner->childid + inner->slotuse+1, 02267 inner->childid + inner->slotuse+2); 02268 02269 inner->slotkey[slot] = newkey; 02270 inner->childid[slot + 1] = newchild; 02271 inner->slotuse++; 02272 } 02273 02274 return r; 02275 } 02276 else // n->isleafnode() == true 02277 { 02278 leaf_node *leaf = static_cast<leaf_node*>(n); 02279 02280 int slot = find_lower(leaf, key); 02281 02282 if (!allow_duplicates && slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) { 02283 return std::pair<iterator, bool>(iterator(leaf, slot), false); 02284 } 02285 02286 if (leaf->isfull()) 02287 { 02288 split_leaf_node(leaf, splitkey, splitnode); 02289 02290 // check if insert slot is in the split sibling node 02291 if (slot >= leaf->slotuse) 02292 { 02293 slot -= leaf->slotuse; 02294 leaf = static_cast<leaf_node*>(*splitnode); 02295 } 02296 } 02297 02298 // move items and put data item into correct data slot 02299 BTREE_ASSERT(slot >= 0 && slot <= leaf->slotuse); 02300 02301 std::copy_backward(leaf->slotkey + slot, leaf->slotkey + leaf->slotuse, 02302 leaf->slotkey + leaf->slotuse+1); 02303 data_copy_backward(leaf->slotdata + slot, leaf->slotdata + leaf->slotuse, 02304 leaf->slotdata + leaf->slotuse+1); 02305 02306 leaf->slotkey[slot] = key; 02307 if (!used_as_set) leaf->slotdata[slot] = value; 02308 leaf->slotuse++; 02309 02310 if (splitnode && leaf != *splitnode && slot == leaf->slotuse-1) 02311 { 02312 // special case: the node was split, and the insert is at the 02313 // last slot of the old node. then the splitkey must be 02314 // updated. 02315 *splitkey = key; 02316 } 02317 02318 return std::pair<iterator, bool>(iterator(leaf, slot), true); 02319 } 02320 } 02321 02322 /// Split up a leaf node into two equally-filled sibling leaves. Returns 02323 /// the new nodes and it's insertion key in the two parameters. 02324 void split_leaf_node(leaf_node* leaf, key_type* _newkey, node** _newleaf) 02325 { 02326 BTREE_ASSERT(leaf->isfull()); 02327 02328 unsigned int mid = (leaf->slotuse >> 1); 02329 02330 BTREE_PRINT("btree::split_leaf_node on " << leaf); 02331 02332 leaf_node *newleaf = allocate_leaf(); 02333 02334 newleaf->slotuse = leaf->slotuse - mid; 02335 02336 newleaf->nextleaf = leaf->nextleaf; 02337 if (newleaf->nextleaf == NULL) { 02338 BTREE_ASSERT(leaf == m_tailleaf); 02339 m_tailleaf = newleaf; 02340 } 02341 else { 02342 newleaf->nextleaf->prevleaf = newleaf; 02343 } 02344 02345 std::copy(leaf->slotkey + mid, leaf->slotkey + leaf->slotuse, 02346 newleaf->slotkey); 02347 data_copy(leaf->slotdata + mid, leaf->slotdata + leaf->slotuse, 02348 newleaf->slotdata); 02349 02350 leaf->slotuse = mid; 02351 leaf->nextleaf = newleaf; 02352 newleaf->prevleaf = leaf; 02353 02354 *_newkey = leaf->slotkey[leaf->slotuse-1]; 02355 *_newleaf = newleaf; 02356 } 02357 02358 /// Split up an inner node into two equally-filled sibling nodes. Returns 02359 /// the new nodes and it's insertion key in the two parameters. Requires 02360 /// the slot of the item will be inserted, so the nodes will be the same 02361 /// size after the insert. 02362 void split_inner_node(inner_node* inner, key_type* _newkey, node** _newinner, unsigned int addslot) 02363 { 02364 BTREE_ASSERT(inner->isfull()); 02365 02366 unsigned int mid = (inner->slotuse >> 1); 02367 02368 BTREE_PRINT("btree::split_inner: mid " << mid << " addslot " << addslot); 02369 02370 // if the split is uneven and the overflowing item will be put into the 02371 // larger node, then the smaller split node may underflow 02372 if (addslot <= mid && mid > inner->slotuse - (mid + 1)) 02373 mid--; 02374 02375 BTREE_PRINT("btree::split_inner: mid " << mid << " addslot " << addslot); 02376 02377 BTREE_PRINT("btree::split_inner_node on " << inner << " into two nodes " << mid << " and " << inner->slotuse - (mid + 1) << " sized"); 02378 02379 inner_node *newinner = allocate_inner(inner->level); 02380 02381 newinner->slotuse = inner->slotuse - (mid + 1); 02382 02383 std::copy(inner->slotkey + mid+1, inner->slotkey + inner->slotuse, 02384 newinner->slotkey); 02385 std::copy(inner->childid + mid+1, inner->childid + inner->slotuse+1, 02386 newinner->childid); 02387 02388 inner->slotuse = mid; 02389 02390 *_newkey = inner->slotkey[mid]; 02391 *_newinner = newinner; 02392 } 02393 02394 public: 02395 02396 // *** Bulk Loader - Construct Tree from Sorted Sequence 02397 02398 /// Bulk load a sorted range. Loads items into leaves and constructs a 02399 /// B-tree above them. The tree must be empty when calling this function. 02400 template <typename Iterator> 02401 void bulk_load(Iterator ibegin, Iterator iend) 02402 { 02403 BTREE_ASSERT(empty()); 02404 02405 m_stats.itemcount = iend - ibegin; 02406 02407 // calculate number of leaves needed, round up. 02408 size_t num_items = iend - ibegin; 02409 size_t num_leaves = (num_items + leafslotmax-1) / leafslotmax; 02410 02411 BTREE_PRINT("btree::bulk_load, level 0: " << m_stats.itemcount << " items into " << num_leaves << " leaves with up to " << ((iend - ibegin + num_leaves-1) / num_leaves) << " items per leaf."); 02412 02413 Iterator it = ibegin; 02414 for (size_t i = 0; i < num_leaves; ++i) 02415 { 02416 // allocate new leaf node 02417 leaf_node* leaf = allocate_leaf(); 02418 02419 // copy keys or (key,value) pairs into leaf nodes, uses template 02420 // switch leaf->set_slot(). 02421 leaf->slotuse = static_cast<int>(num_items / (num_leaves-i)); 02422 for (size_t s = 0; s < leaf->slotuse; ++s, ++it) 02423 leaf->set_slot(s, *it); 02424 02425 if (m_tailleaf != NULL) { 02426 m_tailleaf->nextleaf = leaf; 02427 leaf->prevleaf = m_tailleaf; 02428 } 02429 else { 02430 m_headleaf = leaf; 02431 } 02432 m_tailleaf = leaf; 02433 02434 num_items -= leaf->slotuse; 02435 } 02436 02437 BTREE_ASSERT( it == iend && num_items == 0 ); 02438 02439 // if the btree is so small to fit into one leaf, then we're done. 02440 if (m_headleaf == m_tailleaf) { 02441 m_root = m_headleaf; 02442 return; 02443 } 02444 02445 BTREE_ASSERT( m_stats.leaves == num_leaves ); 02446 02447 // create first level of inner nodes, pointing to the leaves. 02448 size_t num_parents = (num_leaves + (innerslotmax+1)-1) / (innerslotmax+1); 02449 02450 BTREE_PRINT("btree::bulk_load, level 1: " << num_leaves << " leaves in " << num_parents << " inner nodes with up to " << ((num_leaves + num_parents-1) / num_parents) << " leaves per inner node."); 02451 02452 // save inner nodes and maxkey for next level. 02453 typedef std::pair<inner_node*, const key_type*> nextlevel_type; 02454 nextlevel_type* nextlevel = new nextlevel_type[num_parents]; 02455 02456 leaf_node* leaf = m_headleaf; 02457 for (size_t i = 0; i < num_parents; ++i) 02458 { 02459 // allocate new inner node at level 1 02460 inner_node* n = allocate_inner(1); 02461 02462 n->slotuse = static_cast<int>(num_leaves / (num_parents-i)); 02463 BTREE_ASSERT(n->slotuse > 0); 02464 --n->slotuse; // this counts keys, but an inner node has keys+1 children. 02465 02466 // copy last key from each leaf and set child 02467 for (unsigned short s = 0; s < n->slotuse; ++s) 02468 { 02469 n->slotkey[s] = leaf->slotkey[leaf->slotuse-1]; 02470 n->childid[s] = leaf; 02471 leaf = leaf->nextleaf; 02472 } 02473 n->childid[n->slotuse] = leaf; 02474 02475 // track max key of any descendant. 02476 nextlevel[i].first = n; 02477 nextlevel[i].second = &leaf->slotkey[leaf->slotuse-1]; 02478 02479 leaf = leaf->nextleaf; 02480 num_leaves -= n->slotuse+1; 02481 } 02482 02483 BTREE_ASSERT( leaf == NULL && num_leaves == 0 ); 02484 02485 // recursively build inner nodes pointing to inner nodes. 02486 for (int level = 2; num_parents != 1; ++level) 02487 { 02488 size_t num_children = num_parents; 02489 num_parents = (num_children + (innerslotmax+1)-1) / (innerslotmax+1); 02490 02491 BTREE_PRINT("btree::bulk_load, level " << level << ": " << num_children << " children in " << num_parents << " inner nodes with up to " << ((num_children + num_parents-1) / num_parents) << " children per inner node."); 02492 02493 size_t inner_index = 0; 02494 for (size_t i = 0; i < num_parents; ++i) 02495 { 02496 // allocate new inner node at level 02497 inner_node* n = allocate_inner(level); 02498 02499 n->slotuse = static_cast<int>(num_children / (num_parents-i)); 02500 BTREE_ASSERT(n->slotuse > 0); 02501 --n->slotuse; // this counts keys, but an inner node has keys+1 children. 02502 02503 // copy children and maxkeys from nextlevel 02504 for (unsigned short s = 0; s < n->slotuse; ++s) 02505 { 02506 n->slotkey[s] = *nextlevel[inner_index].second; 02507 n->childid[s] = nextlevel[inner_index].first; 02508 ++inner_index; 02509 } 02510 n->childid[n->slotuse] = nextlevel[inner_index].first; 02511 02512 // reuse nextlevel array for parents, because we can overwrite 02513 // slots we've already consumed. 02514 nextlevel[i].first = n; 02515 nextlevel[i].second = nextlevel[inner_index].second; 02516 02517 ++inner_index; 02518 num_children -= n->slotuse+1; 02519 } 02520 02521 BTREE_ASSERT( num_children == 0 ); 02522 } 02523 02524 m_root = nextlevel[0].first; 02525 delete [] nextlevel; 02526 02527 if (selfverify) verify(); 02528 } 02529 02530 private: 02531 // *** Support Class Encapsulating Deletion Results 02532 02533 /// Result flags of recursive deletion. 02534 enum result_flags_t 02535 { 02536 /// Deletion successful and no fix-ups necessary. 02537 btree_ok = 0, 02538 02539 /// Deletion not successful because key was not found. 02540 btree_not_found = 1, 02541 02542 /// Deletion successful, the last key was updated so parent slotkeys 02543 /// need updates. 02544 btree_update_lastkey = 2, 02545 02546 /// Deletion successful, children nodes were merged and the parent 02547 /// needs to remove the empty node. 02548 btree_fixmerge = 4 02549 }; 02550 02551 /// B+ tree recursive deletion has much information which is needs to be 02552 /// passed upward. 02553 struct result_t 02554 { 02555 /// Merged result flags 02556 result_flags_t flags; 02557 02558 /// The key to be updated at the parent's slot 02559 key_type lastkey; 02560 02561 /// Constructor of a result with a specific flag, this can also be used 02562 /// as for implicit conversion. 02563 inline result_t(result_flags_t f = btree_ok) 02564 : flags(f), lastkey() 02565 { } 02566 02567 /// Constructor with a lastkey value. 02568 inline result_t(result_flags_t f, const key_type &k) 02569 : flags(f), lastkey(k) 02570 { } 02571 02572 /// Test if this result object has a given flag set. 02573 inline bool has(result_flags_t f) const 02574 { 02575 return (flags & f) != 0; 02576 } 02577 02578 /// Merge two results OR-ing the result flags and overwriting lastkeys. 02579 inline result_t& operator|= (const result_t &other) 02580 { 02581 flags = result_flags_t(flags | other.flags); 02582 02583 // we overwrite existing lastkeys on purpose 02584 if (other.has(btree_update_lastkey)) 02585 lastkey = other.lastkey; 02586 02587 return *this; 02588 } 02589 }; 02590 02591 public: 02592 // *** Public Erase Functions 02593 02594 /// Erases one (the first) of the key/data pairs associated with the given 02595 /// key. 02596 bool erase_one(const key_type &key) 02597 { 02598 BTREE_PRINT("btree::erase_one(" << key << ") on btree size " << size()); 02599 02600 if (selfverify) verify(); 02601 02602 if (!m_root) return false; 02603 02604 result_t result = erase_one_descend(key, m_root, NULL, NULL, NULL, NULL, NULL, 0); 02605 02606 if (!result.has(btree_not_found)) 02607 --m_stats.itemcount; 02608 02609 #ifdef BTREE_DEBUG 02610 if (debug) print(std::cout); 02611 #endif 02612 if (selfverify) verify(); 02613 02614 return !result.has(btree_not_found); 02615 } 02616 02617 /// Erases all the key/data pairs associated with the given key. This is 02618 /// implemented using erase_one(). 02619 size_type erase(const key_type &key) 02620 { 02621 size_type c = 0; 02622 02623 while( erase_one(key) ) 02624 { 02625 ++c; 02626 if (!allow_duplicates) break; 02627 } 02628 02629 return c; 02630 } 02631 02632 /// Erase the key/data pair referenced by the iterator. 02633 void erase(iterator iter) 02634 { 02635 BTREE_PRINT("btree::erase_iter(" << iter.currnode << "," << iter.currslot << ") on btree size " << size()); 02636 02637 if (selfverify) verify(); 02638 02639 if (!m_root) return; 02640 02641 result_t result = erase_iter_descend(iter, m_root, NULL, NULL, NULL, NULL, NULL, 0); 02642 02643 if (!result.has(btree_not_found)) 02644 --m_stats.itemcount; 02645 02646 #ifdef BTREE_DEBUG 02647 if (debug) print(std::cout); 02648 #endif 02649 if (selfverify) verify(); 02650 } 02651 02652 #ifdef BTREE_TODO 02653 /// Erase all key/data pairs in the range [first,last). This function is 02654 /// currently not implemented by the B+ Tree. 02655 void erase(iterator /* first */, iterator /* last */) 02656 { 02657 abort(); 02658 } 02659 #endif 02660 02661 private: 02662 // *** Private Erase Functions 02663 02664 /** @brief Erase one (the first) key/data pair in the B+ tree matching key. 02665 * 02666 * Descends down the tree in search of key. During the descent the parent, 02667 * left and right siblings and their parents are computed and passed 02668 * down. Once the key/data pair is found, it is removed from the leaf. If 02669 * the leaf underflows 6 different cases are handled. These cases resolve 02670 * the underflow by shifting key/data pairs from adjacent sibling nodes, 02671 * merging two sibling nodes or trimming the tree. 02672 */ 02673 result_t erase_one_descend(const key_type& key, 02674 node *curr, 02675 node *left, node *right, 02676 inner_node *leftparent, inner_node *rightparent, 02677 inner_node *parent, unsigned int parentslot) 02678 { 02679 if (curr->isleafnode()) 02680 { 02681 leaf_node *leaf = static_cast<leaf_node*>(curr); 02682 leaf_node *leftleaf = static_cast<leaf_node*>(left); 02683 leaf_node *rightleaf = static_cast<leaf_node*>(right); 02684 02685 int slot = find_lower(leaf, key); 02686 02687 if (slot >= leaf->slotuse || !key_equal(key, leaf->slotkey[slot])) 02688 { 02689 BTREE_PRINT("Could not find key " << key << " to erase."); 02690 02691 return btree_not_found; 02692 } 02693 02694 BTREE_PRINT("Found key in leaf " << curr << " at slot " << slot); 02695 02696 std::copy(leaf->slotkey + slot+1, leaf->slotkey + leaf->slotuse, 02697 leaf->slotkey + slot); 02698 data_copy(leaf->slotdata + slot+1, leaf->slotdata + leaf->slotuse, 02699 leaf->slotdata + slot); 02700 02701 leaf->slotuse--; 02702 02703 result_t myres = btree_ok; 02704 02705 // if the last key of the leaf was changed, the parent is notified 02706 // and updates the key of this leaf 02707 if (slot == leaf->slotuse) 02708 { 02709 if (parent && parentslot < parent->slotuse) 02710 { 02711 BTREE_ASSERT(parent->childid[parentslot] == curr); 02712 parent->slotkey[parentslot] = leaf->slotkey[leaf->slotuse - 1]; 02713 } 02714 else 02715 { 02716 if (leaf->slotuse >= 1) 02717 { 02718 BTREE_PRINT("Scheduling lastkeyupdate: key " << leaf->slotkey[leaf->slotuse - 1]); 02719 myres |= result_t(btree_update_lastkey, leaf->slotkey[leaf->slotuse - 1]); 02720 } 02721 else 02722 { 02723 BTREE_ASSERT(leaf == m_root); 02724 } 02725 } 02726 } 02727 02728 if (leaf->isunderflow() && !(leaf == m_root && leaf->slotuse >= 1)) 02729 { 02730 // determine what to do about the underflow 02731 02732 // case : if this empty leaf is the root, then delete all nodes 02733 // and set root to NULL. 02734 if (leftleaf == NULL && rightleaf == NULL) 02735 { 02736 BTREE_ASSERT(leaf == m_root); 02737 BTREE_ASSERT(leaf->slotuse == 0); 02738 02739 free_node(m_root); 02740 02741 m_root = leaf = NULL; 02742 m_headleaf = m_tailleaf = NULL; 02743 02744 // will be decremented soon by insert_start() 02745 BTREE_ASSERT(m_stats.itemcount == 1); 02746 BTREE_ASSERT(m_stats.leaves == 0); 02747 BTREE_ASSERT(m_stats.innernodes == 0); 02748 02749 return btree_ok; 02750 } 02751 // case : if both left and right leaves would underflow in case of 02752 // a shift, then merging is necessary. choose the more local merger 02753 // with our parent 02754 else if ( (leftleaf == NULL || leftleaf->isfew()) && (rightleaf == NULL || rightleaf->isfew()) ) 02755 { 02756 if (leftparent == parent) 02757 myres |= merge_leaves(leftleaf, leaf, leftparent); 02758 else 02759 myres |= merge_leaves(leaf, rightleaf, rightparent); 02760 } 02761 // case : the right leaf has extra data, so balance right with current 02762 else if ( (leftleaf != NULL && leftleaf->isfew()) && (rightleaf != NULL && !rightleaf->isfew()) ) 02763 { 02764 if (rightparent == parent) 02765 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot); 02766 else 02767 myres |= merge_leaves(leftleaf, leaf, leftparent); 02768 } 02769 // case : the left leaf has extra data, so balance left with current 02770 else if ( (leftleaf != NULL && !leftleaf->isfew()) && (rightleaf != NULL && rightleaf->isfew()) ) 02771 { 02772 if (leftparent == parent) 02773 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1); 02774 else 02775 myres |= merge_leaves(leaf, rightleaf, rightparent); 02776 } 02777 // case : both the leaf and right leaves have extra data and our 02778 // parent, choose the leaf with more data 02779 else if (leftparent == rightparent) 02780 { 02781 if (leftleaf->slotuse <= rightleaf->slotuse) 02782 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot); 02783 else 02784 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1); 02785 } 02786 else 02787 { 02788 if (leftparent == parent) 02789 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1); 02790 else 02791 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot); 02792 } 02793 } 02794 02795 return myres; 02796 } 02797 else // !curr->isleafnode() 02798 { 02799 inner_node *inner = static_cast<inner_node*>(curr); 02800 inner_node *leftinner = static_cast<inner_node*>(left); 02801 inner_node *rightinner = static_cast<inner_node*>(right); 02802 02803 node *myleft, *myright; 02804 inner_node *myleftparent, *myrightparent; 02805 02806 int slot = find_lower(inner, key); 02807 02808 if (slot == 0) { 02809 myleft = (left == NULL) ? NULL : (static_cast<inner_node*>(left))->childid[left->slotuse - 1]; 02810 myleftparent = leftparent; 02811 } 02812 else { 02813 myleft = inner->childid[slot - 1]; 02814 myleftparent = inner; 02815 } 02816 02817 if (slot == inner->slotuse) { 02818 myright = (right == NULL) ? NULL : (static_cast<inner_node*>(right))->childid[0]; 02819 myrightparent = rightparent; 02820 } 02821 else { 02822 myright = inner->childid[slot + 1]; 02823 myrightparent = inner; 02824 } 02825 02826 BTREE_PRINT("erase_one_descend into " << inner->childid[slot]); 02827 02828 result_t result = erase_one_descend(key, 02829 inner->childid[slot], 02830 myleft, myright, 02831 myleftparent, myrightparent, 02832 inner, slot); 02833 02834 result_t myres = btree_ok; 02835 02836 if (result.has(btree_not_found)) 02837 { 02838 return result; 02839 } 02840 02841 if (result.has(btree_update_lastkey)) 02842 { 02843 if (parent && parentslot < parent->slotuse) 02844 { 02845 BTREE_PRINT("Fixing lastkeyupdate: key " << result.lastkey << " into parent " << parent << " at parentslot " << parentslot); 02846 02847 BTREE_ASSERT(parent->childid[parentslot] == curr); 02848 parent->slotkey[parentslot] = result.lastkey; 02849 } 02850 else 02851 { 02852 BTREE_PRINT("Forwarding lastkeyupdate: key " << result.lastkey); 02853 myres |= result_t(btree_update_lastkey, result.lastkey); 02854 } 02855 } 02856 02857 if (result.has(btree_fixmerge)) 02858 { 02859 // either the current node or the next is empty and should be removed 02860 if (inner->childid[slot]->slotuse != 0) 02861 slot++; 02862 02863 // this is the child slot invalidated by the merge 02864 BTREE_ASSERT(inner->childid[slot]->slotuse == 0); 02865 02866 free_node(inner->childid[slot]); 02867 02868 std::copy(inner->slotkey + slot, inner->slotkey + inner->slotuse, 02869 inner->slotkey + slot-1); 02870 std::copy(inner->childid + slot+1, inner->childid + inner->slotuse+1, 02871 inner->childid + slot); 02872 02873 inner->slotuse--; 02874 02875 if (inner->level == 1) 02876 { 02877 // fix split key for children leaves 02878 slot--; 02879 leaf_node *child = static_cast<leaf_node*>(inner->childid[slot]); 02880 inner->slotkey[slot] = child->slotkey[ child->slotuse-1 ]; 02881 } 02882 } 02883 02884 if (inner->isunderflow() && !(inner == m_root && inner->slotuse >= 1)) 02885 { 02886 // case: the inner node is the root and has just one child. that child becomes the new root 02887 if (leftinner == NULL && rightinner == NULL) 02888 { 02889 BTREE_ASSERT(inner == m_root); 02890 BTREE_ASSERT(inner->slotuse == 0); 02891 02892 m_root = inner->childid[0]; 02893 02894 inner->slotuse = 0; 02895 free_node(inner); 02896 02897 return btree_ok; 02898 } 02899 // case : if both left and right leaves would underflow in case of 02900 // a shift, then merging is necessary. choose the more local merger 02901 // with our parent 02902 else if ( (leftinner == NULL || leftinner->isfew()) && (rightinner == NULL || rightinner->isfew()) ) 02903 { 02904 if (leftparent == parent) 02905 myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1); 02906 else 02907 myres |= merge_inner(inner, rightinner, rightparent, parentslot); 02908 } 02909 // case : the right leaf has extra data, so balance right with current 02910 else if ( (leftinner != NULL && leftinner->isfew()) && (rightinner != NULL && !rightinner->isfew()) ) 02911 { 02912 if (rightparent == parent) 02913 shift_left_inner(inner, rightinner, rightparent, parentslot); 02914 else 02915 myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1); 02916 } 02917 // case : the left leaf has extra data, so balance left with current 02918 else if ( (leftinner != NULL && !leftinner->isfew()) && (rightinner != NULL && rightinner->isfew()) ) 02919 { 02920 if (leftparent == parent) 02921 shift_right_inner(leftinner, inner, leftparent, parentslot - 1); 02922 else 02923 myres |= merge_inner(inner, rightinner, rightparent, parentslot); 02924 } 02925 // case : both the leaf and right leaves have extra data and our 02926 // parent, choose the leaf with more data 02927 else if (leftparent == rightparent) 02928 { 02929 if (leftinner->slotuse <= rightinner->slotuse) 02930 shift_left_inner(inner, rightinner, rightparent, parentslot); 02931 else 02932 shift_right_inner(leftinner, inner, leftparent, parentslot - 1); 02933 } 02934 else 02935 { 02936 if (leftparent == parent) 02937 shift_right_inner(leftinner, inner, leftparent, parentslot - 1); 02938 else 02939 shift_left_inner(inner, rightinner, rightparent, parentslot); 02940 } 02941 } 02942 02943 return myres; 02944 } 02945 } 02946 02947 /** @brief Erase one key/data pair referenced by an iterator in the B+ 02948 * tree. 02949 * 02950 * Descends down the tree in search of an iterator. During the descent the 02951 * parent, left and right siblings and their parents are computed and 02952 * passed down. The difficulty is that the iterator contains only a pointer 02953 * to a leaf_node, which means that this function must do a recursive depth 02954 * first search for that leaf node in the subtree containing all pairs of 02955 * the same key. This subtree can be very large, even the whole tree, 02956 * though in practice it would not make sense to have so many duplicate 02957 * keys. 02958 * 02959 * Once the referenced key/data pair is found, it is removed from the leaf 02960 * and the same underflow cases are handled as in erase_one_descend. 02961 */ 02962 result_t erase_iter_descend(const iterator& iter, 02963 node *curr, 02964 node *left, node *right, 02965 inner_node *leftparent, inner_node *rightparent, 02966 inner_node *parent, unsigned int parentslot) 02967 { 02968 if (curr->isleafnode()) 02969 { 02970 leaf_node *leaf = static_cast<leaf_node*>(curr); 02971 leaf_node *leftleaf = static_cast<leaf_node*>(left); 02972 leaf_node *rightleaf = static_cast<leaf_node*>(right); 02973 02974 // if this is not the correct leaf, get next step in recursive 02975 // search 02976 if (leaf != iter.currnode) 02977 { 02978 return btree_not_found; 02979 } 02980 02981 if (iter.currslot >= leaf->slotuse) 02982 { 02983 BTREE_PRINT("Could not find iterator (" << iter.currnode << "," << iter.currslot << ") to erase. Invalid leaf node?"); 02984 02985 return btree_not_found; 02986 } 02987 02988 int slot = iter.currslot; 02989 02990 BTREE_PRINT("Found iterator in leaf " << curr << " at slot " << slot); 02991 02992 std::copy(leaf->slotkey + slot+1, leaf->slotkey + leaf->slotuse, 02993 leaf->slotkey + slot); 02994 data_copy(leaf->slotdata + slot+1, leaf->slotdata + leaf->slotuse, 02995 leaf->slotdata + slot); 02996 02997 leaf->slotuse--; 02998 02999 result_t myres = btree_ok; 03000 03001 // if the last key of the leaf was changed, the parent is notified 03002 // and updates the key of this leaf 03003 if (slot == leaf->slotuse) 03004 { 03005 if (parent && parentslot < parent->slotuse) 03006 { 03007 BTREE_ASSERT(parent->childid[parentslot] == curr); 03008 parent->slotkey[parentslot] = leaf->slotkey[leaf->slotuse - 1]; 03009 } 03010 else 03011 { 03012 if (leaf->slotuse >= 1) 03013 { 03014 BTREE_PRINT("Scheduling lastkeyupdate: key " << leaf->slotkey[leaf->slotuse - 1]); 03015 myres |= result_t(btree_update_lastkey, leaf->slotkey[leaf->slotuse - 1]); 03016 } 03017 else 03018 { 03019 BTREE_ASSERT(leaf == m_root); 03020 } 03021 } 03022 } 03023 03024 if (leaf->isunderflow() && !(leaf == m_root && leaf->slotuse >= 1)) 03025 { 03026 // determine what to do about the underflow 03027 03028 // case : if this empty leaf is the root, then delete all nodes 03029 // and set root to NULL. 03030 if (leftleaf == NULL && rightleaf == NULL) 03031 { 03032 BTREE_ASSERT(leaf == m_root); 03033 BTREE_ASSERT(leaf->slotuse == 0); 03034 03035 free_node(m_root); 03036 03037 m_root = leaf = NULL; 03038 m_headleaf = m_tailleaf = NULL; 03039 03040 // will be decremented soon by insert_start() 03041 BTREE_ASSERT(m_stats.itemcount == 1); 03042 BTREE_ASSERT(m_stats.leaves == 0); 03043 BTREE_ASSERT(m_stats.innernodes == 0); 03044 03045 return btree_ok; 03046 } 03047 // case : if both left and right leaves would underflow in case of 03048 // a shift, then merging is necessary. choose the more local merger 03049 // with our parent 03050 else if ( (leftleaf == NULL || leftleaf->isfew()) && (rightleaf == NULL || rightleaf->isfew()) ) 03051 { 03052 if (leftparent == parent) 03053 myres |= merge_leaves(leftleaf, leaf, leftparent); 03054 else 03055 myres |= merge_leaves(leaf, rightleaf, rightparent); 03056 } 03057 // case : the right leaf has extra data, so balance right with current 03058 else if ( (leftleaf != NULL && leftleaf->isfew()) && (rightleaf != NULL && !rightleaf->isfew()) ) 03059 { 03060 if (rightparent == parent) 03061 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot); 03062 else 03063 myres |= merge_leaves(leftleaf, leaf, leftparent); 03064 } 03065 // case : the left leaf has extra data, so balance left with current 03066 else if ( (leftleaf != NULL && !leftleaf->isfew()) && (rightleaf != NULL && rightleaf->isfew()) ) 03067 { 03068 if (leftparent == parent) 03069 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1); 03070 else 03071 myres |= merge_leaves(leaf, rightleaf, rightparent); 03072 } 03073 // case : both the leaf and right leaves have extra data and our 03074 // parent, choose the leaf with more data 03075 else if (leftparent == rightparent) 03076 { 03077 if (leftleaf->slotuse <= rightleaf->slotuse) 03078 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot); 03079 else 03080 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1); 03081 } 03082 else 03083 { 03084 if (leftparent == parent) 03085 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1); 03086 else 03087 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot); 03088 } 03089 } 03090 03091 return myres; 03092 } 03093 else // !curr->isleafnode() 03094 { 03095 inner_node *inner = static_cast<inner_node*>(curr); 03096 inner_node *leftinner = static_cast<inner_node*>(left); 03097 inner_node *rightinner = static_cast<inner_node*>(right); 03098 03099 // find first slot below which the searched iterator might be 03100 // located. 03101 03102 result_t result; 03103 int slot = find_lower(inner, iter.key()); 03104 03105 while (slot <= inner->slotuse) 03106 { 03107 node *myleft, *myright; 03108 inner_node *myleftparent, *myrightparent; 03109 03110 if (slot == 0) { 03111 myleft = (left == NULL) ? NULL : (static_cast<inner_node*>(left))->childid[left->slotuse - 1]; 03112 myleftparent = leftparent; 03113 } 03114 else { 03115 myleft = inner->childid[slot - 1]; 03116 myleftparent = inner; 03117 } 03118 03119 if (slot == inner->slotuse) { 03120 myright = (right == NULL) ? NULL : (static_cast<inner_node*>(right))->childid[0]; 03121 myrightparent = rightparent; 03122 } 03123 else { 03124 myright = inner->childid[slot + 1]; 03125 myrightparent = inner; 03126 } 03127 03128 BTREE_PRINT("erase_iter_descend into " << inner->childid[slot]); 03129 03130 result = erase_iter_descend(iter, 03131 inner->childid[slot], 03132 myleft, myright, 03133 myleftparent, myrightparent, 03134 inner, slot); 03135 03136 if (!result.has(btree_not_found)) 03137 break; 03138 03139 // continue recursive search for leaf on next slot 03140 03141 if (slot < inner->slotuse && key_less(inner->slotkey[slot],iter.key())) 03142 return btree_not_found; 03143 03144 ++slot; 03145 } 03146 03147 if (slot > inner->slotuse) 03148 return btree_not_found; 03149 03150 result_t myres = btree_ok; 03151 03152 if (result.has(btree_update_lastkey)) 03153 { 03154 if (parent && parentslot < parent->slotuse) 03155 { 03156 BTREE_PRINT("Fixing lastkeyupdate: key " << result.lastkey << " into parent " << parent << " at parentslot " << parentslot); 03157 03158 BTREE_ASSERT(parent->childid[parentslot] == curr); 03159 parent->slotkey[parentslot] = result.lastkey; 03160 } 03161 else 03162 { 03163 BTREE_PRINT("Forwarding lastkeyupdate: key " << result.lastkey); 03164 myres |= result_t(btree_update_lastkey, result.lastkey); 03165 } 03166 } 03167 03168 if (result.has(btree_fixmerge)) 03169 { 03170 // either the current node or the next is empty and should be removed 03171 if (inner->childid[slot]->slotuse != 0) 03172 slot++; 03173 03174 // this is the child slot invalidated by the merge 03175 BTREE_ASSERT(inner->childid[slot]->slotuse == 0); 03176 03177 free_node(inner->childid[slot]); 03178 03179 std::copy(inner->slotkey + slot, inner->slotkey + inner->slotuse, 03180 inner->slotkey + slot-1); 03181 std::copy(inner->childid + slot+1, inner->childid + inner->slotuse+1, 03182 inner->childid + slot); 03183 03184 inner->slotuse--; 03185 03186 if (inner->level == 1) 03187 { 03188 // fix split key for children leaves 03189 slot--; 03190 leaf_node *child = static_cast<leaf_node*>(inner->childid[slot]); 03191 inner->slotkey[slot] = child->slotkey[ child->slotuse-1 ]; 03192 } 03193 } 03194 03195 if (inner->isunderflow() && !(inner == m_root && inner->slotuse >= 1)) 03196 { 03197 // case: the inner node is the root and has just one 03198 // child. that child becomes the new root 03199 if (leftinner == NULL && rightinner == NULL) 03200 { 03201 BTREE_ASSERT(inner == m_root); 03202 BTREE_ASSERT(inner->slotuse == 0); 03203 03204 m_root = inner->childid[0]; 03205 03206 inner->slotuse = 0; 03207 free_node(inner); 03208 03209 return btree_ok; 03210 } 03211 // case : if both left and right leaves would underflow in case of 03212 // a shift, then merging is necessary. choose the more local merger 03213 // with our parent 03214 else if ( (leftinner == NULL || leftinner->isfew()) && (rightinner == NULL || rightinner->isfew()) ) 03215 { 03216 if (leftparent == parent) 03217 myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1); 03218 else 03219 myres |= merge_inner(inner, rightinner, rightparent, parentslot); 03220 } 03221 // case : the right leaf has extra data, so balance right with current 03222 else if ( (leftinner != NULL && leftinner->isfew()) && (rightinner != NULL && !rightinner->isfew()) ) 03223 { 03224 if (rightparent == parent) 03225 shift_left_inner(inner, rightinner, rightparent, parentslot); 03226 else 03227 myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1); 03228 } 03229 // case : the left leaf has extra data, so balance left with current 03230 else if ( (leftinner != NULL && !leftinner->isfew()) && (rightinner != NULL && rightinner->isfew()) ) 03231 { 03232 if (leftparent == parent) 03233 shift_right_inner(leftinner, inner, leftparent, parentslot - 1); 03234 else 03235 myres |= merge_inner(inner, rightinner, rightparent, parentslot); 03236 } 03237 // case : both the leaf and right leaves have extra data and our 03238 // parent, choose the leaf with more data 03239 else if (leftparent == rightparent) 03240 { 03241 if (leftinner->slotuse <= rightinner->slotuse) 03242 shift_left_inner(inner, rightinner, rightparent, parentslot); 03243 else 03244 shift_right_inner(leftinner, inner, leftparent, parentslot - 1); 03245 } 03246 else 03247 { 03248 if (leftparent == parent) 03249 shift_right_inner(leftinner, inner, leftparent, parentslot - 1); 03250 else 03251 shift_left_inner(inner, rightinner, rightparent, parentslot); 03252 } 03253 } 03254 03255 return myres; 03256 } 03257 } 03258 03259 /// Merge two leaf nodes. The function moves all key/data pairs from right 03260 /// to left and sets right's slotuse to zero. The right slot is then 03261 /// removed by the calling parent node. 03262 result_t merge_leaves(leaf_node* left, leaf_node* right, inner_node* parent) 03263 { 03264 BTREE_PRINT("Merge leaf nodes " << left << " and " << right << " with common parent " << parent << "."); 03265 (void)parent; 03266 03267 BTREE_ASSERT(left->isleafnode() && right->isleafnode()); 03268 BTREE_ASSERT(parent->level == 1); 03269 03270 BTREE_ASSERT(left->slotuse + right->slotuse < leafslotmax); 03271 03272 std::copy(right->slotkey, right->slotkey + right->slotuse, 03273 left->slotkey + left->slotuse); 03274 data_copy(right->slotdata, right->slotdata + right->slotuse, 03275 left->slotdata + left->slotuse); 03276 03277 left->slotuse += right->slotuse; 03278 03279 left->nextleaf = right->nextleaf; 03280 if (left->nextleaf) 03281 left->nextleaf->prevleaf = left; 03282 else 03283 m_tailleaf = left; 03284 03285 right->slotuse = 0; 03286 03287 return btree_fixmerge; 03288 } 03289 03290 /// Merge two inner nodes. The function moves all key/childid pairs from 03291 /// right to left and sets right's slotuse to zero. The right slot is then 03292 /// removed by the calling parent node. 03293 static result_t merge_inner(inner_node* left, inner_node* right, inner_node* parent, unsigned int parentslot) 03294 { 03295 BTREE_PRINT("Merge inner nodes " << left << " and " << right << " with common parent " << parent << "."); 03296 03297 BTREE_ASSERT(left->level == right->level); 03298 BTREE_ASSERT(parent->level == left->level + 1); 03299 03300 BTREE_ASSERT(parent->childid[parentslot] == left); 03301 03302 BTREE_ASSERT(left->slotuse + right->slotuse < innerslotmax); 03303 03304 if (selfverify) 03305 { 03306 // find the left node's slot in the parent's children 03307 unsigned int leftslot = 0; 03308 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left) 03309 ++leftslot; 03310 03311 BTREE_ASSERT(leftslot < parent->slotuse); 03312 BTREE_ASSERT(parent->childid[leftslot] == left); 03313 BTREE_ASSERT(parent->childid[leftslot+1] == right); 03314 03315 BTREE_ASSERT(parentslot == leftslot); 03316 } 03317 03318 // retrieve the decision key from parent 03319 left->slotkey[left->slotuse] = parent->slotkey[parentslot]; 03320 left->slotuse++; 03321 03322 // copy over keys and children from right 03323 std::copy(right->slotkey, right->slotkey + right->slotuse, 03324 left->slotkey + left->slotuse); 03325 std::copy(right->childid, right->childid + right->slotuse+1, 03326 left->childid + left->slotuse); 03327 03328 left->slotuse += right->slotuse; 03329 right->slotuse = 0; 03330 03331 return btree_fixmerge; 03332 } 03333 03334 /// Balance two leaf nodes. The function moves key/data pairs from right to 03335 /// left so that both nodes are equally filled. The parent node is updated 03336 /// if possible. 03337 static result_t shift_left_leaf(leaf_node *left, leaf_node *right, inner_node *parent, unsigned int parentslot) 03338 { 03339 BTREE_ASSERT(left->isleafnode() && right->isleafnode()); 03340 BTREE_ASSERT(parent->level == 1); 03341 03342 BTREE_ASSERT(left->nextleaf == right); 03343 BTREE_ASSERT(left == right->prevleaf); 03344 03345 BTREE_ASSERT(left->slotuse < right->slotuse); 03346 BTREE_ASSERT(parent->childid[parentslot] == left); 03347 03348 unsigned int shiftnum = (right->slotuse - left->slotuse) >> 1; 03349 03350 BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to left " << left << " from right " << right << " with common parent " << parent << "."); 03351 03352 BTREE_ASSERT(left->slotuse + shiftnum < leafslotmax); 03353 03354 // copy the first items from the right node to the last slot in the left node. 03355 03356 std::copy(right->slotkey, right->slotkey + shiftnum, 03357 left->slotkey + left->slotuse); 03358 data_copy(right->slotdata, right->slotdata + shiftnum, 03359 left->slotdata + left->slotuse); 03360 03361 left->slotuse += shiftnum; 03362 03363 // shift all slots in the right node to the left 03364 03365 std::copy(right->slotkey + shiftnum, right->slotkey + right->slotuse, 03366 right->slotkey); 03367 data_copy(right->slotdata + shiftnum, right->slotdata + right->slotuse, 03368 right->slotdata); 03369 03370 right->slotuse -= shiftnum; 03371 03372 // fixup parent 03373 if (parentslot < parent->slotuse) { 03374 parent->slotkey[parentslot] = left->slotkey[left->slotuse - 1]; 03375 return btree_ok; 03376 } 03377 else { // the update is further up the tree 03378 return result_t(btree_update_lastkey, left->slotkey[left->slotuse - 1]); 03379 } 03380 } 03381 03382 /// Balance two inner nodes. The function moves key/data pairs from right 03383 /// to left so that both nodes are equally filled. The parent node is 03384 /// updated if possible. 03385 static void shift_left_inner(inner_node *left, inner_node *right, inner_node *parent, unsigned int parentslot) 03386 { 03387 BTREE_ASSERT(left->level == right->level); 03388 BTREE_ASSERT(parent->level == left->level + 1); 03389 03390 BTREE_ASSERT(left->slotuse < right->slotuse); 03391 BTREE_ASSERT(parent->childid[parentslot] == left); 03392 03393 unsigned int shiftnum = (right->slotuse - left->slotuse) >> 1; 03394 03395 BTREE_PRINT("Shifting (inner) " << shiftnum << " entries to left " << left << " from right " << right << " with common parent " << parent << "."); 03396 03397 BTREE_ASSERT(left->slotuse + shiftnum < innerslotmax); 03398 03399 if (selfverify) 03400 { 03401 // find the left node's slot in the parent's children and compare to parentslot 03402 03403 unsigned int leftslot = 0; 03404 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left) 03405 ++leftslot; 03406 03407 BTREE_ASSERT(leftslot < parent->slotuse); 03408 BTREE_ASSERT(parent->childid[leftslot] == left); 03409 BTREE_ASSERT(parent->childid[leftslot+1] == right); 03410 03411 BTREE_ASSERT(leftslot == parentslot); 03412 } 03413 03414 // copy the parent's decision slotkey and childid to the first new key on the left 03415 left->slotkey[left->slotuse] = parent->slotkey[parentslot]; 03416 left->slotuse++; 03417 03418 // copy the other items from the right node to the last slots in the left node. 03419 03420 std::copy(right->slotkey, right->slotkey + shiftnum-1, 03421 left->slotkey + left->slotuse); 03422 std::copy(right->childid, right->childid + shiftnum, 03423 left->childid + left->slotuse); 03424 03425 left->slotuse += shiftnum - 1; 03426 03427 // fixup parent 03428 parent->slotkey[parentslot] = right->slotkey[shiftnum - 1]; 03429 03430 // shift all slots in the right node 03431 03432 std::copy(right->slotkey + shiftnum, right->slotkey + right->slotuse, 03433 right->slotkey); 03434 std::copy(right->childid + shiftnum, right->childid + right->slotuse+1, 03435 right->childid); 03436 03437 right->slotuse -= shiftnum; 03438 } 03439 03440 /// Balance two leaf nodes. The function moves key/data pairs from left to 03441 /// right so that both nodes are equally filled. The parent node is updated 03442 /// if possible. 03443 static void shift_right_leaf(leaf_node *left, leaf_node *right, inner_node *parent, unsigned int parentslot) 03444 { 03445 BTREE_ASSERT(left->isleafnode() && right->isleafnode()); 03446 BTREE_ASSERT(parent->level == 1); 03447 03448 BTREE_ASSERT(left->nextleaf == right); 03449 BTREE_ASSERT(left == right->prevleaf); 03450 BTREE_ASSERT(parent->childid[parentslot] == left); 03451 03452 BTREE_ASSERT(left->slotuse > right->slotuse); 03453 03454 unsigned int shiftnum = (left->slotuse - right->slotuse) >> 1; 03455 03456 BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to right " << right << " from left " << left << " with common parent " << parent << "."); 03457 03458 if (selfverify) 03459 { 03460 // find the left node's slot in the parent's children 03461 unsigned int leftslot = 0; 03462 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left) 03463 ++leftslot; 03464 03465 BTREE_ASSERT(leftslot < parent->slotuse); 03466 BTREE_ASSERT(parent->childid[leftslot] == left); 03467 BTREE_ASSERT(parent->childid[leftslot+1] == right); 03468 03469 BTREE_ASSERT(leftslot == parentslot); 03470 } 03471 03472 // shift all slots in the right node 03473 03474 BTREE_ASSERT(right->slotuse + shiftnum < leafslotmax); 03475 03476 std::copy_backward(right->slotkey, right->slotkey + right->slotuse, 03477 right->slotkey + right->slotuse + shiftnum); 03478 data_copy_backward(right->slotdata, right->slotdata + right->slotuse, 03479 right->slotdata + right->slotuse + shiftnum); 03480 03481 right->slotuse += shiftnum; 03482 03483 // copy the last items from the left node to the first slot in the right node. 03484 std::copy(left->slotkey + left->slotuse - shiftnum, left->slotkey + left->slotuse, 03485 right->slotkey); 03486 data_copy(left->slotdata + left->slotuse - shiftnum, left->slotdata + left->slotuse, 03487 right->slotdata); 03488 03489 left->slotuse -= shiftnum; 03490 03491 parent->slotkey[parentslot] = left->slotkey[left->slotuse-1]; 03492 } 03493 03494 /// Balance two inner nodes. The function moves key/data pairs from left to 03495 /// right so that both nodes are equally filled. The parent node is updated 03496 /// if possible. 03497 static void shift_right_inner(inner_node *left, inner_node *right, inner_node *parent, unsigned int parentslot) 03498 { 03499 BTREE_ASSERT(left->level == right->level); 03500 BTREE_ASSERT(parent->level == left->level + 1); 03501 03502 BTREE_ASSERT(left->slotuse > right->slotuse); 03503 BTREE_ASSERT(parent->childid[parentslot] == left); 03504 03505 unsigned int shiftnum = (left->slotuse - right->slotuse) >> 1; 03506 03507 BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to right " << right << " from left " << left << " with common parent " << parent << "."); 03508 03509 if (selfverify) 03510 { 03511 // find the left node's slot in the parent's children 03512 unsigned int leftslot = 0; 03513 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left) 03514 ++leftslot; 03515 03516 BTREE_ASSERT(leftslot < parent->slotuse); 03517 BTREE_ASSERT(parent->childid[leftslot] == left); 03518 BTREE_ASSERT(parent->childid[leftslot+1] == right); 03519 03520 BTREE_ASSERT(leftslot == parentslot); 03521 } 03522 03523 // shift all slots in the right node 03524 03525 BTREE_ASSERT(right->slotuse + shiftnum < innerslotmax); 03526 03527 std::copy_backward(right->slotkey, right->slotkey + right->slotuse, 03528 right->slotkey + right->slotuse + shiftnum); 03529 std::copy_backward(right->childid, right->childid + right->slotuse+1, 03530 right->childid + right->slotuse+1 + shiftnum); 03531 03532 right->slotuse += shiftnum; 03533 03534 // copy the parent's decision slotkey and childid to the last new key on the right 03535 right->slotkey[shiftnum - 1] = parent->slotkey[parentslot]; 03536 03537 // copy the remaining last items from the left node to the first slot in the right node. 03538 std::copy(left->slotkey + left->slotuse - shiftnum+1, left->slotkey + left->slotuse, 03539 right->slotkey); 03540 std::copy(left->childid + left->slotuse - shiftnum+1, left->childid + left->slotuse+1, 03541 right->childid); 03542 03543 // copy the first to-be-removed key from the left node to the parent's decision slot 03544 parent->slotkey[parentslot] = left->slotkey[left->slotuse - shiftnum]; 03545 03546 left->slotuse -= shiftnum; 03547 } 03548 03549 #ifdef BTREE_DEBUG 03550 public: 03551 // *** Debug Printing 03552 03553 /// Print out the B+ tree structure with keys onto the given ostream. This 03554 /// function requires that the header is compiled with BTREE_DEBUG and that 03555 /// key_type is printable via std::ostream. 03556 void print(std::ostream &os) const 03557 { 03558 if (m_root) { 03559 print_node(os, m_root, 0, true); 03560 } 03561 } 03562 03563 /// Print out only the leaves via the double linked list. 03564 void print_leaves(std::ostream &os) const 03565 { 03566 os << "leaves:" << std::endl; 03567 03568 const leaf_node *n = m_headleaf; 03569 03570 while(n) 03571 { 03572 os << " " << n << std::endl; 03573 03574 n = n->nextleaf; 03575 } 03576 } 03577 03578 private: 03579 03580 /// Recursively descend down the tree and print out nodes. 03581 static void print_node(std::ostream &os, const node* node, unsigned int depth=0, bool recursive=false) 03582 { 03583 for(unsigned int i = 0; i < depth; i++) os << " "; 03584 03585 os << "node " << node << " level " << node->level << " slotuse " << node->slotuse << std::endl; 03586 03587 if (node->isleafnode()) 03588 { 03589 const leaf_node *leafnode = static_cast<const leaf_node*>(node); 03590 03591 for(unsigned int i = 0; i < depth; i++) os << " "; 03592 os << " leaf prev " << leafnode->prevleaf << " next " << leafnode->nextleaf << std::endl; 03593 03594 for(unsigned int i = 0; i < depth; i++) os << " "; 03595 03596 for (unsigned int slot = 0; slot < leafnode->slotuse; ++slot) 03597 { 03598 os << leafnode->slotkey[slot] << " "; // << "(data: " << leafnode->slotdata[slot] << ") "; 03599 } 03600 os << std::endl; 03601 } 03602 else 03603 { 03604 const inner_node *innernode = static_cast<const inner_node*>(node); 03605 03606 for(unsigned int i = 0; i < depth; i++) os << " "; 03607 03608 for (unsigned short slot = 0; slot < innernode->slotuse; ++slot) 03609 { 03610 os << "(" << innernode->childid[slot] << ") " << innernode->slotkey[slot] << " "; 03611 } 03612 os << "(" << innernode->childid[innernode->slotuse] << ")" << std::endl; 03613 03614 if (recursive) 03615 { 03616 for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot) 03617 { 03618 print_node(os, innernode->childid[slot], depth + 1, recursive); 03619 } 03620 } 03621 } 03622 } 03623 #endif 03624 03625 public: 03626 // *** Verification of B+ Tree Invariants 03627 03628 /// Run a thorough verification of all B+ tree invariants. The program 03629 /// aborts via assert() if something is wrong. 03630 void verify() const 03631 { 03632 key_type minkey, maxkey; 03633 tree_stats vstats; 03634 03635 if (m_root) 03636 { 03637 verify_node(m_root, &minkey, &maxkey, vstats); 03638 03639 assert( vstats.itemcount == m_stats.itemcount ); 03640 assert( vstats.leaves == m_stats.leaves ); 03641 assert( vstats.innernodes == m_stats.innernodes ); 03642 03643 verify_leaflinks(); 03644 } 03645 } 03646 03647 private: 03648 03649 /// Recursively descend down the tree and verify each node 03650 void verify_node(const node* n, key_type* minkey, key_type* maxkey, tree_stats &vstats) const 03651 { 03652 BTREE_PRINT("verifynode " << n); 03653 03654 if (n->isleafnode()) 03655 { 03656 const leaf_node *leaf = static_cast<const leaf_node*>(n); 03657 03658 assert( leaf == m_root || !leaf->isunderflow() ); 03659 assert( leaf->slotuse > 0 ); 03660 03661 for(unsigned short slot = 0; slot < leaf->slotuse - 1; ++slot) 03662 { 03663 assert(key_lessequal(leaf->slotkey[slot], leaf->slotkey[slot + 1])); 03664 } 03665 03666 *minkey = leaf->slotkey[0]; 03667 *maxkey = leaf->slotkey[leaf->slotuse - 1]; 03668 03669 vstats.leaves++; 03670 vstats.itemcount += leaf->slotuse; 03671 } 03672 else // !n->isleafnode() 03673 { 03674 const inner_node *inner = static_cast<const inner_node*>(n); 03675 vstats.innernodes++; 03676 03677 assert( inner == m_root || !inner->isunderflow() ); 03678 assert( inner->slotuse > 0 ); 03679 03680 for(unsigned short slot = 0; slot < inner->slotuse - 1; ++slot) 03681 { 03682 assert(key_lessequal(inner->slotkey[slot], inner->slotkey[slot + 1])); 03683 } 03684 03685 for(unsigned short slot = 0; slot <= inner->slotuse; ++slot) 03686 { 03687 const node *subnode = inner->childid[slot]; 03688 key_type subminkey = key_type(); 03689 key_type submaxkey = key_type(); 03690 03691 assert(subnode->level + 1 == inner->level); 03692 verify_node(subnode, &subminkey, &submaxkey, vstats); 03693 03694 BTREE_PRINT("verify subnode " << subnode << ": " << subminkey << " - " << submaxkey); 03695 03696 if (slot == 0) 03697 *minkey = subminkey; 03698 else 03699 assert(key_greaterequal(subminkey, inner->slotkey[slot-1])); 03700 03701 if (slot == inner->slotuse) 03702 *maxkey = submaxkey; 03703 else 03704 assert(key_equal(inner->slotkey[slot], submaxkey)); 03705 03706 if (inner->level == 1 && slot < inner->slotuse) 03707 { 03708 // children are leaves and must be linked together in the 03709 // correct order 03710 const leaf_node *leafa = static_cast<const leaf_node*>(inner->childid[slot]); 03711 const leaf_node *leafb = static_cast<const leaf_node*>(inner->childid[slot + 1]); 03712 03713 assert(leafa->nextleaf == leafb); 03714 assert(leafa == leafb->prevleaf); 03715 (void)leafa; (void)leafb; 03716 } 03717 if (inner->level == 2 && slot < inner->slotuse) 03718 { 03719 // verify leaf links between the adjacent inner nodes 03720 const inner_node *parenta = static_cast<const inner_node*>(inner->childid[slot]); 03721 const inner_node *parentb = static_cast<const inner_node*>(inner->childid[slot+1]); 03722 03723 const leaf_node *leafa = static_cast<const leaf_node*>(parenta->childid[parenta->slotuse]); 03724 const leaf_node *leafb = static_cast<const leaf_node*>(parentb->childid[0]); 03725 03726 assert(leafa->nextleaf == leafb); 03727 assert(leafa == leafb->prevleaf); 03728 (void)leafa; (void)leafb; 03729 } 03730 } 03731 } 03732 } 03733 03734 /// Verify the double linked list of leaves. 03735 void verify_leaflinks() const 03736 { 03737 const leaf_node *n = m_headleaf; 03738 03739 assert(n->level == 0); 03740 assert(!n || n->prevleaf == NULL); 03741 03742 unsigned int testcount = 0; 03743 03744 while(n) 03745 { 03746 assert(n->level == 0); 03747 assert(n->slotuse > 0); 03748 03749 for(unsigned short slot = 0; slot < n->slotuse - 1; ++slot) 03750 { 03751 assert(key_lessequal(n->slotkey[slot], n->slotkey[slot + 1])); 03752 } 03753 03754 testcount += n->slotuse; 03755 03756 if (n->nextleaf) 03757 { 03758 assert(key_lessequal(n->slotkey[n->slotuse-1], n->nextleaf->slotkey[0])); 03759 03760 assert(n == n->nextleaf->prevleaf); 03761 } 03762 else 03763 { 03764 assert(m_tailleaf == n); 03765 } 03766 03767 n = n->nextleaf; 03768 } 03769 03770 assert(testcount == size()); 03771 } 03772 03773 private: 03774 // *** Dump and Restore of B+ Trees 03775 03776 /// A header for the binary image containing the base properties of the B+ 03777 /// tree. These properties have to match the current template 03778 /// instantiation. 03779 struct dump_header 03780 { 03781 /// "stx-btree", just to stop the restore() function from loading garbage 03782 char signature[12]; 03783 03784 /// Currently 0 03785 unsigned short version; 03786 03787 /// sizeof(key_type) 03788 unsigned short key_type_size; 03789 03790 /// sizeof(data_type) 03791 unsigned short data_type_size; 03792 03793 /// Number of slots in the leaves 03794 unsigned short leafslots; 03795 03796 /// Number of slots in the inner nodes 03797 unsigned short innerslots; 03798 03799 /// Allow duplicates 03800 bool allow_duplicates; 03801 03802 /// The item count of the tree 03803 size_type itemcount; 03804 03805 /// Fill the struct with the current B+ tree's properties, itemcount is 03806 /// not filled. 03807 inline void fill() 03808 { 03809 // don't want to include string.h just for this signature 03810 signature[0] = 's'; signature[1] = 't'; signature[2] = 'x'; signature[3] = '-'; 03811 signature[4] = 'b'; signature[5] = 't'; signature[6] = 'r'; signature[7] = 'e'; 03812 signature[8] = 'e'; signature[9] = 0; signature[10] = 0; signature[11] = 0; 03813 03814 version = 0; 03815 key_type_size = sizeof(typename btree_self::key_type); 03816 data_type_size = sizeof(typename btree_self::data_type); 03817 leafslots = btree_self::leafslotmax; 03818 innerslots = btree_self::innerslotmax; 03819 allow_duplicates = btree_self::allow_duplicates; 03820 } 03821 03822 /// Returns true if the headers have the same vital properties 03823 inline bool same(const struct dump_header &o) const 03824 { 03825 return (signature[0] == 's' && signature[1] == 't' && signature[2] == 'x' && signature[3] == '-' && 03826 signature[4] == 'b' && signature[5] == 't' && signature[6] == 'r' && signature[7] == 'e' && 03827 signature[8] == 'e' && signature[9] == 0 && signature[10] == 0 && signature[11] == 0) 03828 && (version == o.version) 03829 && (key_type_size == o.key_type_size) 03830 && (data_type_size == o.data_type_size) 03831 && (leafslots == o.leafslots) 03832 && (innerslots == o.innerslots) 03833 && (allow_duplicates == o.allow_duplicates); 03834 } 03835 }; 03836 03837 public: 03838 03839 /// Dump the contents of the B+ tree out onto an ostream as a binary 03840 /// image. The image contains memory pointers which will be fixed when the 03841 /// image is restored. For this to work your key_type and data_type must be 03842 /// integral types and contain no pointers or references. 03843 void dump(std::ostream &os) const 03844 { 03845 struct dump_header header; 03846 header.fill(); 03847 header.itemcount = size(); 03848 03849 os.write(reinterpret_cast<char*>(&header), sizeof(header)); 03850 03851 if (m_root) { 03852 dump_node(os, m_root); 03853 } 03854 } 03855 03856 /// Restore a binary image of a dumped B+ tree from an istream. The B+ tree 03857 /// pointers are fixed using the dump order. For dump and restore to work 03858 /// your key_type and data_type must be integral types and contain no 03859 /// pointers or references. Returns true if the restore was successful. 03860 bool restore(std::istream &is) 03861 { 03862 struct dump_header fileheader; 03863 is.read(reinterpret_cast<char*>(&fileheader), sizeof(fileheader)); 03864 if (!is.good()) return false; 03865 03866 struct dump_header myheader; 03867 myheader.fill(); 03868 myheader.itemcount = fileheader.itemcount; 03869 03870 if (!myheader.same(fileheader)) 03871 { 03872 BTREE_PRINT("btree::restore: file header does not match instantiation signature."); 03873 return false; 03874 } 03875 03876 clear(); 03877 03878 if (fileheader.itemcount > 0) 03879 { 03880 m_root = restore_node(is); 03881 if (m_root == NULL) return false; 03882 03883 m_stats.itemcount = fileheader.itemcount; 03884 } 03885 03886 #ifdef BTREE_DEBUG 03887 if (debug) print(std::cout); 03888 #endif 03889 if (selfverify) verify(); 03890 03891 return true; 03892 } 03893 03894 private: 03895 03896 /// Recursively descend down the tree and dump each node in a precise order 03897 void dump_node(std::ostream &os, const node* n) const 03898 { 03899 BTREE_PRINT("dump_node " << n << std::endl); 03900 03901 if (n->isleafnode()) 03902 { 03903 const leaf_node *leaf = static_cast<const leaf_node*>(n); 03904 03905 os.write(reinterpret_cast<const char*>(leaf), sizeof(*leaf)); 03906 } 03907 else // !n->isleafnode() 03908 { 03909 const inner_node *inner = static_cast<const inner_node*>(n); 03910 03911 os.write(reinterpret_cast<const char*>(inner), sizeof(*inner)); 03912 03913 for(unsigned short slot = 0; slot <= inner->slotuse; ++slot) 03914 { 03915 const node *subnode = inner->childid[slot]; 03916 03917 dump_node(os, subnode); 03918 } 03919 } 03920 } 03921 03922 /// Read the dump image and construct a tree from the node order in the 03923 /// serialization. 03924 node* restore_node(std::istream &is) 03925 { 03926 union { 03927 node top; 03928 leaf_node leaf; 03929 inner_node inner; 03930 } nu; 03931 03932 // first read only the top of the node 03933 is.read(reinterpret_cast<char*>(&nu.top), sizeof(nu.top)); 03934 if (!is.good()) return NULL; 03935 03936 if (nu.top.isleafnode()) 03937 { 03938 // read remaining data of leaf node 03939 is.read(reinterpret_cast<char*>(&nu.leaf) + sizeof(nu.top), sizeof(nu.leaf) - sizeof(nu.top)); 03940 if (!is.good()) return NULL; 03941 03942 leaf_node *newleaf = allocate_leaf(); 03943 03944 // copy over all data, the leaf nodes contain only their double linked list pointers 03945 *newleaf = nu.leaf; 03946 03947 // reconstruct the linked list from the order in the file 03948 if (m_headleaf == NULL) { 03949 BTREE_ASSERT(newleaf->prevleaf == NULL); 03950 m_headleaf = m_tailleaf = newleaf; 03951 } 03952 else { 03953 newleaf->prevleaf = m_tailleaf; 03954 m_tailleaf->nextleaf = newleaf; 03955 m_tailleaf = newleaf; 03956 } 03957 03958 return newleaf; 03959 } 03960 else 03961 { 03962 // read remaining data of inner node 03963 is.read(reinterpret_cast<char*>(&nu.inner) + sizeof(nu.top), sizeof(nu.inner) - sizeof(nu.top)); 03964 if (!is.good()) return NULL; 03965 03966 inner_node *newinner = allocate_inner(0); 03967 03968 // copy over all data, the inner nodes contain only pointers to their children 03969 *newinner = nu.inner; 03970 03971 for(unsigned short slot = 0; slot <= newinner->slotuse; ++slot) 03972 { 03973 newinner->childid[slot] = restore_node(is); 03974 } 03975 03976 return newinner; 03977 } 03978 } 03979 }; 03980 03981 } // namespace stx 03982 03983 #endif // _STX_BTREE_H_