Fairport  v1.0.38
fairport/ltp/heap.h
Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 #ifndef FAIRPORT_LTP_HEAP_H
00007 #define FAIRPORT_LTP_HEAP_H
00008 
00009 #include <vector>
00010 #include <algorithm>
00011 #include <boost/noncopyable.hpp>
00012 #include <boost/iostreams/concepts.hpp>
00013 #ifdef _MSC_VER
00014 #pragma warning(push)
00015 #pragma warning(disable:4244)
00016 #endif
00017 #include <boost/iostreams/stream.hpp>
00018 #ifdef _MSC_VER
00019 #pragma warning(pop)
00020 #endif
00021 
00022 #if __GNUC__
00023 #include <tr1/memory>
00024 #endif
00025 
00026 #include "fairport/util/primitives.h"
00027 
00028 #include "fairport/disk/disk.h"
00029 
00030 #include "fairport/ndb/node.h"
00031 
00032 #ifdef _MSC_VER
00033 #pragma warning(push)
00034 #pragma warning(disable:4250)
00035 #endif
00036 
00037 namespace fairport
00038 {
00039 
00042 
00043 template<typename K, typename V>
00044 class bth_nonleaf_node;
00045 
00046 template<typename K, typename V>
00047 class bth_leaf_node;
00048 
00049 template<typename K, typename V>
00050 class bth_node;
00051 
00052 class heap_impl;
00053 typedef std::tr1::shared_ptr<heap_impl> heap_ptr;
00054 
00061 class hid_stream_device : public boost::iostreams::device<boost::iostreams::input_seekable>
00062 {
00063 public:
00064     hid_stream_device() : m_pos(0), m_hid(0) { }
00066     std::streamsize read(char* pbuffer, std::streamsize n);
00068     std::streampos seek(boost::iostreams::stream_offset off, std::ios_base::seekdir way);
00069 
00070 private:
00071     friend class heap_impl;
00072     hid_stream_device(const heap_ptr& _heap, heap_id id) : m_pos(0), m_hid(id), m_pheap(_heap) { }
00073 
00074     std::streamsize m_pos;
00075     heap_id m_hid;
00076     heap_ptr m_pheap;
00077 };
00078 
00082 typedef boost::iostreams::stream<hid_stream_device> hid_stream;
00083 
00093 class heap_impl : public std::tr1::enable_shared_from_this<heap_impl>
00094 {
00095 public:
00100     size_t size(heap_id id) const;
00101     
00104     uint get_page_count() const { return m_node.get_page_count(); }
00105     
00115     heap_id get_root_id() const;
00116     
00124     byte get_client_signature() const;
00125     
00132     size_t read(std::vector<byte>& buffer, heap_id id, ulong offset) const;
00133     
00138     std::vector<byte> read(heap_id id) const;
00139     
00150     hid_stream_device open_stream(heap_id id);
00153     const node& get_node() const { return m_node; }
00156     node& get_node() { return m_node; }
00157 
00163     template<typename K, typename V>
00164     std::tr1::shared_ptr<bth_node<K,V> > open_bth(heap_id root);
00165 
00166     friend class heap;
00167 
00168 private:
00169     heap_impl();
00170     explicit heap_impl(const node& n);
00171     heap_impl(const node& n, byte client_sig);
00172     heap_impl(const heap_impl& other) 
00173         : m_node(other.m_node) { }
00174 
00175     node m_node;
00176 };
00177 
00195 class heap
00196 {
00197 public:
00200     explicit heap(const node& n)
00201         : m_pheap(new heap_impl(n)) { }
00206     heap(const node& n, byte client_sig)
00207         : m_pheap(new heap_impl(n, client_sig)) { }
00210     heap(const heap& other)
00211         : m_pheap(other.m_pheap) { }
00212 
00213 #ifndef BOOST_NO_RVALUE_REFERENCES
00214 
00215 
00216     heap(heap&& other)
00217         : m_pheap(std::move(other.m_pheap)) { }
00218 #endif
00219 
00221     size_t size(heap_id id) const
00222         { return m_pheap->size(id); }
00224     heap_id get_root_id() const
00225         { return m_pheap->get_root_id(); }
00227     byte get_client_signature() const
00228         { return m_pheap->get_client_signature(); }
00230     size_t read(std::vector<byte>& buffer, heap_id id, ulong offset) const
00231         { return m_pheap->read(buffer, id, offset); }
00233     std::vector<byte> read(heap_id id) const
00234         { return m_pheap->read(id); }
00236     hid_stream_device open_stream(heap_id id)
00237         { return m_pheap->open_stream(id); }
00238 
00240     const node& get_node() const
00241         { return m_pheap->get_node(); }
00243     node& get_node()
00244         { return m_pheap->get_node(); }
00245     
00247     template<typename K, typename V>
00248     std::tr1::shared_ptr<bth_node<K,V> > open_bth(heap_id root)
00249         { return m_pheap->open_bth<K,V>(root); }
00250 
00251 private:
00252     heap& operator=(const heap& other); // = delete
00253     heap_ptr m_pheap;
00254 };
00255 
00258 
00277 template<typename K, typename V>
00278 class bth_node : 
00279     public virtual btree_node<K,V>, 
00280     private boost::noncopyable
00281 {
00282 public:
00288     static std::tr1::shared_ptr<bth_node<K,V> > open_root(const heap_ptr& h, heap_id bth_root);
00293     static std::tr1::shared_ptr<bth_nonleaf_node<K,V> > open_nonleaf(const heap_ptr& h, heap_id id, ushort level);
00297     static std::tr1::shared_ptr<bth_leaf_node<K,V> > open_leaf(const heap_ptr& h, heap_id id);
00298 
00303     bth_node(const heap_ptr& h, heap_id id, ushort level)
00304         : m_heap(h), m_id(id), m_level(level) { }
00305     virtual ~bth_node() { }
00306 
00309     heap_id get_id() const { return m_id; }
00312     ushort get_level() const { return m_level; }
00315     size_t get_key_size() const { return sizeof(K); }
00318     size_t get_value_size() const { return sizeof(V); }
00319 
00322     const heap_ptr get_heap_ptr() const { return m_heap; }
00325     heap_ptr get_heap_ptr() { return m_heap; }
00326 
00329     const node& get_node() const { return m_heap->get_node(); }
00332     node& get_node() { return m_heap->get_node(); }
00333 
00334 protected:
00335     heap_ptr m_heap;
00336 
00337 private:
00338     heap_id m_id;   
00339     ushort m_level; 
00340 };
00341 
00347 template<typename K, typename V>
00348 class bth_nonleaf_node : 
00349     public bth_node<K,V>, 
00350     public btree_node_nonleaf<K,V>
00351 {
00352 public:
00358 #ifndef BOOST_NO_RVALUE_REFERENCES
00359     bth_nonleaf_node(const heap_ptr& h, heap_id id, ushort level, std::vector<std::pair<K, heap_id> > bth_info)
00360         : bth_node<K,V>(h, id, level), m_bth_info(std::move(bth_info)), m_child_nodes(m_bth_info.size()) { }
00361 #else
00362     bth_nonleaf_node(const heap_ptr& h, heap_id id, ushort level, const std::vector<std::pair<K, heap_id> >& bth_info)
00363         : bth_node<K,V>(h, id, level), m_bth_info(bth_info), m_child_nodes(m_bth_info.size()) { }
00364 #endif
00365     // btree_node_nonleaf implementation
00366     const K& get_key(uint pos) const { return m_bth_info[pos].first; }
00367     bth_node<K,V>* get_child(uint pos);
00368     const bth_node<K,V>* get_child(uint pos) const;
00369     uint num_values() const { return m_child_nodes.size(); }
00370 
00371 private:
00372     std::vector<std::pair<K, heap_id> > m_bth_info;
00373     mutable std::vector<std::tr1::shared_ptr<bth_node<K,V> > > m_child_nodes;
00374 };
00375 
00381 template<typename K, typename V>
00382 class bth_leaf_node : 
00383     public bth_node<K,V>, 
00384     public btree_node_leaf<K,V>
00385 {
00386 public:
00391 #ifndef BOOST_NO_RVALUE_REFERENCES
00392     bth_leaf_node(const heap_ptr& h, heap_id id, std::vector<std::pair<K,V> > data)
00393         : bth_node<K,V>(h, id, 0), m_bth_data(std::move(data)) { }
00394 #else
00395     bth_leaf_node(const heap_ptr& h, heap_id id, const std::vector<std::pair<K,V> >& data)
00396         : bth_node<K,V>(h, id, 0), m_bth_data(data) { }
00397 #endif
00398 
00399     virtual ~bth_leaf_node() { }
00400 
00401     // btree_node_leaf implementation
00402     const V& get_value(uint pos) const
00403         { return m_bth_data[pos].second; }
00404     const K& get_key(uint pos) const
00405         { return m_bth_data[pos].first; }
00406     uint num_values() const
00407         { return m_bth_data.size(); }
00408 
00409 private:
00410     std::vector<std::pair<K,V> > m_bth_data;
00411 };
00412 
00413 } // end fairport namespace
00414 
00415 template<typename K, typename V>
00416 inline std::tr1::shared_ptr<fairport::bth_node<K,V> > fairport::bth_node<K,V>::open_root(const heap_ptr& h, heap_id bth_root)
00417 {
00418     disk::bth_header* pheader;
00419     std::vector<byte> buffer(sizeof(disk::bth_header));
00420     pheader = (disk::bth_header*)&buffer[0];
00421 
00422     h->read(buffer, bth_root, 0);
00423 
00424 #ifdef FAIRPORT_VALIDATION_LEVEL_WEAK
00425     if(pheader->bth_signature != disk::heap_sig_bth)
00426         throw sig_mismatch("bth_signature expected", 0, bth_root, pheader->bth_signature, disk::heap_sig_bth);
00427 
00428     if(pheader->key_size != sizeof(K))
00429         throw std::logic_error("invalid key size");
00430 
00431     if(pheader->entry_size != sizeof(V))
00432         throw std::logic_error("invalid entry size");
00433 #endif
00434 
00435     if(pheader->num_levels > 0)
00436         return open_nonleaf(h, pheader->root, pheader->num_levels);
00437     else
00438         return open_leaf(h, pheader->root);
00439 }
00440 
00441 template<typename K, typename V>
00442 inline std::tr1::shared_ptr<fairport::bth_nonleaf_node<K,V> > fairport::bth_node<K,V>::open_nonleaf(const heap_ptr& h, heap_id id, ushort level)
00443 {
00444     uint num_entries = h->size(id) / sizeof(disk::bth_nonleaf_entry<K>);
00445     std::vector<byte> buffer(h->size(id));
00446     disk::bth_nonleaf_node<K>* pbth_nonleaf_node = (disk::bth_nonleaf_node<K>*)&buffer[0];
00447     std::vector<std::pair<K, heap_id> > child_nodes;
00448 
00449     h->read(buffer, id, 0);
00450 
00451     child_nodes.reserve(num_entries);
00452 
00453     for(uint i = 0; i < num_entries; ++i)
00454     {
00455         // Copy 'page' to a local variable so that it has standard
00456         // alignment before we pass it by reference to make_pair (required
00457         // by GCC on Mac and probably other platforms).
00458 #ifdef _MSC_VER
00459 #pragma warning(suppress:6385)
00460 #endif
00461         heap_id page = pbth_nonleaf_node->entries[i].page;
00462         child_nodes.push_back(std::make_pair(pbth_nonleaf_node->entries[i].key, page));
00463     }
00464 
00465 #ifndef BOOST_NO_RVALUE_REFERENCES
00466     return std::tr1::shared_ptr<bth_nonleaf_node<K,V> >(new bth_nonleaf_node<K,V>(h, id, level, std::move(child_nodes)));
00467 #else
00468     return std::tr1::shared_ptr<bth_nonleaf_node<K,V> >(new bth_nonleaf_node<K,V>(h, id, level, child_nodes));
00469 #endif
00470 }
00471 
00472 template<typename K, typename V>
00473 inline std::tr1::shared_ptr<fairport::bth_leaf_node<K,V> > fairport::bth_node<K,V>::open_leaf(const heap_ptr& h, heap_id id)
00474 {
00475     std::vector<std::pair<K, V> > entries; 
00476 
00477     if(id)
00478     {
00479         uint num_entries = h->size(id) / sizeof(disk::bth_leaf_entry<K,V>);
00480         std::vector<byte> buffer(h->size(id));
00481         disk::bth_leaf_node<K,V>* pbth_leaf_node = (disk::bth_leaf_node<K,V>*)&buffer[0];
00482 
00483         h->read(buffer, id, 0);
00484 
00485         entries.reserve(num_entries);
00486 
00487         for(uint i = 0; i < num_entries; ++i)
00488         {
00489 #ifdef _MSC_VER
00490 #pragma warning(suppress:6385)
00491 #endif
00492             entries.push_back(std::make_pair(pbth_leaf_node->entries[i].key, pbth_leaf_node->entries[i].value));
00493         }
00494 #ifndef BOOST_NO_RVALUE_REFERENCES
00495         return std::tr1::shared_ptr<bth_leaf_node<K,V> >(new bth_leaf_node<K,V>(h, id, std::move(entries)));
00496 #else
00497         return std::tr1::shared_ptr<bth_leaf_node<K,V> >(new bth_leaf_node<K,V>(h, id, entries));
00498 #endif
00499     }
00500     else
00501     {
00502         // id == 0 means an empty tree
00503         return std::tr1::shared_ptr<bth_leaf_node<K,V> >(new bth_leaf_node<K,V>(h, id, entries));
00504     }
00505 }
00506 
00507 template<typename K, typename V>
00508 inline fairport::bth_node<K,V>* fairport::bth_nonleaf_node<K,V>::get_child(uint pos)
00509 {
00510     if(m_child_nodes[pos] == NULL)
00511     {
00512         if(this->get_level() > 1)
00513             m_child_nodes[pos] = bth_node<K,V>::open_nonleaf(this->get_heap_ptr(), m_bth_info[pos].second, this->get_level()-1);
00514         else
00515             m_child_nodes[pos] = bth_node<K,V>::open_leaf(this->get_heap_ptr(), m_bth_info[pos].second);
00516     }
00517 
00518     return m_child_nodes[pos].get();
00519 }
00520 
00521 template<typename K, typename V>
00522 inline const fairport::bth_node<K,V>* fairport::bth_nonleaf_node<K,V>::get_child(uint pos) const
00523 {
00524     if(m_child_nodes[pos] == NULL)
00525     {
00526         if(this->get_level() > 1)
00527             m_child_nodes[pos] = bth_node<K,V>::open_nonleaf(this->get_heap_ptr(), m_bth_info[pos].second, this->get_level()-1);
00528         else
00529             m_child_nodes[pos] = bth_node<K,V>::open_leaf(this->get_heap_ptr(), m_bth_info[pos].second);
00530     }
00531 
00532     return m_child_nodes[pos].get();
00533 }
00534 
00535 inline fairport::heap_impl::heap_impl(const node& n)
00536 : m_node(n)
00537 {
00538     // need to throw if the node is smaller than first_header
00539     disk::heap_first_header first_header = m_node.read<disk::heap_first_header>(0);
00540 
00541 #ifdef FAIRPORT_VALIDATION_LEVEL_WEAK
00542     if(first_header.signature != disk::heap_signature)
00543         throw sig_mismatch("invalid heap_sig", 0, n.get_id(), first_header.signature, disk::heap_signature);
00544 #endif
00545 }
00546 
00547 inline fairport::heap_impl::heap_impl(const node& n, byte client_sig)
00548 : m_node(n)
00549 {
00550     // need to throw if the node is smaller than first_header
00551     disk::heap_first_header first_header = m_node.read<disk::heap_first_header>(0);
00552 
00553 #ifdef FAIRPORT_VALIDATION_LEVEL_WEAK
00554     if(first_header.signature != disk::heap_signature)
00555         throw sig_mismatch("invalid heap_sig", 0, n.get_id(), first_header.signature, disk::heap_signature);
00556 #endif
00557     if(first_header.client_signature != client_sig)
00558         throw sig_mismatch("invalid client_sig", 0, n.get_id(), first_header.client_signature, client_sig);
00559 }
00560 
00561 inline fairport::heap_id fairport::heap_impl::get_root_id() const
00562 {
00563     disk::heap_first_header first_header = m_node.read<disk::heap_first_header>(0);
00564     return first_header.root_id;
00565 }
00566 
00567 inline fairport::byte fairport::heap_impl::get_client_signature() const
00568 {
00569     disk::heap_first_header first_header = m_node.read<disk::heap_first_header>(0);
00570     return first_header.client_signature;
00571 }
00572 
00573 inline size_t fairport::heap_impl::size(heap_id id) const
00574 {
00575     if(id == 0)
00576         return 0;
00577 
00578     disk::heap_page_header header = m_node.read<disk::heap_page_header>(get_heap_page(id), 0);
00579 
00580 #ifdef FAIRPORT_VALIDATION_LEVEL_WEAK
00581     if(header.page_map_offset > m_node.get_page_size(get_heap_page(id)))
00582         throw std::length_error("page_map_offset > node size");
00583 #endif
00584 
00585     std::vector<byte> buffer(m_node.get_page_size(get_heap_page(id)) - header.page_map_offset);
00586     m_node.read(buffer, get_heap_page(id), header.page_map_offset);
00587     disk::heap_page_map* pmap = reinterpret_cast<disk::heap_page_map*>(&buffer[0]);
00588 
00589 #ifdef FAIRPORT_VALIDATION_LEVEL_WEAK
00590     if(get_heap_index(id) > pmap->num_allocs)
00591         throw std::length_error("index > num_allocs");
00592 #endif
00593 
00594     return pmap->allocs[get_heap_index(id) + 1] - pmap->allocs[get_heap_index(id)];
00595 }
00596 
00597 inline size_t fairport::heap_impl::read(std::vector<byte>& buffer, heap_id id, ulong offset) const
00598 {
00599     size_t hid_size = size(id);
00600 
00601 #ifdef FAIRPORT_VALIDATION_LEVEL_WEAK
00602     if(buffer.size() > hid_size)
00603         throw std::length_error("buffer.size() > size()");
00604 
00605     if(offset > hid_size)
00606         throw std::length_error("offset > size()");
00607 
00608     if(offset + buffer.size() > hid_size)
00609         throw std::length_error("size + offset > size()");
00610 #endif
00611 
00612     if(hid_size == 0)
00613         return 0;
00614 
00615     disk::heap_page_header header = m_node.read<disk::heap_page_header>(get_heap_page(id), 0);
00616     std::vector<byte> map_buffer(m_node.get_page_size(get_heap_page(id)) - header.page_map_offset);
00617     m_node.read(map_buffer, get_heap_page(id), header.page_map_offset);
00618     disk::heap_page_map* pmap = reinterpret_cast<disk::heap_page_map*>(&map_buffer[0]);
00619 
00620     return m_node.read(buffer, get_heap_page(id), pmap->allocs[get_heap_index(id)] + offset);
00621 }
00622 
00623 inline fairport::hid_stream_device fairport::heap_impl::open_stream(heap_id id)
00624 {
00625     return hid_stream_device(shared_from_this(), id);
00626 }
00627 
00628 inline std::streamsize fairport::hid_stream_device::read(char* pbuffer, std::streamsize n)
00629 {
00630     if(m_hid && (static_cast<size_t>(m_pos) + n > m_pheap->size(m_hid)))
00631         n = m_pheap->size(m_hid) - m_pos;
00632 
00633     if(n == 0 || m_hid == 0)
00634         return -1;
00635 
00636     std::vector<byte> buff(static_cast<uint>(n));
00637     size_t read = m_pheap->read(buff, m_hid, static_cast<ulong>(m_pos));
00638 
00639     memcpy(pbuffer, &buff[0], read);
00640 
00641     m_pos += read;
00642 
00643     return read;
00644 }
00645 
00646 inline std::streampos fairport::hid_stream_device::seek(boost::iostreams::stream_offset off, std::ios_base::seekdir way)
00647 {
00648 #if defined(_MSC_VER) && (_MSC_VER < 1600)
00649 #pragma warning(push)
00650 #pragma warning(disable:4244)
00651 #endif
00652     if(way == std::ios_base::beg)
00653         m_pos = off;
00654     else if(way == std::ios_base::end)
00655         m_pos = m_pheap->size(m_hid) + off;
00656     else
00657         m_pos += off;
00658 #if defined(_MSC_VER) && (_MSC_VER < 1600)
00659 #pragma warning(pop)
00660 #endif
00661 
00662     if(m_pos < 0)
00663         m_pos = 0;
00664     else if(static_cast<size_t>(m_pos) > m_pheap->size(m_hid))
00665         m_pos = m_pheap->size(m_hid);
00666 
00667     return m_pos;
00668 }
00669 
00670 inline std::vector<fairport::byte> fairport::heap_impl::read(heap_id id) const
00671 {
00672     std::vector<byte> result(size(id));
00673     read(result, id, 0);
00674     return result;
00675 }
00676 
00677 template<typename K, typename V>
00678 inline std::tr1::shared_ptr<fairport::bth_node<K,V> > fairport::heap_impl::open_bth(heap_id root)
00679 { 
00680     return bth_node<K,V>::open_root(shared_from_this(), root); 
00681 }
00682 
00683 #ifdef _MSC_VER
00684 #pragma warning(pop)
00685 #endif
00686 
00687 #endif