include/lshkit/lsh-index.h

Go to the documentation of this file.
00001 /* 
00002     Copyright (C) 2008 Wei Dong <wdong@princeton.edu>. All Rights Reserved.
00003   
00004     This file is part of LSHKIT.
00005   
00006     LSHKIT is free software: you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation, either version 3 of the License, or
00009     (at your option) any later version.
00010 
00011     LSHKIT is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with LSHKIT.  If not, see <http://www.gnu.org/licenses/>.
00018 */
00019 
00025 #ifndef __LSHKIT_FLAT__
00026 #define __LSHKIT_FLAT__
00027 
00028 #include <stdexcept>
00029 #include <algorithm>
00030 #include <lshkit/common.h>
00031 #include <lshkit/topk.h>
00032 #include <lshkit/archive.h>
00033 
00034 namespace lshkit {
00035 
00037 
00044 template <typename LSH, typename KEY>
00045 class LshIndex
00046 {
00047 
00048     BOOST_CONCEPT_ASSERT((LshConcept<LSH>));
00049 public:
00050     typedef typename LSH::Parameter Parameter;
00051     typedef typename LSH::Domain Domain;
00052     typedef KEY Key;
00053 
00054 protected:
00055     typedef std::vector<Key> Bin;
00056 
00057     std::vector<LSH> lshs_;
00058     std::vector<std::vector<Bin> > tables_;
00059 
00060 public:
00062     LshIndex() {
00063     }
00064 
00066 
00072     template <typename Engine>
00073     void init (const Parameter &param, Engine &engine, unsigned L)
00074     {
00075         verify(lshs_.size() == 0);
00076         verify(tables_.size() == 0);
00077         lshs_.resize(L);
00078         tables_.resize(L);
00079 
00080         for (unsigned i = 0; i < L; ++i) {
00081             lshs_[i].reset(param, engine);
00082             if (lshs_[i].getRange() == 0) {
00083                 throw std::logic_error("LSH with unlimited range should not be used to construct an LSH index.  Use lshkit::Tail<> to wrapp the LSH.");
00084             }
00085             tables_[i].resize(lshs_[i].getRange());
00086         }
00087     }
00088 
00090 
00091     void load (std::istream &ar)
00092     {
00093         unsigned L;
00094         ar & L;
00095         lshs_.resize(L);
00096         tables_.resize(L);
00097         for (unsigned i = 0; i < L; ++i) {
00098             lshs_[i].serialize(ar, 0);
00099             unsigned l;
00100             ar & l;
00101             std::vector<Bin> &table = tables_[i];
00102             table.resize(l);
00103             for (;;) {
00104                 unsigned idx, ll;
00105                 ar & idx;
00106                 ar & ll;
00107                 if (ll == 0) break;
00108                 table[idx].resize(ll);
00109                 ar.read((char *)&table[idx][0], ll * sizeof(Key));
00110             }
00111         }
00112     }
00113 
00115     void save (std::ostream &ar)
00116     {
00117         unsigned L;
00118         L = lshs_.size();
00119         ar & L;
00120         for (unsigned i = 0; i < L; ++i) {
00121             lshs_[i].serialize(ar, 0);
00122             std::vector<Bin> &table = tables_[i];
00123             unsigned l = table.size();
00124             ar & l;
00125             unsigned idx, ll;
00126             for (unsigned j = 0; j < l; ++j) {
00127                 if (table[j].empty()) continue;
00128                 idx = j;
00129                 ll = table[j].size();
00130                 ar & idx;
00131                 ar & ll;
00132                 ar.write((char *)&table[j][0], ll * sizeof(Key));
00133             }
00134             idx = ll = 0;
00135             ar & idx;
00136             ar & ll;
00137         }
00138     }
00139 
00141 
00148     void insert (Key key, Domain value)
00149     {
00150         for (unsigned i = 0; i < lshs_.size(); ++i) {
00151             unsigned index = lshs_[i](value);
00152             tables_[i][index].push_back(key);
00153         }
00154     }
00155 
00157 
00164     template <typename SCANNER>
00165     void query (Domain obj, SCANNER &scanner) const
00166     {
00167         for (unsigned i = 0; i < lshs_.size(); ++i) {
00168             unsigned index = lshs_[i](obj);
00169             BOOST_FOREACH(Key key, tables_[i][index]) {
00170                 scanner(key);
00171             }
00172         }
00173     }
00174 };
00175 
00176 
00177 }
00178 
00179 #endif
00180 

Get LSHKIT at SourceForge.net. Fast, secure and Free Open Source software downloads doxygen