00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00025 #ifndef __LSHKIT_FLAT__
00026 #define __LSHKIT_FLAT__
00027
00028 #include <stdexcept>
00029 #include <algorithm>
00030 #include <lshkit/common.h>
00031 #include <lshkit/topk.h>
00032 #include <lshkit/archive.h>
00033
00034 namespace lshkit {
00035
00037
00044 template <typename LSH, typename KEY>
00045 class LshIndex
00046 {
00047
00048 BOOST_CONCEPT_ASSERT((LshConcept<LSH>));
00049 public:
00050 typedef typename LSH::Parameter Parameter;
00051 typedef typename LSH::Domain Domain;
00052 typedef KEY Key;
00053
00054 protected:
00055 typedef std::vector<Key> Bin;
00056
00057 std::vector<LSH> lshs_;
00058 std::vector<std::vector<Bin> > tables_;
00059
00060 public:
00062 LshIndex() {
00063 }
00064
00066
00072 template <typename Engine>
00073 void init (const Parameter ¶m, Engine &engine, unsigned L)
00074 {
00075 verify(lshs_.size() == 0);
00076 verify(tables_.size() == 0);
00077 lshs_.resize(L);
00078 tables_.resize(L);
00079
00080 for (unsigned i = 0; i < L; ++i) {
00081 lshs_[i].reset(param, engine);
00082 if (lshs_[i].getRange() == 0) {
00083 throw std::logic_error("LSH with unlimited range should not be used to construct an LSH index. Use lshkit::Tail<> to wrapp the LSH.");
00084 }
00085 tables_[i].resize(lshs_[i].getRange());
00086 }
00087 }
00088
00090
00091 void load (std::istream &ar)
00092 {
00093 unsigned L;
00094 ar & L;
00095 lshs_.resize(L);
00096 tables_.resize(L);
00097 for (unsigned i = 0; i < L; ++i) {
00098 lshs_[i].serialize(ar, 0);
00099 unsigned l;
00100 ar & l;
00101 std::vector<Bin> &table = tables_[i];
00102 table.resize(l);
00103 for (;;) {
00104 unsigned idx, ll;
00105 ar & idx;
00106 ar & ll;
00107 if (ll == 0) break;
00108 table[idx].resize(ll);
00109 ar.read((char *)&table[idx][0], ll * sizeof(Key));
00110 }
00111 }
00112 }
00113
00115 void save (std::ostream &ar)
00116 {
00117 unsigned L;
00118 L = lshs_.size();
00119 ar & L;
00120 for (unsigned i = 0; i < L; ++i) {
00121 lshs_[i].serialize(ar, 0);
00122 std::vector<Bin> &table = tables_[i];
00123 unsigned l = table.size();
00124 ar & l;
00125 unsigned idx, ll;
00126 for (unsigned j = 0; j < l; ++j) {
00127 if (table[j].empty()) continue;
00128 idx = j;
00129 ll = table[j].size();
00130 ar & idx;
00131 ar & ll;
00132 ar.write((char *)&table[j][0], ll * sizeof(Key));
00133 }
00134 idx = ll = 0;
00135 ar & idx;
00136 ar & ll;
00137 }
00138 }
00139
00141
00148 void insert (Key key, Domain value)
00149 {
00150 for (unsigned i = 0; i < lshs_.size(); ++i) {
00151 unsigned index = lshs_[i](value);
00152 tables_[i][index].push_back(key);
00153 }
00154 }
00155
00157
00164 template <typename SCANNER>
00165 void query (Domain obj, SCANNER &scanner) const
00166 {
00167 for (unsigned i = 0; i < lshs_.size(); ++i) {
00168 unsigned index = lshs_[i](obj);
00169 BOOST_FOREACH(Key key, tables_[i][index]) {
00170 scanner(key);
00171 }
00172 }
00173 }
00174 };
00175
00176
00177 }
00178
00179 #endif
00180