00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __LSHKIT_SKETCH__
00022 #define __LSHKIT_SKETCH__
00023
00104 #include <lshkit/matrix.h>
00105
00106
00107 namespace lshkit {
00108
00110 template <typename LSH, typename CHUNK = unsigned char>
00111 class Sketch
00112 {
00113 BOOST_CONCEPT_ASSERT((DeltaLshConcept<LSH>));
00114
00115 unsigned dim_;
00116 std::vector<LSH> lsh_;
00117
00118 public:
00120 typedef typename LSH::Parameter Parameter;
00122 typedef typename LSH::Domain Domain;
00124 static const unsigned CHUNK_BIT = sizeof(CHUNK) * 8;
00125
00128 Sketch() {
00129 }
00130
00136 template <typename Engine>
00137 void reset (unsigned chunks, Parameter param, Engine &engine) {
00138 dim_ = chunks;
00139 lsh_.resize(dim_ * CHUNK_BIT);
00140 for (unsigned i = 0; i < lsh_.size(); ++i) lsh_[i].reset(param, engine);
00141 }
00142
00148 template <typename Engine>
00149 Sketch(unsigned chunks, Parameter param, Engine &engine) :
00150 dim_(chunks), lsh_(dim_ * CHUNK_BIT)
00151 {
00152 for (unsigned i = 0; i < lsh_.size(); ++i) lsh_[i].reset(param, engine);
00153 }
00154
00156 void apply (Domain in, CHUNK *out) const
00157 {
00158 unsigned l = 0;
00159 for (unsigned i = 0; i < dim_; ++i) {
00160 out[i] = 0;
00161 for (unsigned j = 0; j < CHUNK_BIT; ++j) {
00162 unsigned k = lsh_[l++](in);
00163 out[i] = out[i] | (k << j);
00164 }
00165 }
00166 }
00167
00169
00174 void apply (Domain in, CHUNK *out, float *asym) const
00175 {
00176 unsigned l = 0;
00177 for (unsigned i = 0; i < dim_; ++i) {
00178 out[i] = 0;
00179 for (unsigned j = 0; j < CHUNK_BIT; ++j) {
00180 unsigned k = lsh_[l](in, &asym[l]);
00181 out[i] = out[i] | (k << j);
00182 l++;
00183 }
00184 }
00185 }
00186
00188 template<class Archive>
00189 void serialize(Archive & ar, const unsigned int version)
00190 {
00191 ar & dim_;
00192 ar & lsh_;
00193 }
00194
00196 void load (std::istream &is) {
00197 serialize(is, 0);
00198 }
00199
00201 void save (std::ostream &os) {
00202 serialize(os, 0);
00203 }
00204
00206 unsigned getBits () const {
00207 return dim_ * CHUNK_BIT;
00208 }
00209
00211 unsigned getChunks () const {
00212 return dim_;
00213 }
00214 };
00215
00217
00228 template <typename CHUNK = unsigned char>
00229 class WeightedHammingHelper
00230 {
00231 public:
00232 static const unsigned CHUNK_BIT = sizeof(CHUNK) * 8;
00234
00237 WeightedHammingHelper(unsigned chunks)
00238 : nchunk_(chunks), lookup_(1 << CHUNK_BIT, chunks)
00239 {
00240 }
00241
00243
00247 void update (const CHUNK *in, const float *asym)
00248 {
00249 unsigned l = 0;
00250 for (unsigned i = 0; i < nchunk_; ++i) {
00251 CHUNK q = in[i];
00252 CHUNK p = 0;
00253 for (;;) {
00254 CHUNK c = q ^ p;
00255 float v = 0;
00256 for (unsigned j = 0; j < CHUNK_BIT; ++j) {
00257 if (c & (1 << j)) {
00258 v += asym[l + j];
00259 }
00260 }
00261 lookup_[i][p] = v;
00262 ++p;
00263 if (p == 0) break;
00264 }
00265 l += CHUNK_BIT;
00266 }
00267 }
00268
00270 float distTo (const CHUNK *in)
00271 {
00272 float dist = 0;
00273 for (unsigned i = 0; i < nchunk_; ++i) {
00274 dist += lookup_[i][in[i]];
00275 }
00276 return dist;
00277 }
00278
00279 private:
00280 unsigned nchunk_;
00281 Matrix<float> lookup_;
00282 };
00283
00284
00285 }
00286
00287 #endif
00288