include/lshkit/sketch.h

Go to the documentation of this file.
00001 /* 
00002     Copyright (C) 2008 Wei Dong <wdong@princeton.edu>. All Rights Reserved.
00003   
00004     This file is part of LSHKIT.
00005   
00006     LSHKIT is free software: you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation, either version 3 of the License, or
00009     (at your option) any later version.
00010 
00011     LSHKIT is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with LSHKIT.  If not, see <http://www.gnu.org/licenses/>.
00018 */
00019 
00020 
00021 #ifndef __LSHKIT_SKETCH__
00022 #define __LSHKIT_SKETCH__
00023 
00104 #include <lshkit/matrix.h>
00105 
00106 
00107 namespace lshkit {
00108 
00110 template <typename LSH, typename CHUNK = unsigned char>
00111 class Sketch
00112 {
00113     BOOST_CONCEPT_ASSERT((DeltaLshConcept<LSH>));
00114 
00115     unsigned dim_;
00116     std::vector<LSH> lsh_;
00117 
00118 public:
00120     typedef typename LSH::Parameter Parameter;
00122     typedef typename LSH::Domain Domain; 
00124     static const unsigned CHUNK_BIT = sizeof(CHUNK) * 8; // #bits in CHUNK
00125 
00128     Sketch() {
00129     }
00130 
00136     template <typename Engine>
00137     void reset (unsigned chunks, Parameter param, Engine &engine) {
00138         dim_ = chunks;
00139         lsh_.resize(dim_ * CHUNK_BIT);
00140         for (unsigned i = 0; i < lsh_.size(); ++i) lsh_[i].reset(param, engine);
00141     }
00142 
00148     template <typename Engine>
00149     Sketch(unsigned chunks, Parameter param, Engine &engine) : 
00150         dim_(chunks), lsh_(dim_ * CHUNK_BIT)
00151     {
00152         for (unsigned i = 0; i < lsh_.size(); ++i) lsh_[i].reset(param, engine);
00153     }
00154 
00156     void apply (Domain in, CHUNK *out) const
00157     {
00158         unsigned l = 0;
00159         for (unsigned i = 0; i < dim_; ++i) {
00160             out[i] = 0;
00161             for (unsigned j = 0; j < CHUNK_BIT; ++j) {
00162                 unsigned k = lsh_[l++](in);
00163                 out[i] = out[i] | (k << j);
00164             }
00165         }
00166     }
00167     
00169 
00174     void apply (Domain in, CHUNK *out, float *asym) const
00175     {
00176         unsigned l = 0;
00177         for (unsigned i = 0; i < dim_; ++i) {
00178             out[i] = 0;
00179             for (unsigned j = 0; j < CHUNK_BIT; ++j) {
00180                 unsigned k = lsh_[l](in, &asym[l]);
00181                 out[i] = out[i] | (k << j);
00182                 l++;
00183             }
00184         }
00185     }
00186 
00188     template<class Archive>
00189     void serialize(Archive & ar, const unsigned int version)
00190     {
00191         ar & dim_;
00192         ar & lsh_;
00193     }
00194 
00196     void load (std::istream &is) {
00197         serialize(is, 0);
00198     }
00199 
00201     void save (std::ostream &os) {
00202         serialize(os, 0);
00203     }
00204 
00206     unsigned getBits () const {
00207         return dim_ * CHUNK_BIT;
00208     }
00209 
00211     unsigned getChunks () const {
00212         return dim_;
00213     }
00214 };
00215 
00217 
00228 template <typename CHUNK = unsigned char>
00229 class WeightedHammingHelper
00230 {
00231 public:
00232     static const unsigned CHUNK_BIT = sizeof(CHUNK) * 8;
00234 
00237     WeightedHammingHelper(unsigned chunks)
00238         : nchunk_(chunks), lookup_(1 << CHUNK_BIT, chunks)
00239     {
00240     }
00241 
00243 
00247     void update (const CHUNK *in, const float *asym)
00248     {
00249         unsigned l = 0;
00250         for (unsigned i = 0; i < nchunk_; ++i) {
00251             CHUNK q = in[i];
00252             CHUNK p = 0;
00253             for (;;) {
00254                 CHUNK c = q ^ p;
00255                 float v = 0;
00256                 for (unsigned j = 0; j < CHUNK_BIT; ++j) {
00257                     if (c & (1 << j)) {
00258                         v += asym[l + j];
00259                     }
00260                 }
00261                 lookup_[i][p] = v;
00262                 ++p;
00263                 if (p == 0) break;
00264             }
00265             l += CHUNK_BIT;
00266         }
00267     }
00268 
00270     float distTo (const CHUNK *in)
00271     {
00272         float dist = 0;
00273         for (unsigned i = 0; i < nchunk_; ++i) {
00274             dist += lookup_[i][in[i]];
00275         }
00276         return dist;
00277     }
00278 
00279 private:
00280     unsigned nchunk_;
00281     Matrix<float> lookup_;
00282 };
00283 
00284 
00285 }
00286 
00287 #endif
00288 

Get LSHKIT at SourceForge.net. Fast, secure and Free Open Source software downloads doxygen