include/lshkit/composite.h

Go to the documentation of this file.
00001 /* 
00002     Copyright (C) 2008 Wei Dong <wdong@princeton.edu>. All Rights Reserved.
00003   
00004     This file is part of LSHKIT.
00005   
00006     LSHKIT is free software: you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation, either version 3 of the License, or
00009     (at your option) any later version.
00010 
00011     LSHKIT is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with LSHKIT.  If not, see <http://www.gnu.org/licenses/>.
00018 */
00019 
00020 #ifndef __LSHKIT_COMPOSITE__
00021 #define __LSHKIT_COMPOSITE__
00022 
00050 namespace lshkit {
00051 
00053 
00069 template <typename LSH>
00070 class Tail
00071 {
00072     BOOST_CONCEPT_ASSERT((LshConcept<LSH>));
00073 protected:
00074     LSH lsh_;
00075     unsigned range_;
00076 public:
00078     typedef LSH Super;
00082     struct Parameter: public Super::Parameter {
00084         unsigned range;
00085     };
00086 
00087     typedef typename Super::Domain Domain;
00088 
00089     Tail() {
00090     }
00091 
00092     template <typename RNG>
00093     void reset(const Parameter &param, RNG &rng) {
00094         range_ = param.range;
00095         lsh_.reset(param, rng);
00096     }
00097 
00098     template <typename RNG>
00099     Tail(const Parameter &param, RNG &rng) {
00100         reset(param, rng);
00101     }
00102 
00103     unsigned getRange () const {
00104         return range_;
00105     }
00106 
00107     unsigned operator () (Domain obj) const {
00108         return lsh_(obj) % range_;
00109     }
00110 
00111     template<class Archive>
00112     void serialize(Archive & ar, const unsigned int version) {
00113         ar & lsh_;
00114         ar & range_;
00115     }
00116 };
00117 
00119 
00124 template <typename LSH, unsigned range>
00125 class FixedTail
00126 {
00127     BOOST_CONCEPT_ASSERT((LshConcept<LSH>));
00128 protected:
00129     LSH lsh_;
00130 public:
00131     typedef LSH Super;
00132 
00136     struct Parameter: public Super::Parameter {
00137     };
00138 
00139     typedef typename Super::Domain Domain;
00140 
00141     FixedTail()
00142     {
00143     }
00144 
00145     template <typename RNG>
00146     void reset(const Parameter &param, RNG &rng)
00147     {
00148         lsh_.reset(param, rng);
00149     }
00150 
00151     template <typename RNG>
00152     FixedTail(const Parameter &param, RNG &rng)
00153     {
00154         reset(param, rng);
00155     }
00156 
00157     unsigned getRange () const
00158     {
00159         return range;
00160     }
00161 
00162     unsigned operator () (Domain obj) const
00163     {
00164         return lsh_(obj) % range;
00165     }
00166 
00167     const LSH &getLsh () const
00168     {
00169         return lsh_;
00170     }
00171 
00172     template<class Archive>
00173     void serialize(Archive & ar, const unsigned int version)
00174     {
00175         ar & lsh_;
00176     }
00177 };
00178 
00180 
00185 template <typename LSH>
00186 class LSB: public FixedTail<LSH, 2>
00187 {
00188 public:
00189     typedef FixedTail<LSH,2> Super;
00193     struct Parameter: public Super::Parameter {
00194     };
00195 
00196     typedef typename Super::Domain Domain;
00197 
00198     LSB () {}
00199 
00200     template <typename RNG>
00201     LSB(const Parameter &param, RNG &rng)
00202         : FixedTail<LSH, 2>(param, rng)
00203     {
00204     }
00205 };
00206 
00208 
00211 template <typename LSH>
00212 class DeltaLSB: public FixedTail<LSH, 2>
00213 {
00214     BOOST_CONCEPT_ASSERT((DeltaLshConcept<LSH>));
00215 
00216 public:
00217     typedef FixedTail<LSH, 2> Super;
00221     struct Parameter: public Super::Parameter {
00222     };
00223 
00224     typedef typename Super::Domain Domain;
00225 
00226     DeltaLSB() {}
00227 
00228     template <typename RNG>
00229     DeltaLSB(const Parameter &param, RNG &rng)
00230         : Super(param, rng)
00231     {
00232     }
00233 
00234     unsigned operator () (Domain obj) const
00235     {
00236         return Super::operator () (obj);
00237     }
00238 
00239     unsigned operator () (Domain obj, float *delta) const
00240     {
00241         float d;
00242         unsigned r = Super::getLsh()(obj, &d);
00243         *delta = min(d, 1.0F - d);
00244         return r % Super::getRange();
00245     }
00246 };
00247 
00249 
00273 template <typename LSH>
00274 class Repeat
00275 {
00276     BOOST_CONCEPT_ASSERT((LshConcept<LSH>));
00277 
00278 public:
00279     typedef LSH Super;
00283     struct Parameter: public Super::Parameter {
00285         unsigned repeat;
00286     };
00287 
00288     typedef typename Super::Domain Domain;
00289 
00290     Repeat ()
00291     {
00292     }
00293 
00294     template <typename RNG>
00295     void reset(const Parameter &param, RNG &rng)
00296     {
00297         dup_ = param.repeat;
00298         assert(dup_ > 0);
00299         lsh_.resize(dup_);
00300         lsh_[0].reset(param, rng);
00301         range_ = unit_ = lsh_[0].getRange();
00302         assert(unit_ > 0);
00303         assert( (unsigned)(1 << (sizeof(unsigned) * 8 / dup_)) >= unit_);
00304         for (unsigned i = 1; i < dup_; ++i)
00305         {
00306             lsh_[i].reset(param, rng);
00307             assert(unit_ == lsh_[i].getRange());
00308             range_ *= unit_;
00309         }
00310     }
00311 
00312     template <typename RNG>
00313     Repeat(const Parameter &param, RNG &rng)
00314     {
00315         reset(param, rng);
00316     }
00317 
00318     unsigned getRange () const
00319     {
00320         return range_;
00321     }
00322 
00323     unsigned operator () (Domain obj) const
00324     {
00325         unsigned ret = 0;
00326         for (unsigned i = 0; i < dup_; ++i)
00327         {
00328             ret *= unit_;
00329             ret += lsh_[i](obj);
00330         }
00331         return ret;
00332     }
00333 
00334     template<class Archive>
00335     void serialize(Archive & ar, const unsigned int version)
00336     {
00337         ar & dup_;
00338         ar & range_;
00339         ar & unit_;
00340         ar & lsh_;
00341     }
00342 
00343 protected:
00344     std::vector<Super> lsh_;
00345     unsigned dup_;
00346     unsigned range_;
00347     unsigned unit_;
00348 
00349 };
00350 
00351 
00353 
00370 template <typename LSH>
00371 class RepeatHash
00372 {
00373     BOOST_CONCEPT_ASSERT((LshConcept<LSH>));
00374 
00375 public:
00376     typedef LSH Super;
00380     struct Parameter: public Super::Parameter {
00382         unsigned repeat;
00383     };
00384 
00385     typedef typename LSH::Domain Domain;
00386 
00387     RepeatHash ()
00388     {
00389     }
00390 
00391     template <typename RNG>
00392     void reset(const Parameter &param, RNG &rng)
00393     {
00394         assert(param.repeat > 0);
00395         lsh_.resize(param.repeat);
00396         for (unsigned i = 0; i < param.repeat; ++i)
00397         {
00398             lsh_[i].reset(param, rng);
00399         }
00400         a_.resize(param.repeat);
00401 
00402         for (unsigned i = 0; i < param.repeat; ++i) a_[i] = rng();
00403     }
00404 
00405     template <typename RNG>
00406     RepeatHash(const Parameter &param, RNG &rng)
00407     {
00408         reset(param, rng);
00409     }
00410 
00411     unsigned getRange () const
00412     {
00413         return 0;
00414     }
00415 
00416     unsigned operator () (Domain obj) const
00417     {
00418         unsigned ret = 0;
00419         for (unsigned i = 0; i < lsh_.size(); ++i)
00420         {
00421             ret += lsh_[i](obj) * a_[i];
00422         }
00423         return ret;
00424     }
00425 
00426     template<class Archive>
00427     void serialize(Archive & ar, const unsigned int version)
00428     {
00429         ar & lsh_;
00430         ar & a_;
00431         assert(a_.size() == lsh_.size());
00432     }
00433 protected:
00434     std::vector<Super> lsh_;
00435     std::vector<unsigned> a_;
00436 
00437 };
00438 
00439 
00441 /*
00442  * The XOR of a number of 1-bit LSHes has higher locality sensitivity than
00443  * the original LSH.  This serves a similar purpose as RepeatHash.
00444  *
00445  * The new parameter is defined as:
00446  *      \code
00447  *      struct Parameter {
00448  *          unsigned xor_;     // # of LSHes to XOR
00449  *          ...                  // all parameters of the base LSH are inherited.
00450  *      };
00451  *      \endcode
00452  */
00453 template <typename LSH>
00454 class Xor
00455 {
00456     BOOST_CONCEPT_ASSERT((LshConcept<LSH>));
00457 public:
00458 
00459     typedef LSH Super;
00463     struct Parameter: public Super::Parameter {
00464         unsigned xor_; // xor is a keyword
00465     };
00466 
00467     typedef typename Super::Domain Domain;
00468 
00469     Xor ()
00470     {
00471     }
00472 
00473     template <typename RNG>
00474     void reset(const Parameter &param, RNG &rng)
00475     {
00476         lsh_.resize(param.xor_);
00477         for (unsigned i = 0; i < lsh_.size(); ++i)
00478         {
00479             lsh_[i].reset(param, rng);
00480             assert(lsh_[i].getRange() == 2);
00481         }
00482     }
00483 
00484     template <typename RNG>
00485     Xor(const Parameter &param, RNG &rng)
00486     {
00487         reset(param, rng);
00488     }
00489 
00490     unsigned getRange () const
00491     {
00492         return 2;
00493     }
00494 
00495     unsigned operator () (Domain obj) const
00496     {
00497         unsigned ret = 0;
00498         for (unsigned i = 0; i < lsh_.size(); ++i)
00499         {
00500             ret = ret ^ lsh_[i](obj);
00501         }
00502         return ret;
00503     }
00504 
00505     template<class Archive>
00506     void serialize(Archive & ar, const unsigned int version)
00507     {
00508         ar & lsh_;
00509     }
00510 protected:
00511     std::vector<Super> lsh_;
00512 };
00513 
00515 /*
00516  * This is essentially the same as XOR.  The delta of XOR is the
00517  * minimum of the deltas of all the original LSHes.
00518  */
00519 template <typename LSH>
00520 class DeltaXor: public Xor<LSH>
00521 {
00522 public:
00523     typedef Xor<LSH> Super;
00527     struct Parameter: public Super::Parameter  {
00528     };
00529 
00530     typedef typename Super::Domain Domain;
00531 
00532     DeltaXor ()
00533     {
00534     }
00535 
00536     template <typename RNG>
00537     DeltaXor(const Parameter &param, RNG &rng): Xor<LSH>(param, rng)
00538     {
00539     }
00540 
00541     unsigned operator () (Domain obj) const {
00542         return Super::operator () (obj);
00543     }
00544 
00545     unsigned operator () (Domain obj, float *delta) const
00546     {
00547         unsigned ret = 0;
00548         float m = std::numeric_limits<float>::max(), d;
00549         for (unsigned i = 0; i < Xor<LSH>::lsh_.size(); ++i)
00550         {
00551             ret = ret ^ Xor<LSH>::lsh_[i](obj, &d);
00552             m = min(m, d);
00553         }
00554         *delta = m;
00555         return ret;
00556     }
00557 };
00558 
00559 }
00560 
00561 #endif
00562 

Get LSHKIT at SourceForge.net. Fast, secure and Free Open Source software downloads doxygen