include/lshkit/lsh.h

Go to the documentation of this file.
00001 /* 
00002     Copyright (C) 2008 Wei Dong <wdong@princeton.edu>. All Rights Reserved.
00003   
00004     This file is part of LSHKIT.
00005   
00006     LSHKIT is free software: you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation, either version 3 of the License, or
00009     (at your option) any later version.
00010 
00011     LSHKIT is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with LSHKIT.  If not, see <http://www.gnu.org/licenses/>.
00018 */
00019 
00020 #ifndef __LSHKIT_LSH__
00021 #define __LSHKIT_LSH__
00022 
00034 namespace lshkit {
00035 
00037 
00071 template <typename DIST>
00072 class StableDistLsh
00073 {
00074     std::vector<float> a_;
00075     float b_;
00076     float W_;
00077     unsigned dim_;
00078 public:
00082     struct Parameter
00083     {
00085         unsigned dim;   
00087         float W;
00088     };
00089 
00090     typedef const float *Domain;
00091 
00092     StableDistLsh ()
00093     {
00094     }
00095 
00096     template <typename RNG>
00097     void reset(const Parameter &param, RNG &rng)
00098     {
00099         a_.resize(param.dim);
00100         W_ = param.W;
00101         dim_ = param.dim;
00102 
00103         boost::variate_generator<RNG &, DIST> gen(rng, DIST());
00104 
00105         for (unsigned i = 0; i < dim_; ++i) a_[i] = gen();
00106 
00107         b_ = boost::variate_generator<RNG &, Uniform>(rng, Uniform(0,W_))();
00108     }
00109 
00110     template <typename RNG>
00111     StableDistLsh(const Parameter &param, RNG &rng)
00112     {
00113         reset(param, rng);
00114     }
00115 
00116 
00117     unsigned getRange () const
00118     {
00119         return 0;
00120     }
00121 
00122     unsigned operator () (Domain obj) const
00123     {
00124         float ret = b_;
00125         for (unsigned i = 0; i < dim_; ++i)
00126         {
00127             ret += a_[i] * obj[i];
00128         }
00129         return unsigned(int(std::floor(ret / W_)));
00130     }
00131 
00132     unsigned operator () (Domain obj, float *delta) const
00133     {
00134         float ret = b_;
00135         for (unsigned i = 0; i < dim_; ++i)
00136         {
00137             ret += a_[i] * obj[i];
00138         }
00139         ret /= W_;
00140 
00141         float flr =  std::floor(ret);
00142         *delta = ret - flr;
00143         return unsigned(int(flr));
00144     }
00145 
00146     template<class Archive>
00147     void serialize(Archive & ar, const unsigned int version)
00148     {
00149         ar & a_;
00150         ar & b_;
00151         ar & W_;
00152         ar & dim_;
00153         assert(a_.size() == dim_);
00154     }
00155 
00156 };
00157 
00159 typedef StableDistLsh<Cauchy> CauchyLsh;
00161 typedef StableDistLsh<Gaussian> GaussianLsh;
00162 
00164 
00189 class HyperPlaneLsh
00190 {
00191     std::vector<float> a_;
00192 
00193 public:
00197     struct Parameter
00198     {
00200         unsigned dim;
00201     };
00202     typedef const float *Domain;
00203 
00204     HyperPlaneLsh ()
00205     {
00206     }
00207 
00208     template <typename RNG>
00209     void reset(const Parameter &param, RNG &rng)
00210     {
00211         a_ = boost::variate_generator<RNG &, boost::uniform_on_sphere<float> >
00212                 (rng, boost::uniform_on_sphere<float>(param.dim))();
00213     }
00214 
00215     template <typename RNG>
00216     HyperPlaneLsh(const Parameter &param, RNG &rng)
00217     {
00218         reset(param, rng);
00219     }
00220 
00221 
00222     unsigned getRange () const
00223     {
00224         return 2;
00225     }
00226 
00227     unsigned operator () (Domain obj) const
00228     {
00229         float ret = 0;
00230         for (unsigned i = 0; i < a_.size(); ++i)
00231         {
00232             ret += a_[i] * obj[i];
00233         }
00234         return ret >= 0 ? 1 : 0;
00235     }
00236 
00237     unsigned operator () (Domain obj, float *delta) const
00238     {
00239         float ret = 0;
00240         for (unsigned i = 0; i < a_.size(); ++i)
00241         {
00242             ret += a_[i] * obj[i];
00243         }
00244         if (ret >= 0)
00245         {
00246             *delta = ret;
00247             return 1;
00248         }
00249         else
00250         {
00251             *delta = -ret;
00252             return 0;
00253         }
00254     }
00255 
00256     template<class Archive>
00257     void serialize(Archive & ar, const unsigned int version)
00258     {
00259         ar & a_;
00260     }
00261 };
00262 
00263 
00265 
00298 class ThresholdingLsh
00299 {
00300     unsigned dim_;
00301     float threshold_;
00302 public:
00306     struct Parameter
00307     {
00309         unsigned dim;
00311         float min;
00313         float max;
00314     };
00315     typedef const float *Domain;
00316 
00317     ThresholdingLsh ()
00318     {
00319     }
00320 
00321     template <typename RNG>
00322     void reset(const Parameter &param, RNG &rng)
00323     {
00324         dim_ = boost::variate_generator<RNG &, UniformUnsigned>(rng, UniformUnsigned(0,param.dim - 1))();
00325         threshold_ = boost::variate_generator<RNG &, Uniform>(rng, Uniform(param.min,param.max))();
00326     }
00327 
00328     template <typename RNG>
00329     ThresholdingLsh(const Parameter &param, RNG &rng)
00330     {
00331         reset(param, rng);
00332     }
00333 
00334     unsigned getRange () const
00335     {
00336         return 2;
00337     }
00338 
00339     unsigned operator () (Domain obj) const
00340     {
00341         return obj[dim_] >= threshold_ ? 1 : 0;
00342     }
00343 
00344     unsigned operator () (Domain obj, float *delta) const
00345     {
00346         float ret = obj[dim_] - threshold_;
00347         if (ret >= 0)
00348         {
00349             *delta = ret;
00350             return 1;
00351         }
00352         else
00353         {
00354             *delta = -ret;
00355             return 0;
00356         }
00357     }
00358 
00359     template<class Archive>
00360     void serialize(Archive & ar, const unsigned int version)
00361     {
00362         ar & dim_;
00363         ar & threshold_;
00364     }
00365 };
00366 
00367 }
00368 
00369 #endif
00370 

Get LSHKIT at SourceForge.net. Fast, secure and Free Open Source software downloads doxygen