00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef __LSHKIT_LSH__
00021 #define __LSHKIT_LSH__
00022
00034 namespace lshkit {
00035
00037
00071 template <typename DIST>
00072 class StableDistLsh
00073 {
00074 std::vector<float> a_;
00075 float b_;
00076 float W_;
00077 unsigned dim_;
00078 public:
00082 struct Parameter
00083 {
00085 unsigned dim;
00087 float W;
00088 };
00089
00090 typedef const float *Domain;
00091
00092 StableDistLsh ()
00093 {
00094 }
00095
00096 template <typename RNG>
00097 void reset(const Parameter ¶m, RNG &rng)
00098 {
00099 a_.resize(param.dim);
00100 W_ = param.W;
00101 dim_ = param.dim;
00102
00103 boost::variate_generator<RNG &, DIST> gen(rng, DIST());
00104
00105 for (unsigned i = 0; i < dim_; ++i) a_[i] = gen();
00106
00107 b_ = boost::variate_generator<RNG &, Uniform>(rng, Uniform(0,W_))();
00108 }
00109
00110 template <typename RNG>
00111 StableDistLsh(const Parameter ¶m, RNG &rng)
00112 {
00113 reset(param, rng);
00114 }
00115
00116
00117 unsigned getRange () const
00118 {
00119 return 0;
00120 }
00121
00122 unsigned operator () (Domain obj) const
00123 {
00124 float ret = b_;
00125 for (unsigned i = 0; i < dim_; ++i)
00126 {
00127 ret += a_[i] * obj[i];
00128 }
00129 return unsigned(int(std::floor(ret / W_)));
00130 }
00131
00132 unsigned operator () (Domain obj, float *delta) const
00133 {
00134 float ret = b_;
00135 for (unsigned i = 0; i < dim_; ++i)
00136 {
00137 ret += a_[i] * obj[i];
00138 }
00139 ret /= W_;
00140
00141 float flr = std::floor(ret);
00142 *delta = ret - flr;
00143 return unsigned(int(flr));
00144 }
00145
00146 template<class Archive>
00147 void serialize(Archive & ar, const unsigned int version)
00148 {
00149 ar & a_;
00150 ar & b_;
00151 ar & W_;
00152 ar & dim_;
00153 assert(a_.size() == dim_);
00154 }
00155
00156 };
00157
00159 typedef StableDistLsh<Cauchy> CauchyLsh;
00161 typedef StableDistLsh<Gaussian> GaussianLsh;
00162
00164
00189 class HyperPlaneLsh
00190 {
00191 std::vector<float> a_;
00192
00193 public:
00197 struct Parameter
00198 {
00200 unsigned dim;
00201 };
00202 typedef const float *Domain;
00203
00204 HyperPlaneLsh ()
00205 {
00206 }
00207
00208 template <typename RNG>
00209 void reset(const Parameter ¶m, RNG &rng)
00210 {
00211 a_ = boost::variate_generator<RNG &, boost::uniform_on_sphere<float> >
00212 (rng, boost::uniform_on_sphere<float>(param.dim))();
00213 }
00214
00215 template <typename RNG>
00216 HyperPlaneLsh(const Parameter ¶m, RNG &rng)
00217 {
00218 reset(param, rng);
00219 }
00220
00221
00222 unsigned getRange () const
00223 {
00224 return 2;
00225 }
00226
00227 unsigned operator () (Domain obj) const
00228 {
00229 float ret = 0;
00230 for (unsigned i = 0; i < a_.size(); ++i)
00231 {
00232 ret += a_[i] * obj[i];
00233 }
00234 return ret >= 0 ? 1 : 0;
00235 }
00236
00237 unsigned operator () (Domain obj, float *delta) const
00238 {
00239 float ret = 0;
00240 for (unsigned i = 0; i < a_.size(); ++i)
00241 {
00242 ret += a_[i] * obj[i];
00243 }
00244 if (ret >= 0)
00245 {
00246 *delta = ret;
00247 return 1;
00248 }
00249 else
00250 {
00251 *delta = -ret;
00252 return 0;
00253 }
00254 }
00255
00256 template<class Archive>
00257 void serialize(Archive & ar, const unsigned int version)
00258 {
00259 ar & a_;
00260 }
00261 };
00262
00263
00265
00298 class ThresholdingLsh
00299 {
00300 unsigned dim_;
00301 float threshold_;
00302 public:
00306 struct Parameter
00307 {
00309 unsigned dim;
00311 float min;
00313 float max;
00314 };
00315 typedef const float *Domain;
00316
00317 ThresholdingLsh ()
00318 {
00319 }
00320
00321 template <typename RNG>
00322 void reset(const Parameter ¶m, RNG &rng)
00323 {
00324 dim_ = boost::variate_generator<RNG &, UniformUnsigned>(rng, UniformUnsigned(0,param.dim - 1))();
00325 threshold_ = boost::variate_generator<RNG &, Uniform>(rng, Uniform(param.min,param.max))();
00326 }
00327
00328 template <typename RNG>
00329 ThresholdingLsh(const Parameter ¶m, RNG &rng)
00330 {
00331 reset(param, rng);
00332 }
00333
00334 unsigned getRange () const
00335 {
00336 return 2;
00337 }
00338
00339 unsigned operator () (Domain obj) const
00340 {
00341 return obj[dim_] >= threshold_ ? 1 : 0;
00342 }
00343
00344 unsigned operator () (Domain obj, float *delta) const
00345 {
00346 float ret = obj[dim_] - threshold_;
00347 if (ret >= 0)
00348 {
00349 *delta = ret;
00350 return 1;
00351 }
00352 else
00353 {
00354 *delta = -ret;
00355 return 0;
00356 }
00357 }
00358
00359 template<class Archive>
00360 void serialize(Archive & ar, const unsigned int version)
00361 {
00362 ar & dim_;
00363 ar & threshold_;
00364 }
00365 };
00366
00367 }
00368
00369 #endif
00370