include/lshkit/eval.h

Go to the documentation of this file.
00001 /* 
00002     Copyright (C) 2008 Wei Dong <wdong@princeton.edu>. All Rights Reserved.
00003   
00004     This file is part of LSHKIT.
00005   
00006     LSHKIT is free software: you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation, either version 3 of the License, or
00009     (at your option) any later version.
00010 
00011     LSHKIT is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with LSHKIT.  If not, see <http://www.gnu.org/licenses/>.
00018 */
00019 
00020 #ifndef __LSHKIT_EVAL__
00021 #define __LSHKIT_EVAL__
00022 
00029 #include <cmath>
00030 #include <limits>
00031 #include <vector>
00032 #include <algorithm>
00033 #include <fstream>
00034 #include <boost/random.hpp>
00035 #include <lshkit/topk.h>
00036 
00037 
00038 namespace lshkit {
00039 
00041 
00047 template <typename RNG>
00048 void SampleQueries (std::vector<unsigned> *qry, unsigned max, RNG
00049         &rng)
00050 {
00051     boost::variate_generator<RNG &, UniformUnsigned> gen(rng, UniformUnsigned(0,
00052                 max-1));
00053     for (unsigned i = 0; i < qry->size(); ++i)
00054     {
00055         for (;;)
00056         {
00057             qry->at(i) = gen();
00058             unsigned j;
00059             for (j = 0; j < i; j++) if (qry->at(i) == qry->at(j)) break;
00060             if (j >= i) break;
00061         }
00062     }
00063 }
00064 
00065 
00067 
00082 template <typename KEY = unsigned>
00083 class Benchmark
00084 {
00085     unsigned Q_;
00086 
00087     std::vector<unsigned> queries_;
00088     std::vector<Topk<KEY> > topks_;
00089 public:
00090     Benchmark(): Q_(0) {}
00091 
00092     void resize(unsigned Q, unsigned K = 0)
00093     {
00094         Q_ = Q;
00095         queries_.resize(Q);
00096         topks_.resize(Q);
00097         if (K > 0) {
00098             BOOST_FOREACH(Topk<KEY> &knn, topks_) {
00099                 if (knn.size() < K) throw std::runtime_error("BENCHMARK NOT LARGE ENOUGH");
00100                 knn.resize(K);
00101             }
00102         }
00103     }
00104 
00105     // Random initialization
00106     void init(unsigned Q, unsigned maxID) {
00107         Q_ = Q;
00108         queries_.resize(Q);
00109         topks_.resize(Q);
00110 
00111         DefaultRng rng;
00112         SampleQueries(&queries_, maxID, rng);
00113     }
00114 
00115     ~Benchmark() {}
00116 
00117     void load (std::istream &is)
00118     {
00119         std::string line;
00120         queries_.clear();
00121         topks_.clear();
00122         for (;;) {
00123             unsigned q, k;
00124             is >> q;
00125             if (!is) break;
00126             queries_.push_back(q);
00127             is >> k;
00128             topks_.push_back(Topk<KEY>());
00129             topks_.back().resize(k);
00130             for (unsigned j = 0; j < k; j++) {
00131                 is >> topks_.back()[j].key;
00132                 is >> topks_.back()[j].dist;
00133             }
00134         }
00135         Q_ = queries_.size();
00136     }
00137 
00138     void save (std::ostream &os) const
00139     {
00140         for (unsigned i = 0; i < Q_; i++) {
00141             os << queries_[i] << '\t' << topks_[i].size() << '\t';
00142             for (unsigned j = 0; j < topks_[i].size(); j++) {
00143                 os << '\t' <<  topks_[i][j].key;
00144                 os << '\t' <<  topks_[i][j].dist;
00145             }
00146             os << std::endl;
00147         }
00148     }
00149 
00150     void load (const std::string &path)
00151     {
00152         std::ifstream is(path.c_str());
00153         load(is);
00154         is.close();
00155     }
00156 
00157     void save (const std::string &path) const
00158     {
00159         std::ofstream os(path.c_str());
00160         save(os);
00161         os.close();
00162     }
00163 
00164     unsigned getQ () const { return Q_; }
00165 
00167     unsigned getQuery (unsigned n) const {  return queries_[n]; }
00168 
00170     const Topk<KEY> &getAnswer (unsigned n) const { return topks_[n]; }
00171 
00173     Topk<KEY> &getAnswer (unsigned n) { return topks_[n]; }
00174 };
00175 
00176 
00178 
00200 class Stat
00201 {
00202     int count;
00203     float sum;
00204     float sum2;
00205     float min;
00206     float max;
00207 public:
00208     Stat () : count(0), sum(0), sum2(0), min(std::numeric_limits<float>::max()), max(-std::numeric_limits<float>::max()) {
00209     }
00210 
00211     ~Stat () {
00212     }
00213 
00214     void reset () {
00215         count = 0;
00216         sum = sum2 = 0;
00217         min = std::numeric_limits<float>::max();
00218         max = -std::numeric_limits<float>::max();
00219     }
00220 
00221     void append (float r)
00222     {
00223         count++;
00224         sum += r;
00225         sum2 += r*r;
00226         if (r > max) max = r;
00227         if (r < min) min = r;
00228     }
00229 
00230     Stat & operator<< (float r) { append(r); return *this; }
00231 
00232     int getCount() const { return count; }
00233     float getSum() const { return sum; }
00234     float getAvg() const { return sum/count; }
00235     float getMax() const { return max; }
00236     float getMin() const { return min; }
00237     float getStd() const
00238     {
00239         if (count > 1) return std::sqrt((sum2 - (sum/count) * sum)/(count - 1)); 
00240         else return 0; 
00241     }
00242 
00243     void merge (const Stat &stat)
00244     {
00245         count += stat.count;
00246         sum += stat.sum;
00247         sum2 += stat.sum2;
00248         if (stat.min < min) min = stat.min;
00249         if (stat.max > max) max = stat.max;
00250     }
00251 };
00252 
00253 /*
00254 class Timer
00255 {
00256     struct  timeval start; 
00257 public:
00258     Timer () {}
00260     void tick ()
00261     {
00262         gettimeofday(&start, 0); 
00263     }
00265     float tuck (const char *msg) const
00266     {
00267         struct timeval end;
00268         float   diff; 
00269         gettimeofday(&end, 0); 
00270 
00271         diff = (end.tv_sec - start.tv_sec) 
00272                 + (end.tv_usec - start.tv_usec) * 0.000001; 
00273         if (msg != 0) {
00274             std::cout << msg << ':' <<  diff << std::endl;
00275         }
00276         return diff;
00277     }
00278 };
00279 */
00280 
00281 }
00282 
00283 #endif
00284 

Get LSHKIT at SourceForge.net. Fast, secure and Free Open Source software downloads doxygen