00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef __LSHKIT_EVAL__
00021 #define __LSHKIT_EVAL__
00022
00029 #include <cmath>
00030 #include <limits>
00031 #include <vector>
00032 #include <algorithm>
00033 #include <fstream>
00034 #include <boost/random.hpp>
00035 #include <lshkit/topk.h>
00036
00037
00038 namespace lshkit {
00039
00041
00047 template <typename RNG>
00048 void SampleQueries (std::vector<unsigned> *qry, unsigned max, RNG
00049 &rng)
00050 {
00051 boost::variate_generator<RNG &, UniformUnsigned> gen(rng, UniformUnsigned(0,
00052 max-1));
00053 for (unsigned i = 0; i < qry->size(); ++i)
00054 {
00055 for (;;)
00056 {
00057 qry->at(i) = gen();
00058 unsigned j;
00059 for (j = 0; j < i; j++) if (qry->at(i) == qry->at(j)) break;
00060 if (j >= i) break;
00061 }
00062 }
00063 }
00064
00065
00067
00082 template <typename KEY = unsigned>
00083 class Benchmark
00084 {
00085 unsigned Q_;
00086
00087 std::vector<unsigned> queries_;
00088 std::vector<Topk<KEY> > topks_;
00089 public:
00090 Benchmark(): Q_(0) {}
00091
00092 void resize(unsigned Q, unsigned K = 0)
00093 {
00094 Q_ = Q;
00095 queries_.resize(Q);
00096 topks_.resize(Q);
00097 if (K > 0) {
00098 BOOST_FOREACH(Topk<KEY> &knn, topks_) {
00099 if (knn.size() < K) throw std::runtime_error("BENCHMARK NOT LARGE ENOUGH");
00100 knn.resize(K);
00101 }
00102 }
00103 }
00104
00105
00106 void init(unsigned Q, unsigned maxID) {
00107 Q_ = Q;
00108 queries_.resize(Q);
00109 topks_.resize(Q);
00110
00111 DefaultRng rng;
00112 SampleQueries(&queries_, maxID, rng);
00113 }
00114
00115 ~Benchmark() {}
00116
00117 void load (std::istream &is)
00118 {
00119 std::string line;
00120 queries_.clear();
00121 topks_.clear();
00122 for (;;) {
00123 unsigned q, k;
00124 is >> q;
00125 if (!is) break;
00126 queries_.push_back(q);
00127 is >> k;
00128 topks_.push_back(Topk<KEY>());
00129 topks_.back().resize(k);
00130 for (unsigned j = 0; j < k; j++) {
00131 is >> topks_.back()[j].key;
00132 is >> topks_.back()[j].dist;
00133 }
00134 }
00135 Q_ = queries_.size();
00136 }
00137
00138 void save (std::ostream &os) const
00139 {
00140 for (unsigned i = 0; i < Q_; i++) {
00141 os << queries_[i] << '\t' << topks_[i].size() << '\t';
00142 for (unsigned j = 0; j < topks_[i].size(); j++) {
00143 os << '\t' << topks_[i][j].key;
00144 os << '\t' << topks_[i][j].dist;
00145 }
00146 os << std::endl;
00147 }
00148 }
00149
00150 void load (const std::string &path)
00151 {
00152 std::ifstream is(path.c_str());
00153 load(is);
00154 is.close();
00155 }
00156
00157 void save (const std::string &path) const
00158 {
00159 std::ofstream os(path.c_str());
00160 save(os);
00161 os.close();
00162 }
00163
00164 unsigned getQ () const { return Q_; }
00165
00167 unsigned getQuery (unsigned n) const { return queries_[n]; }
00168
00170 const Topk<KEY> &getAnswer (unsigned n) const { return topks_[n]; }
00171
00173 Topk<KEY> &getAnswer (unsigned n) { return topks_[n]; }
00174 };
00175
00176
00178
00200 class Stat
00201 {
00202 int count;
00203 float sum;
00204 float sum2;
00205 float min;
00206 float max;
00207 public:
00208 Stat () : count(0), sum(0), sum2(0), min(std::numeric_limits<float>::max()), max(-std::numeric_limits<float>::max()) {
00209 }
00210
00211 ~Stat () {
00212 }
00213
00214 void reset () {
00215 count = 0;
00216 sum = sum2 = 0;
00217 min = std::numeric_limits<float>::max();
00218 max = -std::numeric_limits<float>::max();
00219 }
00220
00221 void append (float r)
00222 {
00223 count++;
00224 sum += r;
00225 sum2 += r*r;
00226 if (r > max) max = r;
00227 if (r < min) min = r;
00228 }
00229
00230 Stat & operator<< (float r) { append(r); return *this; }
00231
00232 int getCount() const { return count; }
00233 float getSum() const { return sum; }
00234 float getAvg() const { return sum/count; }
00235 float getMax() const { return max; }
00236 float getMin() const { return min; }
00237 float getStd() const
00238 {
00239 if (count > 1) return std::sqrt((sum2 - (sum/count) * sum)/(count - 1));
00240 else return 0;
00241 }
00242
00243 void merge (const Stat &stat)
00244 {
00245 count += stat.count;
00246 sum += stat.sum;
00247 sum2 += stat.sum2;
00248 if (stat.min < min) min = stat.min;
00249 if (stat.max > max) max = stat.max;
00250 }
00251 };
00252
00253
00254
00255
00256
00257
00258
00260
00261
00262
00263
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281 }
00282
00283 #endif
00284