36#ifndef VIGRA_SAMPLING_HXX
37#define VIGRA_SAMPLING_HXX
39#include "array_vector.hxx"
// Settings carried by the options object that the Sampler constructors receive as `opt`.
// Proportion of the total count to draw; the Sampler constructors consult it only when sample_size is 0.
67 double sample_proportion;
// Absolute number of samples; the value 0 makes the Sampler constructors derive the size from sample_proportion.
68 unsigned int sample_size;
// Selects between the with-replacement and without-replacement branches of Sampler::sample().
69 bool sample_with_replacement;
// When true, the strata-based Sampler constructor groups indices by their stratum label.
70 bool stratified_sampling;
// Constructor initializer list (only partly visible): proportion 1.0, with replacement, not stratified.
73 : sample_proportion(1.0),
75 sample_with_replacement(
true),
76 stratified_sampling(
false)
// Setter body: switch sampling with replacement on or off according to `in`.
85 sample_with_replacement = in;
// Setter body for the inverse option: passing true here turns replacement off.
95 sample_with_replacement = !in;
// Negative proportions are rejected before storing; the visible check imposes no upper bound.
125 vigra_precondition(proportion >= 0.0,
126 "SamplerOptions::sampleProportion(): argument must not be negative.");
127 sample_proportion = proportion;
// Setter body: enable or disable stratified sampling according to `in`.
143 stratified_sampling = in;
// Class template whose random number generator type is a parameter (MersenneTwister by default).
231template<
class Random = MersenneTwister >
// Maps a stratum label to the array of indices that belong to that stratum.
249 typedef std::map<IndexType, IndexArrayType> StrataIndicesType;
// Maps a stratum label to the number of samples to draw from that stratum.
250 typedef std::map<IndexType, int> StrataSizesType;
// Sentinel stored in current_oob_count_ while the out-of-bag index list has not been computed.
254 static const int oobInvalid = -1;
// Number of indices to sample from, and number of indices drawn per sample.
256 int total_count_, sample_size_;
// Mutable so that the const accessor oobIndices() can fill the out-of-bag list on first use.
257 mutable int current_oob_count_;
258 StrataIndicesType strata_indices_;
259 StrataSizesType strata_sample_size_;
// Generator owned by this object; the constructors fall back to it when no generator pointer is passed.
263 Random default_random_;
// The generator actually used for drawing: either the caller's object or default_random_.
// NOTE(review): when bound to a caller-supplied generator, that object must outlive this sampler.
264 Random
const & random_;
// Distributes sample_size_ over the strata and stores the per-stratum counts in strata_sample_size_.
267 void initStrataCount()
// Start from the rounded-up even share per stratum.
271 int strata_sample_size =
static_cast<int>(std::ceil(
double(sample_size_) /
strataCount()));
// Total that would be drawn if every stratum received the rounded-up share.
272 int strata_total_count = strata_sample_size *
strataCount();
274 for(StrataIndicesType::iterator i = strata_indices_.begin();
275 i != strata_indices_.end(); ++i)
// While the running total still exceeds sample_size_, this stratum gets one sample fewer.
277 if(strata_total_count > sample_size_)
279 strata_sample_size_[i->first] = strata_sample_size - 1;
280 --strata_total_count;
// Presumably the else branch (keyword not visible here): the stratum keeps the full share.
284 strata_sample_size_[i->first] = strata_sample_size;
// Tail of the constructor taking a plain element count (signature start not visible here).
// Optional generator; a null pointer selects the internally owned default_random_.
298 Random
const * rnd = 0)
// A sample_size of 0 means "use the proportion": total_count_ * sample_proportion, rounded up.
// The alternative branch of this conditional is not visible here.
300 sample_size_(opt.sample_size == 0
301 ? static_cast<int>((std::
ceil(total_count_ * opt.sample_proportion)))
// Out-of-bag list starts out as "not computed".
303 current_oob_count_(oobInvalid),
304 current_sample_(sample_size_),
// Sized for the worst case in which every index is out of bag.
305 current_oob_sample_(total_count_),
306 is_used_(total_count_),
307 default_random_(RandomSeed),
308 random_(rnd ? *rnd : default_random_),
// Drawing without replacement cannot yield more samples than there are elements.
311 vigra_precondition(opt.sample_with_replacement || sample_size_ <= total_count_,
312 "Sampler(): Cannot draw without replacement when data size is smaller than sample count.");
// This overload receives no stratum labels, so a stratified request is rejected.
314 vigra_precondition(!opt.stratified_sampling,
315 "Sampler(): Stratified sampling requested, but no strata given.");
// All indices 0 .. total_count_-1 are placed into one stratum with label 0.
318 strata_indices_[0].resize(total_count_);
319 for(
int i=0; i<total_count_; ++i)
320 strata_indices_[0][i] = i;
// Constructor taking a range of stratum labels, one label per element (signature only partly visible).
336 template <
class Iterator>
// Optional generator; a null pointer selects the internally owned default_random_.
338 Random
const * rnd = 0)
// The element count is the iterator distance, so Iterator must support subtraction.
339 : total_count_(strataEnd - strataBegin),
// A sample_size of 0 means "use the proportion": total_count_ * sample_proportion, rounded up.
// The alternative branch of this conditional is not visible here.
340 sample_size_(opt.sample_size == 0
341 ? static_cast<int>((std::
ceil(total_count_ * opt.sample_proportion)))
// Out-of-bag list starts out as "not computed".
343 current_oob_count_(oobInvalid),
344 current_sample_(sample_size_),
345 current_oob_sample_(total_count_),
346 is_used_(total_count_),
347 default_random_(RandomSeed),
348 random_(rnd ? *rnd : default_random_),
// Drawing without replacement cannot yield more samples than there are elements.
351 vigra_precondition(opt.sample_with_replacement || sample_size_ <= total_count_,
352 "Sampler(): Cannot draw without replacement when data size is smaller than sample count.");
// Stratified: group element positions by their label, one index array per distinct label.
355 if(opt.stratified_sampling)
357 for(
int i = 0; strataBegin != strataEnd; ++i, ++strataBegin)
359 strata_indices_[*strataBegin].push_back(i);
// Presumably the else branch (keyword not visible here): all indices go into one stratum with label 0.
364 strata_indices_[0].resize(total_count_);
365 for(
int i=0; i<total_count_; ++i)
366 strata_indices_[0][i] = i;
// At least one sample per stratum must be requested.
369 vigra_precondition(sample_size_ >=
static_cast<int>(strata_indices_.size()),
370 "Sampler(): Requested sample count must be at least as large as the number of strata.");
// Bodies of simple const accessors; their signatures are not visible in this fragment.
// operator[](int k): the k-th index of the current sample; no range check is visible here.
381 return current_sample_[k];
// strataCount(): number of distinct strata (the map's size).
415 return strata_indices_.size();
// stratifiedSampling(): whether the stored options request stratified sampling.
423 return options_.stratified_sampling;
// withReplacement(): whether the stored options request sampling with replacement.
430 return options_.sample_with_replacement;
// sampledIndices(): the indices drawn for the current sample.
437 return current_sample_;
// Body of the out-of-bag accessor: the index list is built on first request and reused
// until current_oob_count_ is set back to oobInvalid.
445 if(current_oob_count_ == oobInvalid)
447 current_oob_count_ = 0;
448 for(
int i = 0; i<total_count_; ++i)
// Presumably guarded by a check that index i is not marked in is_used_ (the condition is not visible here).
452 current_oob_sample_[current_oob_count_] = i;
453 ++current_oob_count_;
// Only the filled prefix of the preallocated buffer is handed out.
457 return current_oob_sample_.
subarray(0, current_oob_count_);
// Const accessor returning a reference to an IsUsedArrayType; the body is not visible here.
// Presumably exposes the is_used_ flags maintained by sample() - TODO confirm.
459 IsUsedArrayType
const & is_used()
const
// Out-of-class definition that draws a new sample into current_sample_ (signature line not visible here).
466template<
class Random>
// Invalidate the cached out-of-bag list and clear all "used" flags before drawing.
469 current_oob_count_ = oobInvalid;
470 is_used_.
init(
false);
// With replacement: every draw picks an arbitrary position of the stratum, so repeats are possible.
472 if(options_.sample_with_replacement)
476 StrataIndicesType::iterator iter;
477 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
480 int stratum_size = iter->second.size();
// j is the write position in current_sample_; its declaration is not visible in this fragment.
481 for(
int i = 0; i < static_cast<int>(strata_sample_size_[iter->first]); ++i, ++j)
// assumes uniformInt(n) yields a value in [0, n) - TODO confirm against the Random class
483 current_sample_[j] = iter->second[random_.uniformInt(stratum_size)];
484 is_used_[current_sample_[j]] =
true;
// Presumably the without-replacement branch (else keyword not visible here): a partial shuffle
// of each stratum in which position i is swapped with a random position at or after i.
492 StrataIndicesType::iterator iter;
493 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
496 int stratum_size = iter->second.
size();
497 for(
int i = 0; i < static_cast<int>(strata_sample_size_[iter->first]); ++i, ++j)
// The swap permanently reorders the stratum's index array; the element now at position i is the draw.
499 std::swap(iter->second[i], iter->second[i+ random_.uniformInt(stratum_size - i)]);
500 current_sample_[j] = iter->second[i];
501 is_used_[current_sample_[j]] =
true;
// Sampler template whose random number generator type defaults to RandomTT800.
507template<
class Random =RandomTT800 >
512 typedef Int32 IndexType;
// Indices collected by the sampling loop below.
514 IndexArrayType used_indices_;
// Stores the Poisson parameter and the index range; the loop below treats maxIndex as exclusive.
519 PoissonSampler(
double lambda,IndexType minIndex,IndexType maxIndex)
// Discard the previous result before visiting every index in [minIndex, maxIndex).
527 used_indices_.clear();
529 for(i=minIndex;i<maxIndex;++i)
// NOTE(review): exp(-lambda) does not depend on i; it looks loop-invariant and could be hoisted - confirm.
534 double L=exp(-lambda);
// Multiplies p by a fresh uniform53() value; looks like the product-of-uniforms way of
// drawing a Poisson-distributed repeat count per index - TODO confirm, the loop is not fully visible.
538 p*=randfloat.uniform53();
// The same index may be appended several times, depending on the enclosing loop that is not visible here.
545 used_indices_.push_back(i);
// Read access to the in-th collected index; no range check is visible here.
551 IndexType
const & operator[](
int in)
const
553 return used_indices_[in];
// Number of indices collected by the sampling loop.
556 int numOfSamples()
const
558 return used_indices_.size();
size_type size() const
Definition: array_vector.hxx:358
void init(U const &initial)
Definition: array_vector.hxx:146
this_type subarray(size_type begin, size_type end) const
Definition: array_vector.hxx:200
Options object for the Sampler class.
Definition: sampling.hxx:64
SamplerOptions & withReplacement(bool in=true)
Sample from training population with replacement.
Definition: sampling.hxx:83
SamplerOptions & stratified(bool in=true)
Draw equally many samples from each "stratum". A stratum is a group of like entities,...
Definition: sampling.hxx:141
SamplerOptions & withoutReplacement(bool in=true)
Sample from training population without replacement.
Definition: sampling.hxx:93
SamplerOptions & sampleProportion(double proportion)
Determine the number of samples to draw as a proportion of the total number. That is,...
Definition: sampling.hxx:123
SamplerOptions & sampleSize(unsigned int size)
Draw the given number of samples. If stratifiedSampling is true, the size is equally distributed acro...
Definition: sampling.hxx:106
Create random samples from a sequence of indices.
Definition: sampling.hxx:233
int totalCount() const
Definition: sampling.hxx:390
bool withReplacement() const
Definition: sampling.hxx:428
ArrayVectorView< IndexType > IndexArrayViewType
Definition: sampling.hxx:246
IndexType operator[](int k) const
Definition: sampling.hxx:379
int strataCount() const
Definition: sampling.hxx:413
Int32 IndexType
Definition: sampling.hxx:239
IndexArrayViewType sampledIndices() const
Definition: sampling.hxx:435
void sample()
Definition: sampling.hxx:467
Sampler(Iterator strataBegin, Iterator strataEnd, SamplerOptions const &opt=SamplerOptions(), Random const *rnd=0)
Definition: sampling.hxx:337
IndexArrayViewType oobIndices() const
Definition: sampling.hxx:443
int sampleSize() const
Definition: sampling.hxx:397
Sampler(UInt32 totalCount, SamplerOptions const &opt=SamplerOptions(), Random const *rnd=0)
Definition: sampling.hxx:297
int size() const
Definition: sampling.hxx:404
bool stratifiedSampling() const
Definition: sampling.hxx:421
detail::SelectIntegerType< 32, detail::UnsignedIntTypes >::type UInt32
32-bit unsigned int
Definition: sized_int.hxx:183
detail::SelectIntegerType< 32, detail::SignedIntTypes >::type Int32
32-bit signed int
Definition: sized_int.hxx:175
int ceil(FixedPoint< IntBits, FracBits > v)
rounding up.
Definition: fixedpoint.hxx:675