27 #ifndef SCIMATH_CLASSICALSTATS_H 28 #define SCIMATH_CLASSICALSTATS_H 30 #include <casacore/casa/aips.h> 32 #include <casacore/scimath/Mathematics/StatisticsAlgorithm.h> 34 #include <casacore/scimath/Mathematics/StatisticsTypes.h> 35 #include <casacore/scimath/Mathematics/StatisticsUtilities.h> 59 template <
class AccumType,
class DataIterator,
class MaskIterator=const Bool*,
class WeightsIterator=DataIterator>
139 std::map<Double, AccumType>& quantiles,
const std::set<Double>& fractions,
142 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
150 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
159 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
169 virtual void getMinMax(AccumType& mymin, AccumType& mymax);
186 virtual void reset();
208 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
213 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
219 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
220 const MaskIterator& maskBegin,
uInt maskStride
225 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
226 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
232 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
238 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
244 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
245 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
251 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
252 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
273 void _doMinMax(AccumType& vmin, AccumType& vmax);
281 vector<vector<uInt64> >& binCounts,
283 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
285 const vector<AccumType>& maxLimit
289 vector<vector<uInt64> >& binCounts,
291 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
297 vector<vector<uInt64> >& binCounts,
299 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
300 const MaskIterator& maskBegin,
uInt maskStride,
305 vector<vector<uInt64> >& binCounts,
307 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
308 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
314 vector<vector<uInt64> >& binCounts,
316 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
322 vector<vector<uInt64> >& binCounts,
324 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
330 vector<vector<uInt64> >& binCounts,
332 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
333 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
339 vector<vector<uInt64> >& binCounts,
341 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
342 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
366 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
371 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
377 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
378 const MaskIterator& maskBegin,
uInt maskStride
383 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
384 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
390 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
396 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
402 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
403 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
409 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
410 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
418 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
423 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
428 vector<AccumType>& ary,
const DataIterator& dataBegin,
429 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
435 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
436 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
442 vector<AccumType>& ary,
const DataIterator& dataBegin,
443 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride
448 vector<AccumType>& ary,
const DataIterator& dataBegin,
449 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
455 vector<AccumType>& ary,
const DataIterator& dataBegin,
456 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
457 const MaskIterator& maskBegin,
uInt maskStride
462 vector<AccumType>& ary,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
463 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
475 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
476 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
481 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
Int64 nr,
483 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
487 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
488 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
490 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
495 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
Int64 nr,
496 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
498 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
503 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
504 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
505 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
510 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
511 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
513 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
518 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
519 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
520 const MaskIterator& maskBegin,
uInt maskStride,
521 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
526 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
527 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
529 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
536 vector<AccumType>& ary,
const DataIterator& dataBegin,
542 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
549 vector<AccumType>& ary,
const DataIterator& dataBegin,
550 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
556 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
557 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
563 vector<AccumType>& ary,
const DataIterator& dataBegin,
564 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
570 vector<AccumType>& ary,
const DataIterator& dataBegin,
571 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
577 vector<AccumType>& ary,
const DataIterator& dataBegin,
578 const WeightsIterator& weightBegin,
Int64 nr,
579 uInt dataStride,
const MaskIterator& maskBegin,
585 vector<AccumType>& ary,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
586 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
596 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
602 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
608 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
609 const MaskIterator& maskBegin,
uInt maskStride
614 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
615 const MaskIterator& maskBegin,
uInt maskStride,
628 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
634 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
640 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
641 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
646 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
647 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
659 mutable typename vector<DataIterator>::const_iterator
_dend,
_diter;
660 mutable vector<Int64>::const_iterator
_citer;
662 mutable std::map<uInt, MaskIterator>
_masks;
691 vector<Bool>& allSame, DataIterator dataIter, MaskIterator maskIter,
692 WeightsIterator weightsIter,
uInt64 count,
694 const vector<AccumType>& maxLimit
698 vector<AccumType>& ary, DataIterator dataIter,
699 MaskIterator maskIter, WeightsIterator weightsIter,
704 vector<vector<AccumType> >& arys,
uInt64& currentCount,
705 DataIterator dataIter, MaskIterator maskIter,
706 WeightsIterator weightsIter,
uInt64 dataCount,
707 const vector<std::pair<AccumType, AccumType> >& includeLimits,
713 DataIterator dataIter, MaskIterator maskIter,
714 WeightsIterator weightsIter,
uInt64 dataCount
719 DataIterator dataIter, MaskIterator maskIter,
720 WeightsIterator weightsIter,
uInt64 count
730 vector<AccumType>&
array 734 vector<vector<AccumType> >& arrays,
735 const vector<std::pair<AccumType, AccumType> > &includeLimits,
747 const vector<std::set<uInt64> >& dataIndices,
uInt64 nBins
751 const vector<uInt64>& binNpts,
uInt64 maxArraySize,
752 const vector<std::pair<AccumType, AccumType> >& binLimits,
753 const vector<std::set<uInt64> >& dataIndices,
uInt64 nBins
763 DataIterator& dataIter, MaskIterator& maskIter,
764 WeightsIterator& weightsIter,
uInt64& offset,
uInt nthreads
771 const std::set<uInt64>& dataIndices,
Bool persistSortedArray,
801 vector<Bool>& allSame,
const PtrHolder<vector<vector<uInt64> > >& tBins,
823 const std::set<uInt64>& indices,
uInt64 maxArraySize,
824 Bool persistSortedArray
830 #ifndef CASACORE_NO_AUTO_TEMPLATES 831 #include <casacore/scimath/Mathematics/ClassicalStatistics.tcc> 832 #endif //# CASACORE_NO_AUTO_TEMPLATES void _doMinMax(AccumType &vmin, AccumType &vmax)
scan dataset(s) to find min and max
static const uInt BLOCK_SIZE
vector< std::map< uInt64, AccumType > > _dataFromMultipleBins(const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, uInt64 maxArraySize, const vector< std::set< uInt64 > > &dataIndices, uInt64 nBins)
extract data from multiple histograms given by binDesc.
Bool _valuesFromSortedArray(std::map< uInt64, AccumType > &values, CountedPtr< uInt64 > knownNpts, const std::set< uInt64 > &indices, uInt64 maxArraySize, Bool persistSortedArray)
get values from sorted array if the array is small enough to be held in memory.
vector< DataIterator >::const_iterator _dend
mutables, used to mitigate repeated code
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
void _computeMinMax(CountedPtr< AccumType > &mymax, CountedPtr< AccumType > &mymin, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount)
LatticeExprNode median(const LatticeExprNode &expr)
AccumType _getStatistic(StatisticsData::STATS stat)
ClassicalStatistics< AccumType, DataIterator, MaskIterator, WeightsIterator > & operator=(const ClassicalStatistics< AccumType, DataIterator, MaskIterator, WeightsIterator > &other)
copy semantics
vector< DataIterator >::const_iterator _diter
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
StatsData< AccumType > _getStatistics()
virtual StatsData< AccumType > & _getStatsData()
retrieve stats structure.
TableExprNode array(const TableExprNode &values, const TableExprNodeSet &shape)
Create an array of the given shape and fill it with the values.
void _createDataArray(vector< AccumType > &array)
Create an unsorted array of the complete data set.
unsigned long long uInt64
std::set< uInt64 > _medianIndices(CountedPtr< uInt64 > knownNpts)
get the index (for odd npts) or indices (for even npts) of the median of the sorted array...
PtrHolder(const PtrHolder< T > &other)
virtual std::pair< Int64, Int64 > getStatisticIndex(StatisticsData::STATS stat)
see base class description
std::map< uInt64, AccumType > _indicesToValues(CountedPtr< uInt64 > knownNpts, CountedPtr< AccumType > knownMin, CountedPtr< AccumType > knownMax, uInt64 maxArraySize, const std::set< uInt64 > &dataIndices, Bool persistSortedArray, uInt64 nBins)
get the values for the specified indices in the sorted array of all good data
Bool _getDoMaxMin() const
std::pair< Int64, Int64 > LocationType
std::map< uInt, DataRanges > _ranges
void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Provide guidance to algorithms by specifying a priori which statistics the caller would like calculat...
Abstract base class which defines interface for providing "datasets" to the statistics framework when...
Class to calculate statistics in a "classical" sense, ie using accumulators with no special filtering...
void _computeBins(vector< vector< uInt64 > > &bins, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 count, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, const vector< AccumType > &maxLimit)
virtual uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
Hold and delete pointers not deleted by object destructors.
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
Int64 _getIDataset() const
vector< vector< uInt64 > > _binCounts(vector< CountedPtr< AccumType > > &sameVal, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc)
tally the number of data points that fall into each bin provided by binDesc. Any points that are less ...
std::map< uInt, WeightsIterator > _weights
void _computeDataArray(vector< AccumType > &ary, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount)
WeightsIterator _myWeights
ALGORITHM
implemented algorithms
virtual const StatsData< AccumType > & _getStatsData() const
virtual void _populateArrays(vector< vector< AccumType > > &arys, uInt64 &currentCount, const DataIterator &dataBegin, Int64 nr, uInt dataStride, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt64 maxCount) const
Create a vector of unsorted arrays, one array for each bin defined by includeLimits.
void _addData()
Allows derived classes to do things after data is set or added.
Referenced counted pointer for constant data.
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
scan through the data set to determine the number of good (unmasked, weight > 0, within range) points...
virtual AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
get the median of the absolute deviation about the median of the data.
std::map< uInt, Bool > _isIncludeRanges
Bool hasData() const
Has any data been added to this object? Will return False if the object has been reset and no data ha...
void _accumulate(StatsData< AccumType > &stats, const AccumType &datum, const LocationType &location)
virtual void _findBins(vector< vector< uInt64 > > &binCounts, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, const DataIterator &dataBegin, Int64 nr, uInt dataStride, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, const vector< AccumType > &maxLimit) const
Get the counts of data within the specified histogram bins.
static void _convertToAbsDevMedArray(vector< AccumType > &myArray, AccumType median)
convert in place by taking the absolute value of the difference of the vector and the median ...
void _computeDataArrays(vector< vector< AccumType > > &arys, uInt64 &currentCount, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt64 maxCount)
void _initThreadVars(uInt &nBlocks, uInt64 &extra, uInt &nthreads, PtrHolder< DataIterator > &dataIter, PtrHolder< MaskIterator > &maskIter, PtrHolder< WeightsIterator > &weightsIter, PtrHolder< uInt64 > &offset, uInt nThreadsMax) const
#define DataRanges
Commonly used types in statistics framework.
bool Bool
Define the standard types used by Casacore.
virtual Bool _populateTestArray(vector< AccumType > &ary, const DataIterator &dataBegin, Int64 nr, uInt dataStride, uInt maxElements) const
no weights, no mask, no ranges
virtual void setCalculateAsAdded(Bool c)
Should statistics be updated with calls to addData or should they only be calculated upon calls to ge...
void setDataProvider(StatsDataProvider< AccumType, DataIterator, MaskIterator, WeightsIterator > *dataProvider)
An exception will be thrown if setCalculateAsAdded(True) has been called.
virtual AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
If one needs to compute both the median and quantile values, it is better to call getMedianAndQuantil...
virtual std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
Get the specified quantiles.
uInt _nThreadsMax() const
std::map< uInt, MaskIterator > _masks
void _createDataArrays(vector< vector< AccumType > > &arrays, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt64 maxCount)
static void _makeBins(typename StatisticsUtilities< AccumType >::BinDesc &bins, AccumType minData, AccumType maxData, uInt maxBins, Bool allowPad)
If allowPad is True, then pad the lower side of the lowest bin and the higher side of the highest bin...
virtual ~ClassicalStatistics()
vector< uInt >::const_iterator _dsiter
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, Int64 nr, uInt dataStride)
no weights, no mask, no ranges
static const uInt CACHE_PADDING
StatsData< AccumType > _statsData
vector< std::map< uInt64, AccumType > > _dataFromSingleBins(const vector< uInt64 > &binNpts, uInt64 maxArraySize, const vector< std::pair< AccumType, AccumType > > &binLimits, const vector< std::set< uInt64 > > &dataIndices, uInt64 nBins)
Bool _increment(Bool includeIDataset)
increment the relevant loop counters
virtual void reset()
reset object to initial state.
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
vector< Int64 >::const_iterator _citer
const Double c
Fundamental physical constants (SI units):
virtual AccumType getMedian(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
static void _mergeResults(vector< vector< uInt64 > > &bins, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, const PtrHolder< vector< vector< uInt64 > > > &tBins, const PtrHolder< vector< CountedPtr< AccumType > > > &tSameVal, const PtrHolder< vector< Bool > > &tAllSame, uInt nThreadsMax)
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, Int64 nr, uInt dataStride)
has weights, but no mask, no ranges
void _incrementThreadIters(DataIterator &dataIter, MaskIterator &maskIter, WeightsIterator &weightsIter, uInt64 &offset, uInt nthreads) const
increment thread-based iterators
Bool _isNptsSmallerThan(vector< AccumType > &arrayToSort, uInt maxArraySize)
Determine by scanning the dataset if the number of good points is smaller than maxArraySize.
virtual void _updateDataProviderMaxMin(const StatsData< AccumType > &threadStats)
virtual void _populateArray(vector< AccumType > &ary, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
populate an unsorted array with valid data.
virtual StatsData< AccumType > _getInitialStats() const
void _computeStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 count)
Base class of statistics algorithm class hierarchy.
this file contains all the compiler specific defines
description of a regularly spaced bins with the first bin having lower limit of minLimit and having n...