27 #ifndef SCIMATH_CLASSICALSTATS_H 28 #define SCIMATH_CLASSICALSTATS_H 30 #include <casacore/casa/aips.h> 32 #include <casacore/scimath/Mathematics/StatisticsAlgorithm.h> 34 #include <casacore/scimath/Mathematics/StatisticsTypes.h> 35 #include <casacore/scimath/Mathematics/StatisticsUtilities.h> 59 template <
class AccumType,
class DataIterator,
class MaskIterator=const Bool*,
class WeightsIterator=DataIterator>
142 std::map<Double, AccumType>& quantiles,
const std::set<Double>& fractions,
145 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
153 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
162 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
172 virtual void getMinMax(AccumType& mymin, AccumType& mymax);
189 virtual void reset();
211 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
216 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
222 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
223 const MaskIterator& maskBegin,
uInt maskStride
228 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
229 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
235 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
241 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
247 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
248 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
254 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
255 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
276 void _doMinMax(AccumType& vmin, AccumType& vmax);
284 vector<vector<uInt64> >& binCounts,
286 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
288 const vector<AccumType>& maxLimit
292 vector<vector<uInt64> >& binCounts,
294 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
300 vector<vector<uInt64> >& binCounts,
302 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
303 const MaskIterator& maskBegin,
uInt maskStride,
308 vector<vector<uInt64> >& binCounts,
310 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
311 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
317 vector<vector<uInt64> >& binCounts,
319 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
325 vector<vector<uInt64> >& binCounts,
327 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
333 vector<vector<uInt64> >& binCounts,
335 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
336 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
342 vector<vector<uInt64> >& binCounts,
344 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
345 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
369 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
374 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
380 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
381 const MaskIterator& maskBegin,
uInt maskStride
386 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
387 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
393 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
399 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
405 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
406 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
412 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
413 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
421 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
426 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
431 vector<AccumType>& ary,
const DataIterator& dataBegin,
432 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
438 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
439 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
445 vector<AccumType>& ary,
const DataIterator& dataBegin,
446 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride
451 vector<AccumType>& ary,
const DataIterator& dataBegin,
452 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
458 vector<AccumType>& ary,
const DataIterator& dataBegin,
459 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
460 const MaskIterator& maskBegin,
uInt maskStride
465 vector<AccumType>& ary,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
466 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
478 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
479 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
484 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
Int64 nr,
486 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
490 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
491 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
493 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
498 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
Int64 nr,
499 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
501 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
506 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
507 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
508 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
513 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
514 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
516 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
521 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
522 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
523 const MaskIterator& maskBegin,
uInt maskStride,
524 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
529 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
530 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
532 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
539 vector<AccumType>& ary,
const DataIterator& dataBegin,
545 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
552 vector<AccumType>& ary,
const DataIterator& dataBegin,
553 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
559 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
560 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
566 vector<AccumType>& ary,
const DataIterator& dataBegin,
567 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
573 vector<AccumType>& ary,
const DataIterator& dataBegin,
574 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
580 vector<AccumType>& ary,
const DataIterator& dataBegin,
581 const WeightsIterator& weightBegin,
Int64 nr,
582 uInt dataStride,
const MaskIterator& maskBegin,
588 vector<AccumType>& ary,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
589 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
599 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
605 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
611 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
612 const MaskIterator& maskBegin,
uInt maskStride
617 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
618 const MaskIterator& maskBegin,
uInt maskStride,
631 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
637 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
643 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
644 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
649 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
650 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
662 mutable typename vector<DataIterator>::const_iterator
_dend,
_diter;
663 mutable vector<Int64>::const_iterator
_citer;
665 mutable std::map<uInt, MaskIterator>
_masks;
691 vector<Bool>& allSame, DataIterator dataIter, MaskIterator maskIter,
692 WeightsIterator weightsIter,
uInt64 count,
694 const vector<AccumType>& maxLimit
698 vector<AccumType>& ary, DataIterator dataIter,
699 MaskIterator maskIter, WeightsIterator weightsIter,
704 vector<vector<AccumType> >& arys,
uInt64& currentCount,
705 DataIterator dataIter, MaskIterator maskIter,
706 WeightsIterator weightsIter,
uInt64 dataCount,
707 const vector<std::pair<AccumType, AccumType> >& includeLimits,
713 DataIterator dataIter, MaskIterator maskIter,
714 WeightsIterator weightsIter,
uInt64 dataCount
719 DataIterator dataIter, MaskIterator maskIter,
720 WeightsIterator weightsIter,
uInt64 count
730 vector<AccumType>&
array 734 vector<vector<AccumType> >& arrays,
735 const vector<std::pair<AccumType, AccumType> > &includeLimits,
747 const vector<std::set<uInt64> >& dataIndices,
uInt64 nBins
751 const vector<uInt64>& binNpts,
uInt64 maxArraySize,
752 const vector<std::pair<AccumType, AccumType> >& binLimits,
753 const vector<std::set<uInt64> >& dataIndices,
uInt64 nBins
763 DataIterator& dataIter, MaskIterator& maskIter,
764 WeightsIterator& weightsIter,
uInt64& offset,
uInt nthreads
771 const std::set<uInt64>& dataIndices,
Bool persistSortedArray,
801 vector<Bool>& allSame,
const PtrHolder<vector<vector<uInt64> > >& tBins,
823 const std::set<uInt64>& indices,
uInt64 maxArraySize,
824 Bool persistSortedArray
830 #ifndef CASACORE_NO_AUTO_TEMPLATES 831 #include <casacore/scimath/Mathematics/ClassicalStatistics.tcc> 832 #endif //# CASACORE_NO_AUTO_TEMPLATES void _doMinMax(AccumType &vmin, AccumType &vmax)
scan dataset(s) to find min and max
vector< std::map< uInt64, AccumType > > _dataFromMultipleBins(const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, uInt64 maxArraySize, const vector< std::set< uInt64 > > &dataIndices, uInt64 nBins)
extract data from multiple histograms given by binDesc.
Bool _valuesFromSortedArray(std::map< uInt64, AccumType > &values, CountedPtr< uInt64 > knownNpts, const std::set< uInt64 > &indices, uInt64 maxArraySize, Bool persistSortedArray)
get values from sorted array if the array is small enough to be held in memory.
vector< DataIterator >::const_iterator _dend
mutables, used to mitigate repeated code
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
void _computeMinMax(CountedPtr< AccumType > &mymax, CountedPtr< AccumType > &mymin, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount)
LatticeExprNode median(const LatticeExprNode &expr)
AccumType _getStatistic(StatisticsData::STATS stat)
vector< DataIterator >::const_iterator _diter
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
StatsData< AccumType > _getStatistics()
virtual StatsData< AccumType > & _getStatsData()
retrieve stats structure.
TableExprNode array(const TableExprNode &values, const TableExprNodeSet &shape)
Create an array of the given shape and fill it with the values.
void _createDataArray(vector< AccumType > &array)
Create an unsorted array of the complete data set.
unsigned long long uInt64
std::set< uInt64 > _medianIndices(CountedPtr< uInt64 > knownNpts)
get the index (for odd npts) or indices (for even npts) of the median of the sorted array...
PtrHolder(const PtrHolder< T > &other)
virtual std::pair< Int64, Int64 > getStatisticIndex(StatisticsData::STATS stat)
see base class description
ClassicalStatistics< CASA_STATP > & operator=(const ClassicalStatistics< CASA_STATP > &other)
copy semantics
std::map< uInt64, AccumType > _indicesToValues(CountedPtr< uInt64 > knownNpts, CountedPtr< AccumType > knownMin, CountedPtr< AccumType > knownMax, uInt64 maxArraySize, const std::set< uInt64 > &dataIndices, Bool persistSortedArray, uInt64 nBins)
get the values for the specified indices in the sorted array of all good data
Bool _getDoMaxMin() const
std::pair< Int64, Int64 > LocationType
std::map< uInt, DataRanges > _ranges
void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Provide guidance to algorithms by specifying a priori which statistics the caller would like calculat...
Class to calculate statistics in a "classical" sense, ie using accumulators with no special filtering...
void _computeBins(vector< vector< uInt64 > > &bins, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 count, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, const vector< AccumType > &maxLimit)
virtual uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
Hold and delete pointers not deleted by object destructors.
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
Int64 _getIDataset() const
vector< vector< uInt64 > > _binCounts(vector< CountedPtr< AccumType > > &sameVal, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc)
tally the number of data points that fall into each bin provided by binDesc Any points that are less ...
std::map< uInt, WeightsIterator > _weights
void _computeDataArray(vector< AccumType > &ary, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount)
WeightsIterator _myWeights
ALGORITHM
implemented algorithms
virtual const StatsData< AccumType > & _getStatsData() const
virtual void _populateArrays(vector< vector< AccumType > > &arys, uInt64 &currentCount, const DataIterator &dataBegin, Int64 nr, uInt dataStride, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt64 maxCount) const
Create a vector of unsorted arrays, one array for each bin defined by includeLimits.
void _addData()
Allows derived classes to do things after data is set or added.
Referenced counted pointer for constant data.
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
scan through the data set to determine the number of good (unmasked, weight > 0, within range) points...
virtual AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
get the median of the absolute deviation about the median of the data.
std::map< uInt, Bool > _isIncludeRanges
Bool hasData() const
Has any data been added to this object? Will return False if the object has been reset and no data ha...
void _accumulate(StatsData< AccumType > &stats, const AccumType &datum, const LocationType &location)
void setDataProvider(StatsDataProvider< CASA_STATP > *dataProvider)
An exception will be thrown if setCalculateAsAdded(True) has been called.
virtual void _findBins(vector< vector< uInt64 > > &binCounts, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, const DataIterator &dataBegin, Int64 nr, uInt dataStride, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, const vector< AccumType > &maxLimit) const
Get the counts of data within the specified histogram bins.
static void _convertToAbsDevMedArray(vector< AccumType > &myArray, AccumType median)
convert in place by taking the absolute value of the difference of the vector and the median ...
void _computeDataArrays(vector< vector< AccumType > > &arys, uInt64 &currentCount, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt64 maxCount)
void _initThreadVars(uInt &nBlocks, uInt64 &extra, uInt &nthreads, PtrHolder< DataIterator > &dataIter, PtrHolder< MaskIterator > &maskIter, PtrHolder< WeightsIterator > &weightsIter, PtrHolder< uInt64 > &offset, uInt nThreadsMax) const
#define DataRanges
Commonly used types in statistics framework.
bool Bool
Define the standard types used by Casacore.
virtual Bool _populateTestArray(vector< AccumType > &ary, const DataIterator &dataBegin, Int64 nr, uInt dataStride, uInt maxElements) const
no weights, no mask, no ranges
virtual void setCalculateAsAdded(Bool c)
Should statistics be updated with calls to addData or should they only be calculated upon calls to ge...
virtual AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
If one needs to compute both the median and quantile values, it is better to call getMedianAndQuantil...
virtual std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
Get the specified quantiles.
uInt _nThreadsMax() const
std::map< uInt, MaskIterator > _masks
void _createDataArrays(vector< vector< AccumType > > &arrays, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt64 maxCount)
static void _makeBins(typename StatisticsUtilities< AccumType >::BinDesc &bins, AccumType minData, AccumType maxData, uInt maxBins, Bool allowPad)
If allowPad is True, then pad the lower side of the lowest bin and the higher side of the highest bin...
virtual ~ClassicalStatistics()
vector< uInt >::const_iterator _dsiter
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, Int64 nr, uInt dataStride)
no weights, no mask, no ranges
StatsData< AccumType > _statsData
vector< std::map< uInt64, AccumType > > _dataFromSingleBins(const vector< uInt64 > &binNpts, uInt64 maxArraySize, const vector< std::pair< AccumType, AccumType > > &binLimits, const vector< std::set< uInt64 > > &dataIndices, uInt64 nBins)
Bool _increment(Bool includeIDataset)
increment the relevant loop counters
virtual void reset()
reset object to initial state.
virtual StatisticsAlgorithm< CASA_STATP > * clone() const
Clone this instance.
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
vector< Int64 >::const_iterator _citer
const Double c
Fundamental physical constants (SI units):
virtual AccumType getMedian(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
static void _mergeResults(vector< vector< uInt64 > > &bins, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, const PtrHolder< vector< vector< uInt64 > > > &tBins, const PtrHolder< vector< CountedPtr< AccumType > > > &tSameVal, const PtrHolder< vector< Bool > > &tAllSame, uInt nThreadsMax)
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, Int64 nr, uInt dataStride)
has weights, but no mask, no ranges
void _incrementThreadIters(DataIterator &dataIter, MaskIterator &maskIter, WeightsIterator &weightsIter, uInt64 &offset, uInt nthreads) const
increment thread-based iterators
Bool _isNptsSmallerThan(vector< AccumType > &arrayToSort, uInt maxArraySize)
Determine by scanning the dataset if the number of good points is smaller than maxArraySize.
virtual void _updateDataProviderMaxMin(const StatsData< AccumType > &threadStats)
virtual void _populateArray(vector< AccumType > &ary, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
populate an unsorted array with valid data.
virtual StatsData< AccumType > _getInitialStats() const
void _computeStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 count)
Base class of statistics algorithm class hierarchy.
this file contains all the compiler specific defines
description of a regularly spaced bins with the first bin having lower limit of minLimit and having n...