casacore
StatisticsUtilities.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 //# $Id: Array.h 21545 2015-01-22 19:36:35Z gervandiepen $
26 
27 #ifndef SCIMATH_STATISTICSUTILITIES_H
28 #define SCIMATH_STATISTICSUTILITIES_H
29 
30 #include <casacore/casa/Exceptions/Error.h>
31 #include <casacore/scimath/Mathematics/StatisticsTypes.h>
32 #include <casacore/casa/Utilities/DataType.h>
33 #include <casacore/casa/aips.h>
34 
35 #include <iostream>
36 #include <casacore/casa/iosfwd.h>
37 
38 namespace casacore {
39 
40 // Various statistics related methods for the statistics framework.
41 
42 template <class AccumType> class StatisticsUtilities {
43 public:
44 
45  // description of a regularly spaced bins with the first bin having lower limit
46  // of minLimit and having nBins equally spaced bins of width binWidth, so that
47  // the upper limit of the last bin is given by minLimit + nBins*binWidth
48  struct BinDesc {
49  AccumType binWidth;
50  AccumType minLimit;
52  };
53 
55 
56  // <group>
57  // accumulate values. It is the responsibility of the caller to keep track
58  // of the accumulated values after each call. This class does not since it
59  // has no state.
60  // The accumulation derivation for mean and variance can be found at
61  // www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weighvar.pdf
62  // nvariance is an accumulated value. It is related to the variance via
63  // variance = nvariance/npts or nvariance/(npts-1) depending on your preferred definition
64  // in the non-weighted case and
65  // wvariance = wnvariance/sumofweights or wnvariance/(sumofweights-1) in the weighted case
66  // It's basic definition is nvariance = sum((x_i - mean)**2),
67  // wnvariance = sum((weight_i*(x_i - mean)**2)
68  // npts is a Double rather than an Int64 because of compilation issues when T is a Complex
69  inline static void accumulate (
70  Double& npts, AccumType& sum, AccumType& mean, const AccumType& datum
71  );
72 
73  // in order to optimize performance, no checking is done for the weight == 0 case
74  // callers should ensure that the weigth is not zero before calling this method,
75  // and shouldn't call this method if the weight is 0. Expect a segfault because of
76  // division by zero if sumweights and weight are both zero.
77  inline static void waccumulate (
78  Double& npts, AccumType& sumweights, AccumType& wsum, AccumType& wmean,
79  const AccumType& datum, const AccumType& weight
80  );
81 
82  inline static void accumulate (
83  Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
84  AccumType& sumsq, const AccumType& datum
85  );
86 
87  // wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
88  inline static void waccumulate (
89  Double& npts, AccumType& sumweights, AccumType& wsum,
90  AccumType& wmean, AccumType& wnvariance, AccumType& wsumsq,
91  const AccumType& datum, const AccumType& weight
92  );
93  // </group>
94 
95  // <group>
96  // The assignment operator of class LocationType should use copy, not reference,
97  // semantics.
98  template <class LocationType>
99  inline static void accumulate (
100  Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
101  AccumType& sumsq, AccumType& datamin,
102  AccumType& datamax, LocationType& minpos, LocationType& maxpos,
103  const AccumType& datum, const LocationType& location
104  );
105 
106  template <class LocationType, class DataType>
107  inline static void accumulate (
108  Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
109  AccumType& sumsq, DataType& datamin,
110  DataType& datamax, LocationType& minpos, LocationType& maxpos,
111  const DataType& datum, const LocationType& location
112  );
113 
114  template <class LocationType>
115  inline static void waccumulate (
116  Double& npts, AccumType& sumofweights, AccumType& sum, AccumType& mean,
117  AccumType& nvariance, AccumType& sumsq, AccumType& datamin, AccumType& datamax,
118  LocationType& minpos, LocationType& maxpos,
119  const AccumType& datum, const AccumType& weight, const LocationType& location
120  );
121 
122  // </group>
123 
124  // <group>
125  // return True if the max or min was updated, False otherwise.
126  template <class LocationType>
127  inline static Bool doMax(
128  AccumType& datamax, LocationType& maxpos, Bool isFirst,
129  const AccumType& datum, const LocationType& location
130  );
131 
132  template <class LocationType>
133  inline static Bool doMin(
134  AccumType& datamin, LocationType& minpos, Bool isFirst,
135  const AccumType& datum, const LocationType& location
136  );
137  // </group>
138 
139  // <group>
140  // These versions are for symmetric accumulation about a specified center
141  // point. The actual point is accumulated, as is a "virtual" point that is
142  // symmetric about the specified center. Of course, the trivial relationship
143  // that the mean is the specified center is used to simplify things.
144  /*
145  inline static void accumulateSym (
146  Double& npts, AccumType& sum, const AccumType& datum, const AccumType& center
147  );
148  */
149 
150  /*
151  inline static void waccumulateSym (
152  Double& npts, AccumType& sumweights, AccumType& wsum,
153  const AccumType& datum, const AccumType& weight, const AccumType& center
154  );
155  */
156 
157  inline static void accumulateSym (
158  Double& npts, AccumType& nvariance,
159  AccumType& sumsq, const AccumType& datum, const AccumType& center
160  );
161 
162  // wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
163  inline static void waccumulateSym (
164  Double& npts, AccumType& sumweights,
165  AccumType& wnvariance, AccumType& wsumsq,
166  const AccumType& datum, const AccumType& weight, const AccumType& center
167  );
168 
169  // <src>maxpos</src> and <src>minpos</src> refer to actual, not
170  // virtually created, data only.
171  template <class LocationType>
172  inline static void accumulateSym (
173  Double& npts, AccumType& nvariance,
174  AccumType& sumsq, AccumType& datamin,
175  AccumType& datamax, LocationType& minpos, LocationType& maxpos,
176  const AccumType& datum, const LocationType& location, const AccumType& center
177  );
178 
179  template <class LocationType>
180  inline static void waccumulateSym (
181  Double& npts, AccumType& sumofweights,
182  AccumType& nvariance, AccumType& sumsq, AccumType& datamin, AccumType& datamax,
183  LocationType& minpos, LocationType& maxpos,
184  const AccumType& datum, const AccumType& weight, const LocationType& location,
185  const AccumType& center
186  );
187 
188  // </group>
189  // This does the obvious conversions. The Complex and DComplex versions
190  // (implemented after the class definition) are used solely to permit compilation. In general, these versions should
191  // never actually be called
192  inline static Int getInt(const AccumType& v) {
193  return (Int)v;
194  }
195 
196  inline static Bool includeDatum(
197  const AccumType& datum, typename DataRanges::const_iterator beginRange,
198  typename DataRanges::const_iterator endRange, Bool isInclude
199  );
200 
201  // use two statistics sets to get the statistics set that would
202  // result in combining the two data sets used to produce the
203  // individual statistics sets. The quantile related stats are
204  // not considered, since it is not in general possible to determine
205  // the resultant quantiles from the information provided; only
206  // the aggregate statistics make sense.
208  const vector<StatsData<AccumType> >& stats
209  );
210 
211 private:
212 
213  const static AccumType TWO;
214 
216 
217 };
218 
219 // The Complex and DComplex versions
220 // are used solely to permit compilation. In general, these versions should
221 // never actually be called
222 template<>
224  ThrowCc("This version for complex data types should never be called");
225 }
226 
227 template<>
229  ThrowCc("Logic Error: This version for complex data types should never be called");
230 }
231 
232 template <class T>
233 ostream &operator<<(ostream &os, const typename StatisticsUtilities<T>::BinDesc &desc) {
234  os << "min limit " << desc.minLimit << " bin width " << desc.binWidth
235  << " nbins " << desc.nBins;
236  return os;
237 }
238 
239 }
240 
241 #ifndef CASACORE_NO_AUTO_TEMPLATES
242 #include <casacore/scimath/Mathematics/StatisticsUtilities.tcc>
243 #endif //# CASACORE_NO_AUTO_TEMPLATES
244 
245 #endif
int Int
Definition: aipstype.h:50
static void waccumulateSym(Double &npts, AccumType &sumweights, AccumType &wnvariance, AccumType &wsumsq, const AccumType &datum, const AccumType &weight, const AccumType &center)
wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
LatticeExprNode sum(const LatticeExprNode &expr)
static void waccumulate(Double &npts, AccumType &sumweights, AccumType &wsum, AccumType &wmean, const AccumType &datum, const AccumType &weight)
in order to optimize performance, no checking is done for the weight == 0 case callers should ensure ...
std::pair< Int64, Int64 > LocationType
static Bool doMin(AccumType &datamin, LocationType &minpos, Bool isFirst, const AccumType &datum, const LocationType &location)
static Bool includeDatum(const AccumType &datum, typename DataRanges::const_iterator beginRange, typename DataRanges::const_iterator endRange, Bool isInclude)
double Double
Definition: aipstype.h:55
static Bool doMax(AccumType &datamax, LocationType &maxpos, Bool isFirst, const AccumType &datum, const LocationType &location)
return True if the max or min was updated, False otherwise.
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
static void accumulateSym(Double &npts, AccumType &nvariance, AccumType &sumsq, const AccumType &datum, const AccumType &center)
These versions are for symmetric accumulation about a specified center point.
Various statistics related methods for the statistics framework.
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, const AccumType &datum)
accumulate values.
LatticeExprNode mean(const LatticeExprNode &expr)
#define ThrowCc(m)
Definition: Error.h:86
static Int getInt(const AccumType &v)
This does the obvious conversions.
this file contains all the compiler specific defines
Definition: mainpage.dox:28
static StatsData< AccumType > combine(const vector< StatsData< AccumType > > &stats)
use two statistics sets to get the statistics set that would result in combining the two data sets us...
unsigned int uInt
Definition: aipstype.h:51
description of a regularly spaced bins with the first bin having lower limit of minLimit and having n...