1 | #ifndef STATS_H |
---|
2 | #define STATS_H |
---|
3 | |
---|
4 | #include <iostream> |
---|
5 | #include <math.h> |
---|
6 | #include <Utils/utils.hh> |
---|
7 | |
---|
8 | /** |
---|
9 | * A namespace to control everything to do with statistical |
---|
10 | * calculations. |
---|
11 | */ |
---|
12 | |
---|
13 | namespace Statistics |
---|
14 | { |
---|
15 | |
---|
16 | /** Divide by the following correction factor to convert from MADFM |
---|
17 | to sigma (rms) estimator. */ |
---|
18 | const float correctionFactor = 0.6744888; |
---|
19 | |
---|
20 | /** Multiply by the following correction factor to convert from |
---|
21 | trimmedSigma to sigma estimator. */ |
---|
22 | const double trimToNormal = 1.17036753077; |
---|
23 | |
---|
24 | /** A templated function to do the MADFM-to-rms conversion. */ |
---|
25 | template <class T> float madfmToSigma(T madfm){ |
---|
26 | return float(madfm)/correctionFactor; |
---|
27 | }; |
---|
28 | template float madfmToSigma<int>(int madfm); |
---|
29 | template float madfmToSigma<long>(long madfm); |
---|
30 | template float madfmToSigma<float>(float madfm); |
---|
31 | template float madfmToSigma<double>(double madfm); |
---|
32 | |
---|
33 | /** |
---|
34 | * Class to hold statistics for a given set of values. |
---|
35 | * |
---|
36 | * This class is designed to hold all useful statistics for a given |
---|
37 | * array of numbers. It does *not* hold the data themselves. It |
---|
38 | * provides the functions to calculate mean, rms, median and MADFM |
---|
39 | * (median absolute deviation from median), as well as functions to |
---|
40 | * control detection (ie. defining thresholds) in both standard |
---|
41 | * (sigma-clipping) cases and False Detection Rate scenarios. |
---|
42 | */ |
---|
43 | |
---|
44 | template <class Type> |
---|
45 | class StatsContainer |
---|
46 | { |
---|
47 | public: |
---|
48 | StatsContainer(){useRobust=true; defined=false; useFDR=false;}; |
---|
49 | virtual ~StatsContainer(){}; |
---|
50 | StatsContainer(const StatsContainer<Type>& s); |
---|
51 | StatsContainer<Type>& operator= (const StatsContainer<Type>& s); |
---|
52 | |
---|
53 | /** A way of printing the statistics. */ |
---|
54 | template <class T> |
---|
55 | friend std::ostream& operator<< ( std::ostream& theStream, StatsContainer<T> &s); |
---|
56 | |
---|
57 | float getMean(){return mean;}; |
---|
58 | void setMean(float f){mean=f;}; |
---|
59 | float getStddev(){return stddev;}; |
---|
60 | void setStddev(float f){stddev=f;}; |
---|
61 | Type getMedian(){return median;}; |
---|
62 | void setMedian(Type f){median=f;}; |
---|
63 | Type getMadfm(){return madfm;}; |
---|
64 | void setMadfm(Type f){madfm=f;}; |
---|
65 | float getThreshold(){return threshold;}; |
---|
66 | void setThreshold(float f){threshold=f;}; |
---|
67 | float getPThreshold(){return pThreshold;}; |
---|
68 | void setPThreshold(float f){pThreshold=f;}; |
---|
69 | bool getRobust(){return useRobust;}; |
---|
70 | void setRobust(bool b){useRobust=b;}; |
---|
71 | bool setUseFDR(){return useFDR;}; |
---|
72 | void setUseFDR(bool b){useFDR=b;}; |
---|
73 | |
---|
74 | /** |
---|
75 | * Return the threshold as a signal-to-noise ratio. |
---|
76 | * |
---|
77 | * The SNR is defined in terms of excess over the middle estimator |
---|
78 | * in units of the spread estimator. |
---|
79 | */ |
---|
80 | float getThresholdSNR(){ |
---|
81 | return (threshold - this->getMiddle())/this->getSpread();}; |
---|
82 | |
---|
83 | /** |
---|
84 | * Set the threshold in units of a signal-to-noise ratio. |
---|
85 | * |
---|
86 | * The SNR is defined in terms of excess over the middle estimator |
---|
87 | * in units of the spread estimator. |
---|
88 | */ |
---|
89 | void setThresholdSNR(float snr){ |
---|
90 | threshold=this->getMiddle() + snr*this->getSpread();}; |
---|
91 | |
---|
92 | /** |
---|
93 | * Return the estimator of the middle value of the data. |
---|
94 | * |
---|
95 | * The middle value is determined by the StatsContainer::useRobust |
---|
96 | * flag -- it will be either the median (if true), or the mean (if |
---|
97 | * false). |
---|
98 | */ |
---|
99 | float getMiddle(){if(useRobust) return float(median); else return mean;}; |
---|
100 | |
---|
101 | /** |
---|
102 | * Return the estimator of the amount of spread of the data. |
---|
103 | * |
---|
104 | * The spread value returned is determined by the |
---|
105 | * StatsContainer::useRobust flag -- it will be either the madfm |
---|
106 | * (if true), or the rms (if false). If robust, the madfm will be |
---|
107 | * converted to an equivalent rms under the assumption of |
---|
108 | * Gaussianity, using the Statistics::madfmToSigma function. |
---|
109 | */ |
---|
110 | float getSpread(){ |
---|
111 | if(useRobust) return madfmToSigma(madfm); |
---|
112 | else return stddev; |
---|
113 | }; |
---|
114 | |
---|
115 | /** Get the "probability", under the assumption of normality, of a |
---|
116 | value occuring. */ |
---|
117 | float getPValue(float value){ |
---|
118 | float zStat = (value - this->getMiddle()) / this->getSpread(); |
---|
119 | return 0.5 * erfc( zStat / M_SQRT2 ); |
---|
120 | }; |
---|
121 | |
---|
122 | /** Is a value above the threshold? */ |
---|
123 | bool isDetection(float value){ |
---|
124 | if(useFDR) return (this->getPValue(value) < this->pThreshold); |
---|
125 | else return (value > this->threshold); |
---|
126 | }; |
---|
127 | |
---|
128 | // Functions to calculate the stats for a given array. |
---|
129 | // The idea here is that there are two options to do the calculations: |
---|
130 | // *The first just uses all the points in the array. If you need to |
---|
131 | // remove BLANK points (or something similar), do this beforehand. |
---|
132 | // *Alternatively, construct a bool array of the same size, showing which |
---|
133 | // points are good, and use the second option. |
---|
134 | |
---|
135 | /** Calculate statistics for all elements of a data array */ |
---|
136 | void calculate(Type *array, long size); |
---|
137 | |
---|
138 | /** Calculate statistics for a subset of a data array */ |
---|
139 | void calculate(Type *array, long size, bool *isGood); |
---|
140 | |
---|
141 | private: |
---|
142 | bool defined; // a flag indicating whether the stats are defined. |
---|
143 | |
---|
144 | float mean; ///< The mean, or average, of the values. |
---|
145 | float stddev; ///< The standard deviation, or R.M.S., or sigma... |
---|
146 | Type median; ///< The median of the values. |
---|
147 | Type madfm; ///< The median absolute deviation from the median |
---|
148 | |
---|
149 | float threshold; ///< a threshold for simple sigma-clipping |
---|
150 | float pThreshold; ///< a threshold for the FDR case -- the |
---|
151 | /// upper limit of P values that detected |
---|
152 | /// pixels can have. |
---|
153 | bool useRobust; ///< whether we use the two robust stats or not |
---|
154 | bool useFDR; ///< whether the FDR method is used for |
---|
155 | /// determining a detection |
---|
156 | |
---|
157 | }; |
---|
158 | |
---|
159 | } |
---|
160 | |
---|
161 | #endif /*STATS_H*/ |
---|