[301] | 1 | // ----------------------------------------------------------------------- |
---|
| 2 | // Statistics.hh: Definition of the StatsContainer class, that holds |
---|
| 3 | // statistical parameters such as mean, median, rms, madfm. |
---|
| 4 | // ----------------------------------------------------------------------- |
---|
| 5 | // Copyright (C) 2006, Matthew Whiting, ATNF |
---|
| 6 | // |
---|
| 7 | // This program is free software; you can redistribute it and/or modify it |
---|
| 8 | // under the terms of the GNU General Public License as published by the |
---|
| 9 | // Free Software Foundation; either version 2 of the License, or (at your |
---|
| 10 | // option) any later version. |
---|
| 11 | // |
---|
| 12 | // Duchamp is distributed in the hope that it will be useful, but WITHOUT |
---|
| 13 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
---|
| 14 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
---|
| 15 | // for more details. |
---|
| 16 | // |
---|
| 17 | // You should have received a copy of the GNU General Public License |
---|
| 18 | // along with Duchamp; if not, write to the Free Software Foundation, |
---|
| 19 | // Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA |
---|
| 20 | // |
---|
| 21 | // Correspondence concerning Duchamp may be directed to: |
---|
| 22 | // Internet email: Matthew.Whiting [at] atnf.csiro.au |
---|
| 23 | // Postal address: Dr. Matthew Whiting |
---|
| 24 | // Australia Telescope National Facility, CSIRO |
---|
| 25 | // PO Box 76 |
---|
| 26 | // Epping NSW 1710 |
---|
| 27 | // AUSTRALIA |
---|
| 28 | // ----------------------------------------------------------------------- |
---|
[190] | 29 | #ifndef STATS_H |
---|
| 30 | #define STATS_H |
---|
| 31 | |
---|
[201] | 32 | #include <iostream> |
---|
[1146] | 33 | #include <fstream> |
---|
[1413] | 34 | #include <vector> |
---|
[190] | 35 | #include <math.h> |
---|
| 36 | |
---|
[528] | 37 | /// A namespace to control everything to do with statistical |
---|
| 38 | /// calculations. |
---|
[221] | 39 | |
---|
[190] | 40 | namespace Statistics |
---|
| 41 | { |
---|
| 42 | |
---|
[528] | 43 | /// @brief Divide by the following correction factor to convert from MADFM to sigma (rms) estimator. |
---|
[190] | 44 | const float correctionFactor = 0.6744888; |
---|
[221] | 45 | |
---|
[528] | 46 | /// @brief Multiply by the following correction factor to convert from trimmedSigma to sigma estimator. |
---|
[190] | 47 | const double trimToNormal = 1.17036753077; |
---|
| 48 | |
---|
[528] | 49 | /// @brief A templated function to do the MADFM-to-rms conversion. |
---|
[266] | 50 | template <class T> float madfmToSigma(T madfm); |
---|
[528] | 51 | /// @brief A non-templated function to do the rms-to-MADFM conversion. |
---|
[282] | 52 | float sigmaToMADFM(float sigma); |
---|
[190] | 53 | |
---|
[266] | 54 | |
---|
[528] | 55 | /// @brief |
---|
| 56 | /// Class to hold statistics for a given set of values. |
---|
| 57 | /// @details |
---|
| 58 | /// This class is designed to hold all useful statistics for a given |
---|
| 59 | /// array of numbers. It does *not* hold the data themselves. It |
---|
| 60 | /// provides the functions to calculate mean, rms, median and MADFM |
---|
| 61 | /// (median absolute deviation from median), as well as functions to |
---|
| 62 | /// control detection (ie. defining thresholds) in both standard |
---|
| 63 | /// (sigma-clipping) cases and False Detection Rate scenarios. |
---|
[221] | 64 | |
---|
[190] | 65 | template <class Type> |
---|
| 66 | class StatsContainer |
---|
| 67 | { |
---|
| 68 | public: |
---|
[1152] | 69 | StatsContainer(); |
---|
[190] | 70 | virtual ~StatsContainer(){}; |
---|
[192] | 71 | StatsContainer(const StatsContainer<Type>& s); |
---|
| 72 | StatsContainer<Type>& operator= (const StatsContainer<Type>& s); |
---|
[221] | 73 | |
---|
[528] | 74 | /// @brief A way of printing the statistics. |
---|
[222] | 75 | template <class T> |
---|
[282] | 76 | friend std::ostream& operator<< ( std::ostream& theStream, |
---|
| 77 | StatsContainer<T> &s); |
---|
[190] | 78 | |
---|
| 79 | float getMean(){return mean;}; |
---|
| 80 | void setMean(float f){mean=f;}; |
---|
| 81 | float getStddev(){return stddev;}; |
---|
| 82 | void setStddev(float f){stddev=f;}; |
---|
| 83 | Type getMedian(){return median;}; |
---|
| 84 | void setMedian(Type f){median=f;}; |
---|
| 85 | Type getMadfm(){return madfm;}; |
---|
| 86 | void setMadfm(Type f){madfm=f;}; |
---|
| 87 | float getThreshold(){return threshold;}; |
---|
| 88 | void setThreshold(float f){threshold=f;}; |
---|
| 89 | float getPThreshold(){return pThreshold;}; |
---|
| 90 | void setPThreshold(float f){pThreshold=f;}; |
---|
| 91 | bool getRobust(){return useRobust;}; |
---|
| 92 | void setRobust(bool b){useRobust=b;}; |
---|
| 93 | bool setUseFDR(){return useFDR;}; |
---|
| 94 | void setUseFDR(bool b){useFDR=b;}; |
---|
| 95 | |
---|
[528] | 96 | /// @brief Return the threshold as a signal-to-noise ratio. |
---|
[271] | 97 | float getThresholdSNR(); |
---|
[222] | 98 | |
---|
[528] | 99 | /// @brief Set the threshold in units of a signal-to-noise ratio. |
---|
[271] | 100 | void setThresholdSNR(float snr); |
---|
| 101 | |
---|
[528] | 102 | /// @brief Convert a value to a signal-to-noise ratio. |
---|
[481] | 103 | float valueToSNR(float value); |
---|
| 104 | |
---|
[528] | 105 | /// @brief Convert a signal-to-noise ratio to a flux value |
---|
[481] | 106 | float snrToValue(float snr); |
---|
[258] | 107 | |
---|
[528] | 108 | /// @brief Return the estimator of the middle value of the data. |
---|
[271] | 109 | float getMiddle(); |
---|
[475] | 110 | void setMiddle(float middle); |
---|
[271] | 111 | |
---|
[528] | 112 | /// @brief Return the estimator of the amount of spread of the data. |
---|
[271] | 113 | float getSpread(); |
---|
[475] | 114 | void setSpread(float spread); |
---|
[222] | 115 | |
---|
[528] | 116 | /// @brief Scale the noise by a given factor. |
---|
[275] | 117 | void scaleNoise(float scale); |
---|
| 118 | |
---|
[528] | 119 | /// @brief Return the Gaussian probability of a value given the stats. |
---|
[271] | 120 | float getPValue(float value); |
---|
[190] | 121 | |
---|
[528] | 122 | /// @brief Is a value above the threshold? |
---|
[271] | 123 | bool isDetection(float value); |
---|
[190] | 124 | |
---|
[1137] | 125 | /// @brief Set the comment characters |
---|
[1163] | 126 | void setCommentString(std::string comment){commentString = comment;}; |
---|
[1137] | 127 | |
---|
[190] | 128 | // Functions to calculate the stats for a given array. |
---|
| 129 | // The idea here is that there are two options to do the calculations: |
---|
| 130 | // *The first just uses all the points in the array. If you need to |
---|
| 131 | // remove BLANK points (or something similar), do this beforehand. |
---|
[282] | 132 | // *Alternatively, construct a mask array of the same size, |
---|
| 133 | // showing which points are good, and use the second option. |
---|
[222] | 134 | |
---|
[1170] | 135 | /// @brief Directly set the statistics that have been calculated elsewhere. |
---|
| 136 | void define(float mean, Type median, float stddev, Type madfm); |
---|
| 137 | |
---|
[528] | 138 | /// @brief Calculate statistics for all elements of a data array |
---|
[190] | 139 | void calculate(Type *array, long size); |
---|
[222] | 140 | |
---|
[528] | 141 | /// @brief Calculate statistics for a subset of a data array |
---|
[1413] | 142 | void calculate(Type *array, long size, std::vector<bool> mask); |
---|
[190] | 143 | |
---|
[1146] | 144 | void writeToBinaryFile(std::string filename); |
---|
| 145 | std::streampos readFromBinaryFile(std::string filename, std::streampos loc=0); |
---|
| 146 | |
---|
[190] | 147 | private: |
---|
| 148 | bool defined; // a flag indicating whether the stats are defined. |
---|
| 149 | |
---|
[258] | 150 | float mean; ///< The mean, or average, of the values. |
---|
[221] | 151 | float stddev; ///< The standard deviation, or R.M.S., or sigma... |
---|
[258] | 152 | Type median; ///< The median of the values. |
---|
[221] | 153 | Type madfm; ///< The median absolute deviation from the median |
---|
[190] | 154 | |
---|
[221] | 155 | float threshold; ///< a threshold for simple sigma-clipping |
---|
[528] | 156 | float pThreshold; ///< a threshold for the FDR case -- the upper limit of P values that detected pixels can have. |
---|
[221] | 157 | bool useRobust; ///< whether we use the two robust stats or not |
---|
[528] | 158 | bool useFDR; ///< whether the FDR method is used for determining a detection |
---|
[190] | 159 | |
---|
[1137] | 160 | std::string commentString; ///< Any comment characters etc that need to be prepended to any output via the << operator. |
---|
| 161 | |
---|
[190] | 162 | }; |
---|
| 163 | |
---|
| 164 | } |
---|
| 165 | |
---|
[531] | 166 | #endif // STATS_H |
---|