source: trunk/src/Utils/Statistics.hh

Last change on this file was 1393, checked in by MatthewWhiting, 10 years ago

A large changeset that moves all use of bool arrays (ie. bool *) to std::vector<bool>. This will be a bit safer, plus potentially cheaper in memory usage. Interfaces to all stats functions with masks have changed accordingly, as well as the Gaussian smoothing functions and some spectral utility functions.

File size: 6.5 KB
Line 
1// -----------------------------------------------------------------------
2// Statistics.hh: Definition of the StatsContainer class, that holds
3// statistical parameters such as mean, median, rms, madfm.
4// -----------------------------------------------------------------------
5// Copyright (C) 2006, Matthew Whiting, ATNF
6//
7// This program is free software; you can redistribute it and/or modify it
8// under the terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 2 of the License, or (at your
10// option) any later version.
11//
12// Duchamp is distributed in the hope that it will be useful, but WITHOUT
13// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15// for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with Duchamp; if not, write to the Free Software Foundation,
19// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
20//
21// Correspondence concerning Duchamp may be directed to:
22//    Internet email: Matthew.Whiting [at] atnf.csiro.au
23//    Postal address: Dr. Matthew Whiting
24//                    Australia Telescope National Facility, CSIRO
25//                    PO Box 76
26//                    Epping NSW 1710
27//                    AUSTRALIA
28// -----------------------------------------------------------------------
29#ifndef STATS_H
30#define STATS_H
31
32#include <iostream>
33#include <fstream>
34#include <vector>
35#include <math.h>
36
37/// A namespace to control everything to do with statistical
38/// calculations.
39
40namespace Statistics
41{
42
43  /// @brief Divide by the following correction factor to convert from MADFM to sigma (rms) estimator.
44  const float correctionFactor = 0.6744888;
45
46  /// @brief Multiply by the following correction factor to convert from trimmedSigma to sigma estimator.
47  const double trimToNormal = 1.17036753077;
48
49  /// @brief A templated function to do the MADFM-to-rms conversion.
50  template <class T> float madfmToSigma(T madfm);
51  /// @brief A non-templated function to do the rms-to-MADFM conversion.
52  float sigmaToMADFM(float sigma);
53
54
55  /// @brief
56  ///  Class to hold statistics for a given set of values.
57  /// @details
58  ///  This class is designed to hold all useful statistics for a given
59  ///  array of numbers.  It does *not* hold the data themselves. It
60  ///  provides the functions to calculate mean, rms, median and MADFM
61  ///  (median absolute deviation from median), as well as functions to
62  ///  control detection (ie. defining thresholds) in both standard
63  ///  (sigma-clipping) cases and False Detection Rate scenarios.
64
65  template <class Type>
66  class StatsContainer
67  {
68  public:
69    StatsContainer();
70    virtual ~StatsContainer(){};
71    StatsContainer(const StatsContainer<Type>& s);
72    StatsContainer<Type>& operator= (const StatsContainer<Type>& s);
73
74    /// @brief A way of printing the statistics.
75    template <class T>
76    friend std::ostream& operator<< ( std::ostream& theStream,
77                                      StatsContainer<T> &s);
78
79    float getMean(){return mean;};
80    void  setMean(float f){mean=f;};
81    float getStddev(){return stddev;};
82    void  setStddev(float f){stddev=f;};
83    Type  getMedian(){return median;};
84    void  setMedian(Type f){median=f;};
85    Type  getMadfm(){return madfm;};
86    void  setMadfm(Type f){madfm=f;};
87    float getThreshold(){return threshold;};
88    void  setThreshold(float f){threshold=f;};
89    float getPThreshold(){return pThreshold;};
90    void  setPThreshold(float f){pThreshold=f;};
91    bool  getRobust(){return useRobust;};
92    void  setRobust(bool b){useRobust=b;};
93    bool  setUseFDR(){return useFDR;};
94    void  setUseFDR(bool b){useFDR=b;};
95
96    /// @brief Return the threshold as a signal-to-noise ratio.
97    float getThresholdSNR();
98
99    /// @brief Set the threshold in units of a signal-to-noise ratio.
100    void  setThresholdSNR(float snr);
101
102    /// @brief Convert a value to a signal-to-noise ratio.
103    float valueToSNR(float value);
104
105    /// @brief Convert a signal-to-noise ratio to a flux value
106    float snrToValue(float snr);
107   
108    /// @brief Return the estimator of the middle value of the data.
109    float getMiddle();
110    void  setMiddle(float middle);
111   
112    /// @brief Return the estimator of the amount of spread of the data.
113    float getSpread();
114    void  setSpread(float spread);
115
116    /// @brief Scale the noise by a given factor.
117    void  scaleNoise(float scale);
118
119    /// @brief Return the Gaussian probability of a value given the stats.
120    float getPValue(float value);
121
122    /// @brief Is a value above the threshold?
123    bool isDetection(float value);
124
125    /// @brief Set the comment characters
126    void setCommentString(std::string comment){commentString = comment;};
127
128    // Functions to calculate the stats for a given array.
129    // The idea here is that there are two options to do the calculations:
130    //   *The first just uses all the points in the array. If you need to
131    //     remove BLANK points (or something similar), do this beforehand.
132    //   *Alternatively, construct a mask array of the same size,
133    //     showing which points are good, and use the second option.
134
135      /// @brief Directly set the statistics that have been calculated elsewhere.
136      void define(float mean, Type median, float stddev, Type madfm);
137
138    /// @brief Calculate statistics for all elements of a data array
139    void calculate(Type *array, long size);
140
141    /// @brief Calculate statistics for a subset of a data array
142    void calculate(Type *array, long size, std::vector<bool> mask);
143
144    void writeToBinaryFile(std::string filename);
145    std::streampos readFromBinaryFile(std::string filename, std::streampos loc=0);
146
147  private:
148    bool   defined;      // a flag indicating whether the stats are defined.
149
150    float  mean;         ///< The mean, or average, of the values.
151    float  stddev;       ///< The standard deviation, or R.M.S., or sigma...
152    Type   median;       ///< The median of the values.
153    Type   madfm;        ///< The median absolute deviation from the median
154
155    float  threshold;    ///< a threshold for simple sigma-clipping
156    float  pThreshold;   ///< a threshold for the FDR case -- the upper limit of P values that detected pixels can have.
157    bool   useRobust;    ///< whether we use the two robust stats or not
158    bool   useFDR;       ///< whether the FDR method is used for determining a detection
159
160    std::string commentString; ///< Any comment characters etc that need to be prepended to any output via the << operator.
161
162  };
163
164}
165
166#endif // STATS_H
Note: See TracBrowser for help on using the repository browser.