source: trunk/src/Utils/Statistics.cc @ 1152

Last change on this file since 1152 was 1152, checked in by MatthewWhiting, 11 years ago

Ticket #173 - Largely implemented. Added Detection::eIntFlux and associated accessors, together with calculations thereof (from within Cube::calcObjectWCSparams()). Also included in the output (except not for the case of a specified threshold, since we don't calculate the noise then - now also treat S/Nmax in this way too). eIntFlux is not printed to the screen in normal output, but is written to the results file.

File size: 18.1 KB
Line 
1// -----------------------------------------------------------------------
2// Statistics.cc: Member functions for the templated StatsContainer
3//                class.
4// -----------------------------------------------------------------------
5// Copyright (C) 2006, Matthew Whiting, ATNF
6//
7// This program is free software; you can redistribute it and/or modify it
8// under the terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 2 of the License, or (at your
10// option) any later version.
11//
12// Duchamp is distributed in the hope that it will be useful, but WITHOUT
13// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15// for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with Duchamp; if not, write to the Free Software Foundation,
19// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
20//
21// Correspondence concerning Duchamp may be directed to:
22//    Internet email: Matthew.Whiting [at] atnf.csiro.au
23//    Postal address: Dr. Matthew Whiting
24//                    Australia Telescope National Facility, CSIRO
25//                    PO Box 76
26//                    Epping NSW 1710
27//                    AUSTRALIA
28// -----------------------------------------------------------------------
29#include <iostream>
30#include <fstream>
31#include <duchamp/Utils/Statistics.hh>
32#include <duchamp/Utils/utils.hh>
33
34namespace Statistics
35{
36
37  template <class T>
38  float madfmToSigma(T madfm){
39    return float(madfm)/correctionFactor;
40  }
41  template float madfmToSigma<int>(int madfm);
42  template float madfmToSigma<long>(long madfm);
43  template float madfmToSigma<float>(float madfm);
44  template float madfmToSigma<double>(double madfm);
45
46  //--------------------------------------------------------------------
47  float sigmaToMADFM(float sigma){
48    return float(sigma)*correctionFactor;
49  }
50  //--------------------------------------------------------------------
51  //--------------------------------------------------------------------
52
53    template <class Type>
54    StatsContainer<Type>::StatsContainer()
55    {
56        defined = false;
57        mean=Type(0);
58        stddev=Type(0);
59        median=Type(0);
60        madfm=Type(0);
61        threshold=Type(0);
62        pThreshold=Type(0);
63        useRobust=true;
64        useFDR=false;
65        commentString="";
66    }
67    template StatsContainer<int>::StatsContainer();
68    template StatsContainer<long>::StatsContainer();
69    template StatsContainer<float>::StatsContainer();
70    template StatsContainer<double>::StatsContainer();
71  //--------------------------------------------------------------------
72
73
74  template <class Type>
75  StatsContainer<Type>::StatsContainer(const StatsContainer<Type>& s)
76  {
77    ///  @details
78    ///  The copy constructor for the StatsContainer class. Just uses
79    ///  the assignment operator.
80
81    operator=(s);
82  }
83  template StatsContainer<int>::StatsContainer(const StatsContainer<int>& s);
84  template StatsContainer<long>::StatsContainer(const StatsContainer<long>& s);
85  template StatsContainer<float>::StatsContainer(const StatsContainer<float>& s);
86  template StatsContainer<double>::StatsContainer(const StatsContainer<double>& s);
87  //--------------------------------------------------------------------
88
89  template <class Type>
90  StatsContainer<Type>&
91  StatsContainer<Type>::operator= (const StatsContainer<Type>& s)
92  {
93    ///  The assignment operator for the StatsContainer class.
94
95    if(this == &s) return *this;
96    this->defined    = s.defined;
97    this->mean       = s.mean;
98    this->stddev     = s.stddev;
99    this->median     = s.median;
100    this->madfm      = s.madfm;
101    this->threshold  = s.threshold;
102    this->pThreshold = s.pThreshold;
103    this->useRobust  = s.useRobust;
104    this->useFDR     = s.useFDR;
105    this->commentString = s.commentString;
106    return *this;
107  }
108  template StatsContainer<int>& StatsContainer<int>::operator= (const StatsContainer<int>& s);
109  template StatsContainer<long>& StatsContainer<long>::operator= (const StatsContainer<long>& s);
110  template StatsContainer<float>& StatsContainer<float>::operator= (const StatsContainer<float>& s);
111  template StatsContainer<double>& StatsContainer<double>::operator= (const StatsContainer<double>& s);
112  //--------------------------------------------------------------------
113
114  template <class Type>
115  float StatsContainer<Type>::getThresholdSNR()
116  {
117    ///  @details
118    /// The SNR is defined in terms of excess over the middle estimator
119    /// in units of the spread estimator.
120    return (threshold - this->getMiddle())/this->getSpread();
121  }
122  template float StatsContainer<int>::getThresholdSNR();
123  template float StatsContainer<long>::getThresholdSNR();
124  template float StatsContainer<float>::getThresholdSNR();
125  template float StatsContainer<double>::getThresholdSNR();
126  //--------------------------------------------------------------------
127
128  template <class Type>
129  void  StatsContainer<Type>::setThresholdSNR(float snr)
130  {
131    ///  @details
132    /// The SNR is defined in terms of excess over the middle estimator
133    /// in units of the spread estimator.
134      if(this->defined)
135          threshold=this->getMiddle() + snr*this->getSpread();
136  }
137  template void StatsContainer<int>::setThresholdSNR(float snr);
138  template void StatsContainer<long>::setThresholdSNR(float snr);
139  template void StatsContainer<float>::setThresholdSNR(float snr);
140  template void StatsContainer<double>::setThresholdSNR(float snr);
141  //--------------------------------------------------------------------
142 
143  template <class Type>
144  float StatsContainer<Type>::valueToSNR(float value)
145  {
146    ///  @details
147    /// The SNR is defined in terms of excess over the middle estimator
148    /// in units of the spread estimator.
149      if(this->defined)
150          return (value - this->getMiddle())/this->getSpread();
151      else
152          return Type(0);
153  }
154  template float StatsContainer<int>::valueToSNR(float value);
155  template float StatsContainer<long>::valueToSNR(float value);
156  template float StatsContainer<float>::valueToSNR(float value);
157  template float StatsContainer<double>::valueToSNR(float value);
158  //--------------------------------------------------------------------
159 
160  template <class Type>
161  float StatsContainer<Type>::snrToValue(float snr)
162  {
163    ///  @details
164    /// The SNR is defined in terms of excess over the middle estimator
165    /// in units of the spread estimator.
166    return snr * this->getSpread() + this->getMiddle();
167  }
168  template float StatsContainer<int>::snrToValue(float value);
169  template float StatsContainer<long>::snrToValue(float value);
170  template float StatsContainer<float>::snrToValue(float value);
171  template float StatsContainer<double>::snrToValue(float value);
172  //--------------------------------------------------------------------
173   
174  template <class Type>
175  void StatsContainer<Type>::setMiddle(float middle)
176  {
177    ///  @details
178    /// The middle value is determined by the StatsContainer::useRobust
179    /// flag -- it will be either the median (if true), or the mean (if
180    /// false).
181    if(useRobust) this->median = Type(middle);
182    else this->mean = middle;
183  }
184  template void StatsContainer<int>::setMiddle(float middle);
185  template void StatsContainer<long>::setMiddle(float middle);
186  template void StatsContainer<float>::setMiddle(float middle);
187  template void StatsContainer<double>::setMiddle(float middle);
188  //--------------------------------------------------------------------
189   
190  template <class Type>
191  float StatsContainer<Type>::getMiddle()
192  {
193    ///  @details
194    /// The middle value is determined by the StatsContainer::useRobust
195    /// flag -- it will be either the median (if true), or the mean (if
196    /// false).
197    if(useRobust) return float(this->median);
198    else return this->mean;
199  }
200  template float StatsContainer<int>::getMiddle();
201  template float StatsContainer<long>::getMiddle();
202  template float StatsContainer<float>::getMiddle();
203  template float StatsContainer<double>::getMiddle();
204  //--------------------------------------------------------------------
205   
206  template <class Type>
207  void StatsContainer<Type>::setSpread(float spread)
208  {
209    ///  @details
210    /// The spread value is set according to the
211    /// StatsContainer::useRobust flag -- it will be either the madfm
212    /// (if true), or the rms (if false). If robust, the spread value will be
213    /// converted to a madfm from an equivalent rms under the assumption of
214    /// Gaussianity, using the Statistics::sigmaToMADFM function.
215    if(useRobust) this->madfm = Type(sigmaToMADFM(spread));
216    else this->stddev = spread;
217  }
218  template void StatsContainer<int>::setSpread(float spread);
219  template void StatsContainer<long>::setSpread(float spread);
220  template void StatsContainer<float>::setSpread(float spread);
221  template void StatsContainer<double>::setSpread(float spread);
222  //--------------------------------------------------------------------
223   
224  template <class Type>
225  float StatsContainer<Type>::getSpread()
226  {
227    ///  @details
228    /// The spread value returned is determined by the
229    /// StatsContainer::useRobust flag -- it will be either the madfm
230    /// (if true), or the rms (if false). If robust, the madfm will be
231    /// converted to an equivalent rms under the assumption of
232    /// Gaussianity, using the Statistics::madfmToSigma function.
233    if(useRobust) return madfmToSigma(this->madfm);
234    else return this->stddev;
235  }
236  template float StatsContainer<int>::getSpread();
237  template float StatsContainer<long>::getSpread();
238  template float StatsContainer<float>::getSpread();
239  template float StatsContainer<double>::getSpread();
240  //--------------------------------------------------------------------
241   
242  template <class Type>
243  void  StatsContainer<Type>::scaleNoise(float scale)
244  {
245    /// @details
246    ///  Multiply the noise parameters (stddev & madfm) by a given
247    ///  factor, and adjust the threshold.
248    float snr = (threshold - this->getMiddle())/this->getSpread();   
249    this->madfm  = Type(this->madfm*scale);
250    this->stddev *= scale;
251    this->threshold = this->getMiddle() + snr*this->getSpread();
252  }
253  template void StatsContainer<int>::scaleNoise(float scale);
254  template void StatsContainer<long>::scaleNoise(float scale);
255  template void StatsContainer<float>::scaleNoise(float scale);
256  template void StatsContainer<double>::scaleNoise(float scale);
257 //--------------------------------------------------------------------
258
259  template <class Type>
260  float StatsContainer<Type>::getPValue(float value)
261  {
262    ///  @details
263    /// Get the "probability", under the assumption of normality, of a
264    /// value occuring. 
265    ///
266    /// It is defined by \f$0.5 \operatorname{erfc}(z/\sqrt{2})\f$, where
267    /// \f$z=(x-\mu)/\sigma\f$. We need the factor of 0.5 here, as we are
268    /// only considering the positive tail of the distribution -- we
269    /// don't care about negative detections.
270   
271    float zStat = (value - this->getMiddle()) / this->getSpread();
272    return 0.5 * erfc( zStat / M_SQRT2 );
273  }
274  template float StatsContainer<int>::getPValue(float value);
275  template float StatsContainer<long>::getPValue(float value);
276  template float StatsContainer<float>::getPValue(float value);
277  template float StatsContainer<double>::getPValue(float value);
278  //--------------------------------------------------------------------
279   
280  template <class Type>
281  bool StatsContainer<Type>::isDetection(float value)
282  {
283    ///  @details
284    /// Compares the value given to the correct threshold, depending on
285    /// the value of the StatsContainer::useFDR flag.
286    if(useFDR) return (this->getPValue(value) < this->pThreshold);
287    else       return (value > this->threshold);
288  }
289  template bool StatsContainer<int>::isDetection(float value);
290  template bool StatsContainer<long>::isDetection(float value);
291  template bool StatsContainer<float>::isDetection(float value);
292  template bool StatsContainer<double>::isDetection(float value);
293  //--------------------------------------------------------------------
294
295  template <class Type>
296  void StatsContainer<Type>::calculate(Type *array, long size)
297  {
298    /// @details
299    /// Calculate all four statistics for all elements of a given
300    /// array.
301    ///
302    /// \param array The input data array.
303    /// \param size The length of the input array
304//     findNormalStats(array, size, this->mean, this->stddev);
305//     findMedianStats(array, size, this->median, this->madfm);
306    findAllStats(array,size,this->mean,this->stddev,this->median,this->madfm);
307    this->defined = true;
308  }
309  template void StatsContainer<int>::calculate(int *array, long size);
310  template void StatsContainer<long>::calculate(long *array, long size);
311  template void StatsContainer<float>::calculate(float *array, long size);
312  template void StatsContainer<double>::calculate(double *array, long size);
313  //--------------------------------------------------------------------
314
315  template <class Type>
316  void StatsContainer<Type>::calculate(Type *array, long size, bool *mask)
317  {
318    /// @details
319    /// Calculate all four statistics for a subset of a given
320    /// array. The subset is defined by an array of bool
321    /// variables. 
322    ///
323    /// \param array The input data array.
324    /// \param size The length of the input array
325    /// \param mask An array of the same length that says whether to
326    /// include each member of the array in the calculations. Use a
327    /// value if mask=true.
328//     findNormalStats(array, size, mask, this->mean, this->stddev);
329//     findMedianStats(array, size, mask, this->median, this->madfm);
330    findAllStats(array, size, mask,
331                 this->mean, this->stddev, this->median, this->madfm);
332    this->defined = true;
333  }
334  template void StatsContainer<int>::calculate(int *array, long size, bool *mask);
335  template void StatsContainer<long>::calculate(long *array, long size, bool *mask);
336  template void StatsContainer<float>::calculate(float *array, long size, bool *mask);
337  template void StatsContainer<double>::calculate(double *array, long size, bool *mask);
338  //--------------------------------------------------------------------
339
340  template <class Type>
341  std::ostream& operator<< (std::ostream& theStream, StatsContainer<Type> &s)
342  {
343    /// Prints out the four key statistics to the requested stream.
344    theStream << s.commentString << " "
345              << "Mean   = "   << s.mean   << "\t"
346              << "Std.Dev. = " << s.stddev << "\n"
347              << s.commentString << " "
348              << "Median = "   << s.median << "\t"
349              << "MADFM    = " << s.madfm  << " (= " << madfmToSigma(s.madfm) << " as std.dev.)\n";
350    return theStream;
351  }
352  template std::ostream& operator<<<int> (std::ostream& theStream, StatsContainer<int> &s);
353  template std::ostream& operator<<<long> (std::ostream& theStream, StatsContainer<long> &s);
354  template std::ostream& operator<<<float> (std::ostream& theStream, StatsContainer<float> &s);
355  template std::ostream& operator<<<double> (std::ostream& theStream, StatsContainer<double> &s);
356
357  //--------------------------------------------------------------------
358
359  template <class Type>
360  void StatsContainer<Type>::writeToBinaryFile(std::string filename)
361  {
362    std::ofstream outfile(filename.c_str(), std::ios::out | std::ios::binary | std::ios::app);
363    outfile.write(reinterpret_cast<const char*>(&this->defined), sizeof this->defined);
364    outfile.write(reinterpret_cast<const char*>(&this->mean), sizeof this->mean);
365    outfile.write(reinterpret_cast<const char*>(&this->stddev), sizeof this->stddev);
366    outfile.write(reinterpret_cast<const char*>(&this->median), sizeof this->median);
367    outfile.write(reinterpret_cast<const char*>(&this->madfm), sizeof this->madfm);
368    outfile.write(reinterpret_cast<const char*>(&this->threshold), sizeof this->threshold);
369    outfile.write(reinterpret_cast<const char*>(&this->pThreshold), sizeof this->pThreshold);
370    outfile.write(reinterpret_cast<const char*>(&this->useRobust), sizeof this->useRobust);
371    outfile.write(reinterpret_cast<const char*>(&this->useFDR), sizeof this->useFDR);
372    writeStringToBinaryFile(outfile,this->commentString);
373    outfile.close();
374  }
375  template void StatsContainer<int>::writeToBinaryFile(std::string filename);
376  template void StatsContainer<long>::writeToBinaryFile(std::string filename);
377  template void StatsContainer<float>::writeToBinaryFile(std::string filename);
378  template void StatsContainer<double>::writeToBinaryFile(std::string filename);
379
380 //--------------------------------------------------------------------
381
382  template <class Type>
383  std::streampos StatsContainer<Type>::readFromBinaryFile(std::string filename, std::streampos loc)
384  {
385    std::ifstream infile(filename.c_str(), std::ios::in | std::ios::binary);
386    if(!infile.is_open()){
387      // DUCHAMPERROR("read binary stats","Could not open binary catalogue \""<<filename <<"\"");
388      std::cerr << "Read binary stats : Could not open binary catalogue \"" << filename << "\"\n";
389      return -1;
390    }
391    infile.seekg(loc);
392    infile.read(reinterpret_cast<char*>(&this->defined), sizeof this->defined);
393    infile.read(reinterpret_cast<char*>(&this->mean), sizeof this->mean);
394    infile.read(reinterpret_cast<char*>(&this->stddev), sizeof this->stddev);
395    infile.read(reinterpret_cast<char*>(&this->median), sizeof this->median);
396    infile.read(reinterpret_cast<char*>(&this->madfm), sizeof this->madfm);
397    infile.read(reinterpret_cast<char*>(&this->threshold), sizeof this->threshold);
398    infile.read(reinterpret_cast<char*>(&this->pThreshold), sizeof this->pThreshold);
399    infile.read(reinterpret_cast<char*>(&this->useRobust), sizeof this->useRobust);
400    infile.read(reinterpret_cast<char*>(&this->useFDR), sizeof this->useFDR);
401    this->commentString=readStringFromBinaryFile(infile);
402    std::streampos newloc = infile.tellg();
403    infile.close();
404    return newloc;
405  }
406  template std::streampos StatsContainer<int>::readFromBinaryFile(std::string filename, std::streampos loc);
407  template std::streampos StatsContainer<long>::readFromBinaryFile(std::string filename, std::streampos loc);
408  template std::streampos StatsContainer<float>::readFromBinaryFile(std::string filename, std::streampos loc);
409  template std::streampos StatsContainer<double>::readFromBinaryFile(std::string filename, std::streampos loc);
410
411
412
413}  // matches:  namespace Statistics {
Note: See TracBrowser for help on using the repository browser.