source: trunk/src/Utils/Statistics.cc

Last change on this file was 1393, checked in by MatthewWhiting, 10 years ago

A large changeset that moves all use of bool arrays (ie. bool *) to std::vector<bool>. This will be a bit safer, plus potentially cheaper in memory usage. Interfaces to all stats functions with masks have changed accordingly, as well as the Gaussian smoothing functions and some spectral utility functions.

File size: 18.9 KB
RevLine 
[301]1// -----------------------------------------------------------------------
2// Statistics.cc: Member functions for the templated StatsContainer
3//                class.
4// -----------------------------------------------------------------------
5// Copyright (C) 2006, Matthew Whiting, ATNF
6//
7// This program is free software; you can redistribute it and/or modify it
8// under the terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 2 of the License, or (at your
10// option) any later version.
11//
12// Duchamp is distributed in the hope that it will be useful, but WITHOUT
13// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15// for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with Duchamp; if not, write to the Free Software Foundation,
19// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
20//
21// Correspondence concerning Duchamp may be directed to:
22//    Internet email: Matthew.Whiting [at] atnf.csiro.au
23//    Postal address: Dr. Matthew Whiting
24//                    Australia Telescope National Facility, CSIRO
25//                    PO Box 76
26//                    Epping NSW 1710
27//                    AUSTRALIA
28// -----------------------------------------------------------------------
[201]29#include <iostream>
[1146]30#include <fstream>
[393]31#include <duchamp/Utils/Statistics.hh>
32#include <duchamp/Utils/utils.hh>
[190]33
[201]34namespace Statistics
[190]35{
[266]36
[271]37  template <class T>
38  float madfmToSigma(T madfm){
[266]39    return float(madfm)/correctionFactor;
[282]40  }
[266]41  template float madfmToSigma<int>(int madfm);
42  template float madfmToSigma<long>(long madfm);
43  template float madfmToSigma<float>(float madfm);
44  template float madfmToSigma<double>(double madfm);
45
[222]46  //--------------------------------------------------------------------
[282]47  float sigmaToMADFM(float sigma){
[275]48    return float(sigma)*correctionFactor;
[282]49  }
[222]50  //--------------------------------------------------------------------
[275]51  //--------------------------------------------------------------------
[222]52
[1152]53    template <class Type>
54    StatsContainer<Type>::StatsContainer()
55    {
56        defined = false;
57        mean=Type(0);
58        stddev=Type(0);
59        median=Type(0);
60        madfm=Type(0);
61        threshold=Type(0);
62        pThreshold=Type(0);
63        useRobust=true;
64        useFDR=false;
65        commentString="";
66    }
67    template StatsContainer<int>::StatsContainer();
68    template StatsContainer<long>::StatsContainer();
69    template StatsContainer<float>::StatsContainer();
70    template StatsContainer<double>::StatsContainer();
71  //--------------------------------------------------------------------
72
73
[222]74  template <class Type>
75  StatsContainer<Type>::StatsContainer(const StatsContainer<Type>& s)
[201]76  {
[528]77    ///  @details
78    ///  The copy constructor for the StatsContainer class. Just uses
79    ///  the assignment operator.
80
[365]81    operator=(s);
[201]82  }
83  template StatsContainer<int>::StatsContainer(const StatsContainer<int>& s);
84  template StatsContainer<long>::StatsContainer(const StatsContainer<long>& s);
85  template StatsContainer<float>::StatsContainer(const StatsContainer<float>& s);
86  template StatsContainer<double>::StatsContainer(const StatsContainer<double>& s);
87  //--------------------------------------------------------------------
[192]88
[222]89  template <class Type>
[271]90  StatsContainer<Type>&
91  StatsContainer<Type>::operator= (const StatsContainer<Type>& s)
[201]92  {
[528]93    ///  The assignment operator for the StatsContainer class.
[271]94
[270]95    if(this == &s) return *this;
[201]96    this->defined    = s.defined;
97    this->mean       = s.mean;
98    this->stddev     = s.stddev;
99    this->median     = s.median;
100    this->madfm      = s.madfm;
101    this->threshold  = s.threshold;
102    this->pThreshold = s.pThreshold;
103    this->useRobust  = s.useRobust;
104    this->useFDR     = s.useFDR;
[1137]105    this->commentString = s.commentString;
[270]106    return *this;
[201]107  }
108  template StatsContainer<int>& StatsContainer<int>::operator= (const StatsContainer<int>& s);
109  template StatsContainer<long>& StatsContainer<long>::operator= (const StatsContainer<long>& s);
110  template StatsContainer<float>& StatsContainer<float>::operator= (const StatsContainer<float>& s);
111  template StatsContainer<double>& StatsContainer<double>::operator= (const StatsContainer<double>& s);
112  //--------------------------------------------------------------------
[192]113
[222]114  template <class Type>
[271]115  float StatsContainer<Type>::getThresholdSNR()
116  {
[528]117    ///  @details
118    /// The SNR is defined in terms of excess over the middle estimator
119    /// in units of the spread estimator.
[271]120    return (threshold - this->getMiddle())/this->getSpread();
121  }
122  template float StatsContainer<int>::getThresholdSNR();
123  template float StatsContainer<long>::getThresholdSNR();
124  template float StatsContainer<float>::getThresholdSNR();
125  template float StatsContainer<double>::getThresholdSNR();
126  //--------------------------------------------------------------------
127
128  template <class Type>
129  void  StatsContainer<Type>::setThresholdSNR(float snr)
130  {
[528]131    ///  @details
132    /// The SNR is defined in terms of excess over the middle estimator
133    /// in units of the spread estimator.
[1152]134      if(this->defined)
[1170]135          this->threshold=this->getMiddle() + snr*this->getSpread();
[271]136  }
137  template void StatsContainer<int>::setThresholdSNR(float snr);
138  template void StatsContainer<long>::setThresholdSNR(float snr);
139  template void StatsContainer<float>::setThresholdSNR(float snr);
140  template void StatsContainer<double>::setThresholdSNR(float snr);
141  //--------------------------------------------------------------------
142 
143  template <class Type>
[481]144  float StatsContainer<Type>::valueToSNR(float value)
[271]145  {
[528]146    ///  @details
147    /// The SNR is defined in terms of excess over the middle estimator
148    /// in units of the spread estimator.
[1152]149      if(this->defined)
150          return (value - this->getMiddle())/this->getSpread();
151      else
152          return Type(0);
[271]153  }
[481]154  template float StatsContainer<int>::valueToSNR(float value);
155  template float StatsContainer<long>::valueToSNR(float value);
156  template float StatsContainer<float>::valueToSNR(float value);
157  template float StatsContainer<double>::valueToSNR(float value);
[271]158  //--------------------------------------------------------------------
[481]159 
160  template <class Type>
161  float StatsContainer<Type>::snrToValue(float snr)
162  {
[528]163    ///  @details
164    /// The SNR is defined in terms of excess over the middle estimator
165    /// in units of the spread estimator.
[481]166    return snr * this->getSpread() + this->getMiddle();
167  }
168  template float StatsContainer<int>::snrToValue(float value);
169  template float StatsContainer<long>::snrToValue(float value);
170  template float StatsContainer<float>::snrToValue(float value);
171  template float StatsContainer<double>::snrToValue(float value);
172  //--------------------------------------------------------------------
[271]173   
174  template <class Type>
[475]175  void StatsContainer<Type>::setMiddle(float middle)
176  {
[528]177    ///  @details
178    /// The middle value is determined by the StatsContainer::useRobust
179    /// flag -- it will be either the median (if true), or the mean (if
180    /// false).
[475]181    if(useRobust) this->median = Type(middle);
182    else this->mean = middle;
183  }
184  template void StatsContainer<int>::setMiddle(float middle);
185  template void StatsContainer<long>::setMiddle(float middle);
186  template void StatsContainer<float>::setMiddle(float middle);
187  template void StatsContainer<double>::setMiddle(float middle);
188  //--------------------------------------------------------------------
189   
190  template <class Type>
[271]191  float StatsContainer<Type>::getMiddle()
192  {
[528]193    ///  @details
194    /// The middle value is determined by the StatsContainer::useRobust
195    /// flag -- it will be either the median (if true), or the mean (if
196    /// false).
[385]197    if(useRobust) return float(this->median);
198    else return this->mean;
[271]199  }
200  template float StatsContainer<int>::getMiddle();
201  template float StatsContainer<long>::getMiddle();
202  template float StatsContainer<float>::getMiddle();
203  template float StatsContainer<double>::getMiddle();
204  //--------------------------------------------------------------------
205   
206  template <class Type>
[528]207  void StatsContainer<Type>::setSpread(float spread)
208  {
209    ///  @details
210    /// The spread value is set according to the
211    /// StatsContainer::useRobust flag -- it will be either the madfm
212    /// (if true), or the rms (if false). If robust, the spread value will be
213    /// converted to a madfm from an equivalent rms under the assumption of
214    /// Gaussianity, using the Statistics::sigmaToMADFM function.
[475]215    if(useRobust) this->madfm = Type(sigmaToMADFM(spread));
216    else this->stddev = spread;
217  }
218  template void StatsContainer<int>::setSpread(float spread);
219  template void StatsContainer<long>::setSpread(float spread);
220  template void StatsContainer<float>::setSpread(float spread);
221  template void StatsContainer<double>::setSpread(float spread);
222  //--------------------------------------------------------------------
223   
224  template <class Type>
[528]225  float StatsContainer<Type>::getSpread()
226  {
227    ///  @details
228    /// The spread value returned is determined by the
229    /// StatsContainer::useRobust flag -- it will be either the madfm
230    /// (if true), or the rms (if false). If robust, the madfm will be
231    /// converted to an equivalent rms under the assumption of
232    /// Gaussianity, using the Statistics::madfmToSigma function.
[385]233    if(useRobust) return madfmToSigma(this->madfm);
234    else return this->stddev;
[271]235  }
236  template float StatsContainer<int>::getSpread();
237  template float StatsContainer<long>::getSpread();
238  template float StatsContainer<float>::getSpread();
239  template float StatsContainer<double>::getSpread();
240  //--------------------------------------------------------------------
241   
242  template <class Type>
[275]243  void  StatsContainer<Type>::scaleNoise(float scale)
244  {
[528]245    /// @details
246    ///  Multiply the noise parameters (stddev & madfm) by a given
247    ///  factor, and adjust the threshold.
[275]248    float snr = (threshold - this->getMiddle())/this->getSpread();   
249    this->madfm  = Type(this->madfm*scale);
250    this->stddev *= scale;
251    this->threshold = this->getMiddle() + snr*this->getSpread();
252  }
253  template void StatsContainer<int>::scaleNoise(float scale);
254  template void StatsContainer<long>::scaleNoise(float scale);
255  template void StatsContainer<float>::scaleNoise(float scale);
256  template void StatsContainer<double>::scaleNoise(float scale);
257 //--------------------------------------------------------------------
258
259  template <class Type>
[271]260  float StatsContainer<Type>::getPValue(float value)
261  {
[528]262    ///  @details
263    /// Get the "probability", under the assumption of normality, of a
264    /// value occuring. 
265    ///
266    /// It is defined by \f$0.5 \operatorname{erfc}(z/\sqrt{2})\f$, where
267    /// \f$z=(x-\mu)/\sigma\f$. We need the factor of 0.5 here, as we are
268    /// only considering the positive tail of the distribution -- we
269    /// don't care about negative detections.
[271]270   
271    float zStat = (value - this->getMiddle()) / this->getSpread();
272    return 0.5 * erfc( zStat / M_SQRT2 );
273  }
274  template float StatsContainer<int>::getPValue(float value);
275  template float StatsContainer<long>::getPValue(float value);
276  template float StatsContainer<float>::getPValue(float value);
277  template float StatsContainer<double>::getPValue(float value);
278  //--------------------------------------------------------------------
279   
280  template <class Type>
[528]281  bool StatsContainer<Type>::isDetection(float value)
282  {
283    ///  @details
284    /// Compares the value given to the correct threshold, depending on
285    /// the value of the StatsContainer::useFDR flag.
[271]286    if(useFDR) return (this->getPValue(value) < this->pThreshold);
287    else       return (value > this->threshold);
288  }
289  template bool StatsContainer<int>::isDetection(float value);
290  template bool StatsContainer<long>::isDetection(float value);
291  template bool StatsContainer<float>::isDetection(float value);
292  template bool StatsContainer<double>::isDetection(float value);
293  //--------------------------------------------------------------------
294
295  template <class Type>
[1170]296  void StatsContainer<Type>::define(float mean, Type median, float stddev, Type madfm)
297  {
298    /// @details
299    /// Set all four statistics directly
300    ///
301      this->mean = mean;
302      this->median = median;
303      this->stddev = stddev;
304      this->madfm = madfm;
305      this->defined = true;
306  }
307  template void StatsContainer<int>::define(float mean, int median, float stddev, int madfm);
308  template void StatsContainer<long>::define(float mean, long median, float stddev, long madfm);
309  template void StatsContainer<float>::define(float mean, float median, float stddev, float madfm);
310  template void StatsContainer<double>::define(float mean, double median, float stddev, double madfm);
311  //--------------------------------------------------------------------
312
313  template <class Type>
[222]314  void StatsContainer<Type>::calculate(Type *array, long size)
[201]315  {
[528]316    /// @details
317    /// Calculate all four statistics for all elements of a given
318    /// array.
319    ///
320    /// \param array The input data array.
321    /// \param size The length of the input array
[275]322//     findNormalStats(array, size, this->mean, this->stddev);
323//     findMedianStats(array, size, this->median, this->madfm);
324    findAllStats(array,size,this->mean,this->stddev,this->median,this->madfm);
[201]325    this->defined = true;
326  }
[222]327  template void StatsContainer<int>::calculate(int *array, long size);
328  template void StatsContainer<long>::calculate(long *array, long size);
329  template void StatsContainer<float>::calculate(float *array, long size);
330  template void StatsContainer<double>::calculate(double *array, long size);
[201]331  //--------------------------------------------------------------------
[190]332
[222]333  template <class Type>
[1393]334  void StatsContainer<Type>::calculate(Type *array, long size, std::vector<bool> mask)
[201]335  {
[528]336    /// @details
337    /// Calculate all four statistics for a subset of a given
338    /// array. The subset is defined by an array of bool
339    /// variables. 
340    ///
341    /// \param array The input data array.
342    /// \param size The length of the input array
343    /// \param mask An array of the same length that says whether to
344    /// include each member of the array in the calculations. Use a
345    /// value if mask=true.
[275]346//     findNormalStats(array, size, mask, this->mean, this->stddev);
347//     findMedianStats(array, size, mask, this->median, this->madfm);
348    findAllStats(array, size, mask,
349                 this->mean, this->stddev, this->median, this->madfm);
[201]350    this->defined = true;
351  }
[1393]352  template void StatsContainer<int>::calculate(int *array, long size, std::vector<bool> mask);
353  template void StatsContainer<long>::calculate(long *array, long size, std::vector<bool> mask);
354  template void StatsContainer<float>::calculate(float *array, long size, std::vector<bool> mask);
355  template void StatsContainer<double>::calculate(double *array, long size, std::vector<bool> mask);
[201]356  //--------------------------------------------------------------------
[190]357
[222]358  template <class Type>
359  std::ostream& operator<< (std::ostream& theStream, StatsContainer<Type> &s)
[201]360  {
[528]361    /// Prints out the four key statistics to the requested stream.
[1137]362    theStream << s.commentString << " "
363              << "Mean   = "   << s.mean   << "\t"
[201]364              << "Std.Dev. = " << s.stddev << "\n"
[1137]365              << s.commentString << " "
[201]366              << "Median = "   << s.median << "\t"
[519]367              << "MADFM    = " << s.madfm  << " (= " << madfmToSigma(s.madfm) << " as std.dev.)\n";
[270]368    return theStream;
[201]369  }
370  template std::ostream& operator<<<int> (std::ostream& theStream, StatsContainer<int> &s);
371  template std::ostream& operator<<<long> (std::ostream& theStream, StatsContainer<long> &s);
372  template std::ostream& operator<<<float> (std::ostream& theStream, StatsContainer<float> &s);
373  template std::ostream& operator<<<double> (std::ostream& theStream, StatsContainer<double> &s);
374
[1146]375  //--------------------------------------------------------------------
[201]376
[1146]377  template <class Type>
378  void StatsContainer<Type>::writeToBinaryFile(std::string filename)
379  {
380    std::ofstream outfile(filename.c_str(), std::ios::out | std::ios::binary | std::ios::app);
381    outfile.write(reinterpret_cast<const char*>(&this->defined), sizeof this->defined);
382    outfile.write(reinterpret_cast<const char*>(&this->mean), sizeof this->mean);
383    outfile.write(reinterpret_cast<const char*>(&this->stddev), sizeof this->stddev);
384    outfile.write(reinterpret_cast<const char*>(&this->median), sizeof this->median);
385    outfile.write(reinterpret_cast<const char*>(&this->madfm), sizeof this->madfm);
386    outfile.write(reinterpret_cast<const char*>(&this->threshold), sizeof this->threshold);
387    outfile.write(reinterpret_cast<const char*>(&this->pThreshold), sizeof this->pThreshold);
388    outfile.write(reinterpret_cast<const char*>(&this->useRobust), sizeof this->useRobust);
389    outfile.write(reinterpret_cast<const char*>(&this->useFDR), sizeof this->useFDR);
390    writeStringToBinaryFile(outfile,this->commentString);
391    outfile.close();
392  }
393  template void StatsContainer<int>::writeToBinaryFile(std::string filename);
394  template void StatsContainer<long>::writeToBinaryFile(std::string filename);
395  template void StatsContainer<float>::writeToBinaryFile(std::string filename);
396  template void StatsContainer<double>::writeToBinaryFile(std::string filename);
[201]397
[1146]398 //--------------------------------------------------------------------
399
400  template <class Type>
401  std::streampos StatsContainer<Type>::readFromBinaryFile(std::string filename, std::streampos loc)
402  {
403    std::ifstream infile(filename.c_str(), std::ios::in | std::ios::binary);
404    if(!infile.is_open()){
405      // DUCHAMPERROR("read binary stats","Could not open binary catalogue \""<<filename <<"\"");
406      std::cerr << "Read binary stats : Could not open binary catalogue \"" << filename << "\"\n";
407      return -1;
408    }
409    infile.seekg(loc);
410    infile.read(reinterpret_cast<char*>(&this->defined), sizeof this->defined);
411    infile.read(reinterpret_cast<char*>(&this->mean), sizeof this->mean);
412    infile.read(reinterpret_cast<char*>(&this->stddev), sizeof this->stddev);
413    infile.read(reinterpret_cast<char*>(&this->median), sizeof this->median);
414    infile.read(reinterpret_cast<char*>(&this->madfm), sizeof this->madfm);
415    infile.read(reinterpret_cast<char*>(&this->threshold), sizeof this->threshold);
416    infile.read(reinterpret_cast<char*>(&this->pThreshold), sizeof this->pThreshold);
417    infile.read(reinterpret_cast<char*>(&this->useRobust), sizeof this->useRobust);
418    infile.read(reinterpret_cast<char*>(&this->useFDR), sizeof this->useFDR);
419    this->commentString=readStringFromBinaryFile(infile);
420    std::streampos newloc = infile.tellg();
421    infile.close();
422    return newloc;
423  }
424  template std::streampos StatsContainer<int>::readFromBinaryFile(std::string filename, std::streampos loc);
425  template std::streampos StatsContainer<long>::readFromBinaryFile(std::string filename, std::streampos loc);
426  template std::streampos StatsContainer<float>::readFromBinaryFile(std::string filename, std::streampos loc);
427  template std::streampos StatsContainer<double>::readFromBinaryFile(std::string filename, std::streampos loc);
428
429
430
[201]431}  // matches:  namespace Statistics {
Note: See TracBrowser for help on using the repository browser.