source: trunk/src/Utils/getStats.cc @ 822

Last change on this file since 822 was 756, checked in by MatthewWhiting, 14 years ago

A new way to calculate MADFM, so that if we have the median we don't need to re-calculate it

File size: 18.6 KB
RevLine 
[301]1// -----------------------------------------------------------------------
2// getStats.cc: Basic functions to calculate statistical parameters
3//              such as mean, median, rms, madfm, min, max.
4// -----------------------------------------------------------------------
5// Copyright (C) 2006, Matthew Whiting, ATNF
6//
7// This program is free software; you can redistribute it and/or modify it
8// under the terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 2 of the License, or (at your
10// option) any later version.
11//
12// Duchamp is distributed in the hope that it will be useful, but WITHOUT
13// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15// for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with Duchamp; if not, write to the Free Software Foundation,
19// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
20//
21// Correspondence concerning Duchamp may be directed to:
22//    Internet email: Matthew.Whiting [at] atnf.csiro.au
23//    Postal address: Dr. Matthew Whiting
24//                    Australia Telescope National Facility, CSIRO
25//                    PO Box 76
26//                    Epping NSW 1710
27//                    AUSTRALIA
28// -----------------------------------------------------------------------
[3]29#include <iostream>
[204]30#include <algorithm>
[3]31#include <math.h>
[393]32#include <duchamp/Utils/utils.hh>
[3]33
[190]34template <class T> T absval(T value)
[70]35{
[528]36  /// Type-independent way of getting the absolute value.
[190]37  if( value > 0) return value;
38  else return 0-value;
39}
[222]40template int absval<int>(int value);
41template long absval<long>(long value);
42template float absval<float>(float value);
43template double absval<double>(double value);
[190]44//--------------------------------------------------------------------
45
46template <class T> void findMinMax(const T *array, const int size,
47                                   T &min, T &max)
48{
[528]49  /// @details
50  /// A function to find the minimum and maximum values of a set of numbers.
51  /// \param array The array of data values. Type independent.
52  /// \param size The length of the array
53  /// \param min The returned value of the minimum value in the array.
54  /// \param max The returned value of the maximum value in the array.
[70]55  min = max = array[0];
56  for(int i=1;i<size;i++) {
57    if(array[i]<min) min=array[i];
58    if(array[i]>max) max=array[i];
59  }
60}
[190]61template void findMinMax<int>(const int *array, const int size,
[222]62                                 int &min, int &max);
63template void findMinMax<long>(const long *array, const int size,
64                                  long &min, long &max);
[190]65template void findMinMax<float>(const float *array, const int size,
[222]66                                   float &min, float &max);
67template void findMinMax<double>(const double *array, const int size,
68                                    double &min, double &max);
[190]69//--------------------------------------------------------------------
[70]70
[190]71template <class T> float findMean(T *array, int size)
[53]72{
[528]73  /// @details
74  /// Find the mean of an array of numbers. Type independent.
75  /// \param array The array of numbers.
76  /// \param size The length of the array.
77  /// \return The mean value of the array, returned as a float
[653]78  double mean = double(array[0]);
79  for(int i=1;i<size;i++) mean += double(array[i]);
80  mean /= double(size);
81  return float(mean);
[53]82}
[190]83template float findMean<int>(int *array, int size);
[222]84template float findMean<long>(long *array, int size);
[190]85template float findMean<float>(float *array, int size);
[222]86template float findMean<double>(double *array, int size);
[190]87//--------------------------------------------------------------------
[53]88
[190]89template <class T> float findStddev(T *array, int size)
[53]90{
[528]91  /// @details
92  /// Find the rms or standard deviation of an array of numbers. Type independent.
93  /// \param array The array of numbers.
94  /// \param size The length of the array.
95  /// \return The rms value of the array, returned as a float
[653]96  double mean = double(findMean(array,size));
97  double stddev = (double(array[0])-mean) * (double(array[0])-mean);
98  for(int i=1;i<size;i++) stddev += (double(array[i])-mean)*(double(array[i])-mean);
99  stddev = sqrt(stddev/double(size-1));
100  return float(stddev);
[53]101}
[190]102template float findStddev<int>(int *array, int size);
[222]103template float findStddev<long>(long *array, int size);
[190]104template float findStddev<float>(float *array, int size);
[222]105template float findStddev<double>(double *array, int size);
[190]106//--------------------------------------------------------------------
[53]107
[646]108template <class T> T findMedian(T *array, int size, bool changeArray)
[53]109{
[528]110  /// @details
111  /// Find the median value of an array of numbers. Type independent.
112  /// \param array The array of numbers.
113  /// \param size The length of the array.
[646]114  /// \param changeArray [false] Whether to use the provided array in calculations. If true, the input array will be altered (ie. the order of elements will be changed).
[528]115  /// \return The median value of the array, returned as the same type as the array.
[646]116  T *newarray;
117  if(changeArray) newarray = array;
118  else{
119    newarray = new T[size];
120    for(int i=0;i<size;i++) newarray[i] = array[i];
121  }
[190]122  T median;
[657]123  bool isEven = ((size/2)==0);
[646]124  std::nth_element(newarray,newarray+size/2,newarray+size);
125  median = newarray[size/2];
[657]126  if(isEven){
[646]127    std::nth_element(newarray,newarray+size/2-1,newarray+size);
128    median += newarray[size/2-1];
[699]129    median /= T(2);
[646]130  }
131  if(!changeArray) delete [] newarray;
[53]132  return median;
133}
[646]134template int findMedian<int>(int *array, int size, bool changeArray);
135template long findMedian<long>(long *array, int size, bool changeArray);
136template float findMedian<float>(float *array, int size, bool changeArray);
137template double findMedian<double>(double *array, int size, bool changeArray);
[190]138//--------------------------------------------------------------------
[53]139
[646]140template <class T> T findMADFM(T *array, int size, bool changeArray)
[53]141{
[528]142  /// @details
143  /// Find the median absolute deviation from the median value of an
144  /// array of numbers. Type independent.
145  ///
146  /// \param array The array of numbers.
147  /// \param size The length of the array.
[646]148  /// \param changeArray [false] Whether to use the provided array in calculations. If true, the input array will be altered - both the order and values of the elements will be changed.
[528]149  /// \return The median absolute deviation from the median value of
150  /// the array, returned as the same type as the array.
[646]151  T *newarray;
152  if(changeArray) newarray = array;
153  else newarray = new T[size];
154
155  T median = findMedian<T>(array,size,changeArray);
[190]156  T madfm;
[657]157  bool isEven = ((size/2)==0);
[190]158  for(int i=0;i<size;i++) newarray[i] = absval(array[i]-median);
[646]159  std::nth_element(newarray,newarray+size/2,newarray+size);
160  madfm = newarray[size/2];
[657]161  if(isEven){
[646]162    std::nth_element(newarray,newarray+size/2-1,newarray+size);
163    madfm += newarray[size/2-1];
[699]164    madfm /= T(2);
[646]165  }
166  if(!changeArray) delete [] newarray;
[53]167  return madfm;
168}
[646]169template int findMADFM<int>(int *array, int size, bool changeArray);
170template long findMADFM<long>(long *array, int size, bool changeArray);
171template float findMADFM<float>(float *array, int size, bool changeArray);
172template double findMADFM<double>(double *array, int size, bool changeArray);
[190]173//--------------------------------------------------------------------
174
[756]175template <class T> T findMADFM(T *array, int size, T median, bool changeArray)
176{
177  /// @details
178  /// Find the median absolute deviation from the median value of an
179  /// array of numbers. Type independent. This version accepts a previously-
180  /// calculated median value.
181  ///
182  /// \param array The array of numbers.
183  /// \param size The length of the array.
184  /// \param median The median of the array.
185  /// \param changeArray [false] Whether to use the provided array in calculations. If true, the input array will be altered - both the order and values of the elements will be changed.
186  /// \return The median absolute deviation from the median value of
187  /// the array, returned as the same type as the array.
188  T *newarray;
189  if(changeArray) newarray = array;
190  else newarray = new T[size];
191
192  T madfm;
193  bool isEven = ((size/2)==0);
194  for(int i=0;i<size;i++) newarray[i] = absval(array[i]-median);
195  std::nth_element(newarray,newarray+size/2,newarray+size);
196  madfm = newarray[size/2];
197  if(isEven){
198    std::nth_element(newarray,newarray+size/2-1,newarray+size);
199    madfm += newarray[size/2-1];
200    madfm /= T(2);
201  }
202  if(!changeArray) delete [] newarray;
203  return madfm;
204}
205template int findMADFM<int>(int *array, int size, int median, bool changeArray);
206template long findMADFM<long>(long *array, int size, long median, bool changeArray);
207template float findMADFM<float>(float *array, int size, float median, bool changeArray);
208template double findMADFM<double>(double *array, int size, double median, bool changeArray);
209//--------------------------------------------------------------------
210
[190]211template <class T> void findMedianStats(T *array, int size,
212                                        T &median, T &madfm)
[3]213{
[528]214  /// @details
215  /// Find the median and the median absolute deviation from the median
216  /// value of an array of numbers. Type independent.
217  ///
218  /// \param array The array of numbers.
219  /// \param size The length of the array.
220  /// \param median The median value of the array, returned as the same
221  /// type as the array.
222  /// \param madfm The median absolute deviation from the median value
223  /// of the array, returned as the same type as the array.
[190]224  if(size==0){
225    std::cerr << "Error in findMedianStats: zero sized array!\n";
226    return;
227  }
228  T *newarray = new T[size];
[646]229   for(int i=0;i<size;i++) newarray[i] = array[i];
[3]230
[646]231   median = findMedian(newarray,size,true);
[756]232   //   madfm = findMADFM(newarray,size,true);
233   madfm = findMADFM(newarray,size,median,true);
[3]234
235  delete [] newarray;
236}
[190]237template void findMedianStats<int>(int *array, int size,
[222]238                                      int &median, int &madfm);
[190]239template void findMedianStats<long>(long *array, int size,
[222]240                                       long &median, long &madfm);
[190]241template void findMedianStats<float>(float *array, int size,
[222]242                                        float &median, float &madfm);
[190]243template void findMedianStats<double>(double *array, int size,
[222]244                                         double &median, double &madfm);
[190]245//--------------------------------------------------------------------
246
[275]247template <class T> void findMedianStats(T *array, int size, bool *mask,
[190]248                                        T &median, T &madfm)
[3]249{
[528]250  /// @details
251  /// Find the median and the median absolute deviation from the median
252  /// value of a subset of an array of numbers. The subset is defined
253  /// by an array of bool variables. Type independent.
254  ///
255  /// \param array The array of numbers.
256  /// \param size The length of the array.
257  /// \param mask An array of the same length that says whether to
258  /// include each member of the array in the calculations. Only use
259  /// values where mask=true.
260  /// \param median The median value of the array, returned as the same
261  /// type as the array.
262  /// \param madfm The median absolute deviation from the median value
263  /// of the array, returned as the same type as the array.
264
[190]265  int goodSize=0;
[275]266  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
[190]267  if(goodSize==0){
268    std::cerr << "Error in findMedianStats: no good values!\n";
269    return;
270  }
271  T *newarray = new T[goodSize];
[646]272
[648]273  goodSize=0;
[275]274  for(int i=0;i<size;i++) if(mask[i]) newarray[goodSize++] = array[i];
[646]275  median = findMedian(newarray,goodSize,true);
[756]276  //  madfm = findMADFM(newarray,goodSize,true);
277  madfm = findMADFM(newarray,goodSize,median,true);
[3]278
279  delete [] newarray;
280}
[275]281template void findMedianStats<int>(int *array, int size, bool *mask,
[222]282                                      int &median, int &madfm);
[275]283template void findMedianStats<long>(long *array, int size, bool *mask,
[222]284                                       long &median, long &madfm);
[275]285template void findMedianStats<float>(float *array, int size, bool *mask,
[222]286                                        float &median, float &madfm);
[275]287template void findMedianStats<double>(double *array, int size, bool *mask,
[222]288                                         double &median, double &madfm);
[190]289//--------------------------------------------------------------------
[3]290 
291
[190]292template <class T> void findNormalStats(T *array, int size,
293                                        float &mean, float &stddev)
[3]294{
[528]295  /// @details
296  /// Find the mean and rms or standard deviation of an array of
297  /// numbers. Type independent.
298  ///
299  /// \param array The array of numbers.
300  /// \param size The length of the array.
301  /// \param mean The mean value of the array, returned as a float.
302  /// \param stddev The rms or standard deviation of the array,
303  /// returned as a float.
304
[190]305  if(size==0){
306    std::cerr << "Error in findNormalStats: zero sized array!\n";
307    return;
308  }
[3]309  mean = array[0];
310  for(int i=1;i<size;i++){
311    mean += array[i];
312  }
313  mean /= float(size);
314
[79]315  stddev = (array[0]-mean) * (array[0]-mean);
[275]316  for(int i=1;i<size;i++) stddev += (array[i]-mean)*(array[i]-mean);
[79]317  stddev = sqrt(stddev/float(size-1));
[3]318
319}
[190]320template void findNormalStats<int>(int *array, int size,
[222]321                                      float &mean, float &stddev);
[190]322template void findNormalStats<long>(long *array, int size,
[222]323                                       float &mean, float &stddev);
[190]324template void findNormalStats<float>(float *array, int size,
[222]325                                        float &mean, float &stddev);
[190]326template void findNormalStats<double>(double *array, int size,
[222]327                                         float &mean, float &stddev);
[190]328//--------------------------------------------------------------------
[3]329
[275]330template <class T> void findNormalStats(T *array, int size, bool *mask,
[190]331                                        float &mean, float &stddev)
[70]332{
[528]333  /// @details
334  /// Find the mean and rms or standard deviation of a subset of an
335  /// array of numbers. The subset is defined by an array of bool
336  /// variables. Type independent.
337  ///
338  /// \param array The array of numbers.
339  /// \param size The length of the array.
340  /// \param mask An array of the same length that says whether to
341  /// include each member of the array in the calculations. Only look
342  /// at values where mask=true.
343  /// \param mean The mean value of the array, returned as a float.
344  /// \param stddev The rms or standard deviation of the array,
345  /// returned as a float.
346
[190]347  int goodSize=0;
[275]348  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
[190]349  if(goodSize==0){
350    std::cerr << "Error in findNormalStats: no good values!\n";
351    return;
352  }
353  int start=0;
[275]354  while(!mask[start]){start++;}
[190]355  mean = array[start];
356  for(int i=start+1;i<size;i++){
[275]357    if(mask[i]) mean += array[i];
[190]358  }
359  mean /= float(goodSize);
[70]360
[190]361  stddev = (array[start]-mean) * (array[start]-mean);
362  for(int i=1;i<size;i++){
[275]363    if(mask[i]) stddev += (array[i]-mean)*(array[i]-mean);
[190]364  }
365  stddev = sqrt(stddev/float(goodSize-1));
366
367}
[275]368template void findNormalStats<int>(int *array, int size, bool *mask,
[222]369                                      float &mean, float &stddev);
[275]370template void findNormalStats<long>(long *array, int size, bool *mask,
[222]371                                       float &mean, float &stddev);
[275]372template void findNormalStats<float>(float *array, int size, bool *mask,
[222]373                                        float &mean, float &stddev);
[275]374template void findNormalStats<double>(double *array, int size, bool *mask,
[222]375                                         float &mean, float &stddev);
[190]376//--------------------------------------------------------------------
[275]377//--------------------------------------------------------------------
378 
379
380template <class T> void findAllStats(T *array, int size,
381                                     float &mean, float &stddev,
382                                     T &median, T &madfm)
383{
[528]384  /// @details
385  /// Find the mean,rms (or standard deviation), median AND madfm of an
386  /// array of numbers. Type independent.
387  ///
388  /// \param array The array of numbers.
389  /// \param size The length of the array.
390  /// \param mean The mean value of the array, returned as a float.
391  /// \param stddev The rms or standard deviation of the array,
392  /// returned as a float.
393  /// \param median The median value of the array, returned as the same
394  /// type as the array.
395  /// \param madfm The median absolute deviation from the median value
396  /// of the array, returned as the same type as the array.
397
[275]398  if(size==0){
[285]399    std::cerr << "Error in findAllStats: zero sized array!\n";
[275]400    return;
401  }
402
403  T *newarray = new T[size];
404
405  for(int i=0;i<size;i++) newarray[i] = array[i];
[285]406
[646]407  mean = findMean(newarray,size);
408  stddev = findStddev(newarray,size);
409  median = findMedian(newarray,size,true);
[756]410  //  madfm = findMADFM(newarray,size,true);
411  madfm = findMADFM(newarray,size,median,true);
[285]412
[275]413  delete [] newarray;
414
415}
416template void findAllStats<int>(int *array, int size,
417                                float &mean, float &stddev,
418                                int &median, int &madfm);
419template void findAllStats<long>(long *array, int size,
420                                 float &mean, float &stddev,
421                                 long &median, long &madfm);
422template void findAllStats<float>(float *array, int size,
423                                  float &mean, float &stddev,
424                                  float &median, float &madfm);
425template void findAllStats<double>(double *array, int size,
426                                   float &mean, float &stddev,
427                                   double &median, double &madfm);
428//--------------------------------------------------------------------
429
430template <class T> void findAllStats(T *array, int size, bool *mask,
431                                     float &mean, float &stddev,
432                                     T &median, T &madfm)
433{
[528]434  /// @details
435  /// Find the mean,rms (or standard deviation), median AND madfm of a
436  /// subset of an array of numbers. Type independent.The subset is
437  /// defined by an array of bool variables. Type independent.
438  ///
439  /// \param array The array of numbers.
440  /// \param size The length of the array.
441  /// \param mask An array of the same length that says whether to
442  /// include each member of the array in the calculations. Only look
443  /// at values where mask=true.
444  /// \param mean The mean value of the array, returned as a float.
445  /// \param stddev The rms or standard deviation of the array,
446  /// returned as a float.
447  /// \param median The median value of the array, returned as the same
448  /// type as the array.
449  /// \param madfm The median absolute deviation from the median value
450  /// of the array, returned as the same type as the array.
451
[275]452  int goodSize=0;
453  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
454  if(goodSize==0){
455    std::cerr << "Error in findAllStats: no good values!\n";
456    return;
457  }
458
459  T *newarray = new T[goodSize];
460  goodSize=0;
461  for(int i=0;i<size;i++) if(mask[i]) newarray[goodSize++] = array[i];
462
[646]463  mean = findMean(newarray,goodSize);
464  stddev = findStddev(newarray,goodSize);
465  median = findMedian(newarray,goodSize,true);
[756]466  //madfm = findMADFM(newarray,goodSize,true);
467  madfm = findMADFM(newarray,goodSize,median,true);
[275]468
[282]469  delete [] newarray;
470
[275]471}
472template void findAllStats<int>(int *array, int size, bool *mask,
473                                float &mean, float &stddev,
474                                int &median, int &madfm);
475template void findAllStats<long>(long *array, int size, bool *mask,
476                                 float &mean, float &stddev,
477                                 long &median, long &madfm);
478template void findAllStats<float>(float *array, int size, bool *mask,
479                                  float &mean, float &stddev,
480                                  float &median, float &madfm);
481template void findAllStats<double>(double *array, int size, bool *mask,
482                                   float &mean, float &stddev,
483                                   double &median, double &madfm);
484//--------------------------------------------------------------------
Note: See TracBrowser for help on using the repository browser.