source: tags/release-1.1.12/src/Utils/getStats.cc

Last change on this file was 756, checked in by MatthewWhiting, 14 years ago

A new way to calculate MADFM, so that if we have the median we don't need to re-calculate it

File size: 18.6 KB
Line 
1// -----------------------------------------------------------------------
2// getStats.cc: Basic functions to calculate statistical parameters
3//              such as mean, median, rms, madfm, min, max.
4// -----------------------------------------------------------------------
5// Copyright (C) 2006, Matthew Whiting, ATNF
6//
7// This program is free software; you can redistribute it and/or modify it
8// under the terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 2 of the License, or (at your
10// option) any later version.
11//
12// Duchamp is distributed in the hope that it will be useful, but WITHOUT
13// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15// for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with Duchamp; if not, write to the Free Software Foundation,
19// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
20//
21// Correspondence concerning Duchamp may be directed to:
22//    Internet email: Matthew.Whiting [at] atnf.csiro.au
23//    Postal address: Dr. Matthew Whiting
24//                    Australia Telescope National Facility, CSIRO
25//                    PO Box 76
26//                    Epping NSW 1710
27//                    AUSTRALIA
28// -----------------------------------------------------------------------
29#include <iostream>
30#include <algorithm>
31#include <math.h>
32#include <duchamp/Utils/utils.hh>
33
34template <class T> T absval(T value)
35{
36  /// Type-independent way of getting the absolute value.
37  if( value > 0) return value;
38  else return 0-value;
39}
40template int absval<int>(int value);
41template long absval<long>(long value);
42template float absval<float>(float value);
43template double absval<double>(double value);
44//--------------------------------------------------------------------
45
46template <class T> void findMinMax(const T *array, const int size,
47                                   T &min, T &max)
48{
49  /// @details
50  /// A function to find the minimum and maximum values of a set of numbers.
51  /// \param array The array of data values. Type independent.
52  /// \param size The length of the array
53  /// \param min The returned value of the minimum value in the array.
54  /// \param max The returned value of the maximum value in the array.
55  min = max = array[0];
56  for(int i=1;i<size;i++) {
57    if(array[i]<min) min=array[i];
58    if(array[i]>max) max=array[i];
59  }
60}
61template void findMinMax<int>(const int *array, const int size,
62                                 int &min, int &max);
63template void findMinMax<long>(const long *array, const int size,
64                                  long &min, long &max);
65template void findMinMax<float>(const float *array, const int size,
66                                   float &min, float &max);
67template void findMinMax<double>(const double *array, const int size,
68                                    double &min, double &max);
69//--------------------------------------------------------------------
70
71template <class T> float findMean(T *array, int size)
72{
73  /// @details
74  /// Find the mean of an array of numbers. Type independent.
75  /// \param array The array of numbers.
76  /// \param size The length of the array.
77  /// \return The mean value of the array, returned as a float
78  double mean = double(array[0]);
79  for(int i=1;i<size;i++) mean += double(array[i]);
80  mean /= double(size);
81  return float(mean);
82}
83template float findMean<int>(int *array, int size);
84template float findMean<long>(long *array, int size);
85template float findMean<float>(float *array, int size);
86template float findMean<double>(double *array, int size);
87//--------------------------------------------------------------------
88
89template <class T> float findStddev(T *array, int size)
90{
91  /// @details
92  /// Find the rms or standard deviation of an array of numbers. Type independent.
93  /// \param array The array of numbers.
94  /// \param size The length of the array.
95  /// \return The rms value of the array, returned as a float
96  double mean = double(findMean(array,size));
97  double stddev = (double(array[0])-mean) * (double(array[0])-mean);
98  for(int i=1;i<size;i++) stddev += (double(array[i])-mean)*(double(array[i])-mean);
99  stddev = sqrt(stddev/double(size-1));
100  return float(stddev);
101}
102template float findStddev<int>(int *array, int size);
103template float findStddev<long>(long *array, int size);
104template float findStddev<float>(float *array, int size);
105template float findStddev<double>(double *array, int size);
106//--------------------------------------------------------------------
107
108template <class T> T findMedian(T *array, int size, bool changeArray)
109{
110  /// @details
111  /// Find the median value of an array of numbers. Type independent.
112  /// \param array The array of numbers.
113  /// \param size The length of the array.
114  /// \param changeArray [false] Whether to use the provided array in calculations. If true, the input array will be altered (ie. the order of elements will be changed).
115  /// \return The median value of the array, returned as the same type as the array.
116  T *newarray;
117  if(changeArray) newarray = array;
118  else{
119    newarray = new T[size];
120    for(int i=0;i<size;i++) newarray[i] = array[i];
121  }
122  T median;
123  bool isEven = ((size/2)==0);
124  std::nth_element(newarray,newarray+size/2,newarray+size);
125  median = newarray[size/2];
126  if(isEven){
127    std::nth_element(newarray,newarray+size/2-1,newarray+size);
128    median += newarray[size/2-1];
129    median /= T(2);
130  }
131  if(!changeArray) delete [] newarray;
132  return median;
133}
134template int findMedian<int>(int *array, int size, bool changeArray);
135template long findMedian<long>(long *array, int size, bool changeArray);
136template float findMedian<float>(float *array, int size, bool changeArray);
137template double findMedian<double>(double *array, int size, bool changeArray);
138//--------------------------------------------------------------------
139
140template <class T> T findMADFM(T *array, int size, bool changeArray)
141{
142  /// @details
143  /// Find the median absolute deviation from the median value of an
144  /// array of numbers. Type independent.
145  ///
146  /// \param array The array of numbers.
147  /// \param size The length of the array.
148  /// \param changeArray [false] Whether to use the provided array in calculations. If true, the input array will be altered - both the order and values of the elements will be changed.
149  /// \return The median absolute deviation from the median value of
150  /// the array, returned as the same type as the array.
151  T *newarray;
152  if(changeArray) newarray = array;
153  else newarray = new T[size];
154
155  T median = findMedian<T>(array,size,changeArray);
156  T madfm;
157  bool isEven = ((size/2)==0);
158  for(int i=0;i<size;i++) newarray[i] = absval(array[i]-median);
159  std::nth_element(newarray,newarray+size/2,newarray+size);
160  madfm = newarray[size/2];
161  if(isEven){
162    std::nth_element(newarray,newarray+size/2-1,newarray+size);
163    madfm += newarray[size/2-1];
164    madfm /= T(2);
165  }
166  if(!changeArray) delete [] newarray;
167  return madfm;
168}
169template int findMADFM<int>(int *array, int size, bool changeArray);
170template long findMADFM<long>(long *array, int size, bool changeArray);
171template float findMADFM<float>(float *array, int size, bool changeArray);
172template double findMADFM<double>(double *array, int size, bool changeArray);
173//--------------------------------------------------------------------
174
175template <class T> T findMADFM(T *array, int size, T median, bool changeArray)
176{
177  /// @details
178  /// Find the median absolute deviation from the median value of an
179  /// array of numbers. Type independent. This version accepts a previously-
180  /// calculated median value.
181  ///
182  /// \param array The array of numbers.
183  /// \param size The length of the array.
184  /// \param median The median of the array.
185  /// \param changeArray [false] Whether to use the provided array in calculations. If true, the input array will be altered - both the order and values of the elements will be changed.
186  /// \return The median absolute deviation from the median value of
187  /// the array, returned as the same type as the array.
188  T *newarray;
189  if(changeArray) newarray = array;
190  else newarray = new T[size];
191
192  T madfm;
193  bool isEven = ((size/2)==0);
194  for(int i=0;i<size;i++) newarray[i] = absval(array[i]-median);
195  std::nth_element(newarray,newarray+size/2,newarray+size);
196  madfm = newarray[size/2];
197  if(isEven){
198    std::nth_element(newarray,newarray+size/2-1,newarray+size);
199    madfm += newarray[size/2-1];
200    madfm /= T(2);
201  }
202  if(!changeArray) delete [] newarray;
203  return madfm;
204}
205template int findMADFM<int>(int *array, int size, int median, bool changeArray);
206template long findMADFM<long>(long *array, int size, long median, bool changeArray);
207template float findMADFM<float>(float *array, int size, float median, bool changeArray);
208template double findMADFM<double>(double *array, int size, double median, bool changeArray);
209//--------------------------------------------------------------------
210
211template <class T> void findMedianStats(T *array, int size,
212                                        T &median, T &madfm)
213{
214  /// @details
215  /// Find the median and the median absolute deviation from the median
216  /// value of an array of numbers. Type independent.
217  ///
218  /// \param array The array of numbers.
219  /// \param size The length of the array.
220  /// \param median The median value of the array, returned as the same
221  /// type as the array.
222  /// \param madfm The median absolute deviation from the median value
223  /// of the array, returned as the same type as the array.
224  if(size==0){
225    std::cerr << "Error in findMedianStats: zero sized array!\n";
226    return;
227  }
228  T *newarray = new T[size];
229   for(int i=0;i<size;i++) newarray[i] = array[i];
230
231   median = findMedian(newarray,size,true);
232   //   madfm = findMADFM(newarray,size,true);
233   madfm = findMADFM(newarray,size,median,true);
234
235  delete [] newarray;
236}
237template void findMedianStats<int>(int *array, int size,
238                                      int &median, int &madfm);
239template void findMedianStats<long>(long *array, int size,
240                                       long &median, long &madfm);
241template void findMedianStats<float>(float *array, int size,
242                                        float &median, float &madfm);
243template void findMedianStats<double>(double *array, int size,
244                                         double &median, double &madfm);
245//--------------------------------------------------------------------
246
247template <class T> void findMedianStats(T *array, int size, bool *mask,
248                                        T &median, T &madfm)
249{
250  /// @details
251  /// Find the median and the median absolute deviation from the median
252  /// value of a subset of an array of numbers. The subset is defined
253  /// by an array of bool variables. Type independent.
254  ///
255  /// \param array The array of numbers.
256  /// \param size The length of the array.
257  /// \param mask An array of the same length that says whether to
258  /// include each member of the array in the calculations. Only use
259  /// values where mask=true.
260  /// \param median The median value of the array, returned as the same
261  /// type as the array.
262  /// \param madfm The median absolute deviation from the median value
263  /// of the array, returned as the same type as the array.
264
265  int goodSize=0;
266  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
267  if(goodSize==0){
268    std::cerr << "Error in findMedianStats: no good values!\n";
269    return;
270  }
271  T *newarray = new T[goodSize];
272
273  goodSize=0;
274  for(int i=0;i<size;i++) if(mask[i]) newarray[goodSize++] = array[i];
275  median = findMedian(newarray,goodSize,true);
276  //  madfm = findMADFM(newarray,goodSize,true);
277  madfm = findMADFM(newarray,goodSize,median,true);
278
279  delete [] newarray;
280}
281template void findMedianStats<int>(int *array, int size, bool *mask,
282                                      int &median, int &madfm);
283template void findMedianStats<long>(long *array, int size, bool *mask,
284                                       long &median, long &madfm);
285template void findMedianStats<float>(float *array, int size, bool *mask,
286                                        float &median, float &madfm);
287template void findMedianStats<double>(double *array, int size, bool *mask,
288                                         double &median, double &madfm);
289//--------------------------------------------------------------------
290 
291
292template <class T> void findNormalStats(T *array, int size,
293                                        float &mean, float &stddev)
294{
295  /// @details
296  /// Find the mean and rms or standard deviation of an array of
297  /// numbers. Type independent.
298  ///
299  /// \param array The array of numbers.
300  /// \param size The length of the array.
301  /// \param mean The mean value of the array, returned as a float.
302  /// \param stddev The rms or standard deviation of the array,
303  /// returned as a float.
304
305  if(size==0){
306    std::cerr << "Error in findNormalStats: zero sized array!\n";
307    return;
308  }
309  mean = array[0];
310  for(int i=1;i<size;i++){
311    mean += array[i];
312  }
313  mean /= float(size);
314
315  stddev = (array[0]-mean) * (array[0]-mean);
316  for(int i=1;i<size;i++) stddev += (array[i]-mean)*(array[i]-mean);
317  stddev = sqrt(stddev/float(size-1));
318
319}
320template void findNormalStats<int>(int *array, int size,
321                                      float &mean, float &stddev);
322template void findNormalStats<long>(long *array, int size,
323                                       float &mean, float &stddev);
324template void findNormalStats<float>(float *array, int size,
325                                        float &mean, float &stddev);
326template void findNormalStats<double>(double *array, int size,
327                                         float &mean, float &stddev);
328//--------------------------------------------------------------------
329
330template <class T> void findNormalStats(T *array, int size, bool *mask,
331                                        float &mean, float &stddev)
332{
333  /// @details
334  /// Find the mean and rms or standard deviation of a subset of an
335  /// array of numbers. The subset is defined by an array of bool
336  /// variables. Type independent.
337  ///
338  /// \param array The array of numbers.
339  /// \param size The length of the array.
340  /// \param mask An array of the same length that says whether to
341  /// include each member of the array in the calculations. Only look
342  /// at values where mask=true.
343  /// \param mean The mean value of the array, returned as a float.
344  /// \param stddev The rms or standard deviation of the array,
345  /// returned as a float.
346
347  int goodSize=0;
348  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
349  if(goodSize==0){
350    std::cerr << "Error in findNormalStats: no good values!\n";
351    return;
352  }
353  int start=0;
354  while(!mask[start]){start++;}
355  mean = array[start];
356  for(int i=start+1;i<size;i++){
357    if(mask[i]) mean += array[i];
358  }
359  mean /= float(goodSize);
360
361  stddev = (array[start]-mean) * (array[start]-mean);
362  for(int i=1;i<size;i++){
363    if(mask[i]) stddev += (array[i]-mean)*(array[i]-mean);
364  }
365  stddev = sqrt(stddev/float(goodSize-1));
366
367}
368template void findNormalStats<int>(int *array, int size, bool *mask,
369                                      float &mean, float &stddev);
370template void findNormalStats<long>(long *array, int size, bool *mask,
371                                       float &mean, float &stddev);
372template void findNormalStats<float>(float *array, int size, bool *mask,
373                                        float &mean, float &stddev);
374template void findNormalStats<double>(double *array, int size, bool *mask,
375                                         float &mean, float &stddev);
376//--------------------------------------------------------------------
377//--------------------------------------------------------------------
378 
379
380template <class T> void findAllStats(T *array, int size,
381                                     float &mean, float &stddev,
382                                     T &median, T &madfm)
383{
384  /// @details
385  /// Find the mean,rms (or standard deviation), median AND madfm of an
386  /// array of numbers. Type independent.
387  ///
388  /// \param array The array of numbers.
389  /// \param size The length of the array.
390  /// \param mean The mean value of the array, returned as a float.
391  /// \param stddev The rms or standard deviation of the array,
392  /// returned as a float.
393  /// \param median The median value of the array, returned as the same
394  /// type as the array.
395  /// \param madfm The median absolute deviation from the median value
396  /// of the array, returned as the same type as the array.
397
398  if(size==0){
399    std::cerr << "Error in findAllStats: zero sized array!\n";
400    return;
401  }
402
403  T *newarray = new T[size];
404
405  for(int i=0;i<size;i++) newarray[i] = array[i];
406
407  mean = findMean(newarray,size);
408  stddev = findStddev(newarray,size);
409  median = findMedian(newarray,size,true);
410  //  madfm = findMADFM(newarray,size,true);
411  madfm = findMADFM(newarray,size,median,true);
412
413  delete [] newarray;
414
415}
416template void findAllStats<int>(int *array, int size,
417                                float &mean, float &stddev,
418                                int &median, int &madfm);
419template void findAllStats<long>(long *array, int size,
420                                 float &mean, float &stddev,
421                                 long &median, long &madfm);
422template void findAllStats<float>(float *array, int size,
423                                  float &mean, float &stddev,
424                                  float &median, float &madfm);
425template void findAllStats<double>(double *array, int size,
426                                   float &mean, float &stddev,
427                                   double &median, double &madfm);
428//--------------------------------------------------------------------
429
430template <class T> void findAllStats(T *array, int size, bool *mask,
431                                     float &mean, float &stddev,
432                                     T &median, T &madfm)
433{
434  /// @details
435  /// Find the mean,rms (or standard deviation), median AND madfm of a
436  /// subset of an array of numbers. Type independent.The subset is
437  /// defined by an array of bool variables. Type independent.
438  ///
439  /// \param array The array of numbers.
440  /// \param size The length of the array.
441  /// \param mask An array of the same length that says whether to
442  /// include each member of the array in the calculations. Only look
443  /// at values where mask=true.
444  /// \param mean The mean value of the array, returned as a float.
445  /// \param stddev The rms or standard deviation of the array,
446  /// returned as a float.
447  /// \param median The median value of the array, returned as the same
448  /// type as the array.
449  /// \param madfm The median absolute deviation from the median value
450  /// of the array, returned as the same type as the array.
451
452  int goodSize=0;
453  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
454  if(goodSize==0){
455    std::cerr << "Error in findAllStats: no good values!\n";
456    return;
457  }
458
459  T *newarray = new T[goodSize];
460  goodSize=0;
461  for(int i=0;i<size;i++) if(mask[i]) newarray[goodSize++] = array[i];
462
463  mean = findMean(newarray,goodSize);
464  stddev = findStddev(newarray,goodSize);
465  median = findMedian(newarray,goodSize,true);
466  //madfm = findMADFM(newarray,goodSize,true);
467  madfm = findMADFM(newarray,goodSize,median,true);
468
469  delete [] newarray;
470
471}
472template void findAllStats<int>(int *array, int size, bool *mask,
473                                float &mean, float &stddev,
474                                int &median, int &madfm);
475template void findAllStats<long>(long *array, int size, bool *mask,
476                                 float &mean, float &stddev,
477                                 long &median, long &madfm);
478template void findAllStats<float>(float *array, int size, bool *mask,
479                                  float &mean, float &stddev,
480                                  float &median, float &madfm);
481template void findAllStats<double>(double *array, int size, bool *mask,
482                                   float &mean, float &stddev,
483                                   double &median, double &madfm);
484//--------------------------------------------------------------------
Note: See TracBrowser for help on using the repository browser.