source: trunk/src/Utils/getStats.cc @ 646

Last change on this file since 646 was 646, checked in by MatthewWhiting, 14 years ago

Change to using std::nth_element instead of std::sort in calculating the median statistics (as per ticket #65). Shows a great speed-up. Also tidied up the functions so that we don't have duplicated code.

File size: 16.7 KB
Line 
1// -----------------------------------------------------------------------
2// getStats.cc: Basic functions to calculate statistical parameters
3//              such as mean, median, rms, madfm, min, max.
4// -----------------------------------------------------------------------
5// Copyright (C) 2006, Matthew Whiting, ATNF
6//
7// This program is free software; you can redistribute it and/or modify it
8// under the terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 2 of the License, or (at your
10// option) any later version.
11//
12// Duchamp is distributed in the hope that it will be useful, but WITHOUT
13// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15// for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with Duchamp; if not, write to the Free Software Foundation,
19// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
20//
21// Correspondence concerning Duchamp may be directed to:
22//    Internet email: Matthew.Whiting [at] atnf.csiro.au
23//    Postal address: Dr. Matthew Whiting
24//                    Australia Telescope National Facility, CSIRO
25//                    PO Box 76
26//                    Epping NSW 1710
27//                    AUSTRALIA
28// -----------------------------------------------------------------------
29#include <iostream>
30#include <algorithm>
31#include <math.h>
32#include <duchamp/Utils/utils.hh>
33
34template <class T> T absval(T value)
35{
36  /// Type-independent way of getting the absolute value.
37  if( value > 0) return value;
38  else return 0-value;
39}
40template int absval<int>(int value);
41template long absval<long>(long value);
42template float absval<float>(float value);
43template double absval<double>(double value);
44//--------------------------------------------------------------------
45
46template <class T> void findMinMax(const T *array, const int size,
47                                   T &min, T &max)
48{
49  /// @details
50  /// A function to find the minimum and maximum values of a set of numbers.
51  /// \param array The array of data values. Type independent.
52  /// \param size The length of the array
53  /// \param min The returned value of the minimum value in the array.
54  /// \param max The returned value of the maximum value in the array.
55  min = max = array[0];
56  for(int i=1;i<size;i++) {
57    if(array[i]<min) min=array[i];
58    if(array[i]>max) max=array[i];
59  }
60}
61template void findMinMax<int>(const int *array, const int size,
62                                 int &min, int &max);
63template void findMinMax<long>(const long *array, const int size,
64                                  long &min, long &max);
65template void findMinMax<float>(const float *array, const int size,
66                                   float &min, float &max);
67template void findMinMax<double>(const double *array, const int size,
68                                    double &min, double &max);
69//--------------------------------------------------------------------
70
71template <class T> float findMean(T *array, int size)
72{
73  /// @details
74  /// Find the mean of an array of numbers. Type independent.
75  /// \param array The array of numbers.
76  /// \param size The length of the array.
77  /// \return The mean value of the array, returned as a float
78  float mean = array[0];
79  for(int i=1;i<size;i++) mean += array[i];
80  mean /= float(size);
81  return mean;
82}
83template float findMean<int>(int *array, int size);
84template float findMean<long>(long *array, int size);
85template float findMean<float>(float *array, int size);
86template float findMean<double>(double *array, int size);
87//--------------------------------------------------------------------
88
89template <class T> float findStddev(T *array, int size)
90{
91  /// @details
92  /// Find the rms or standard deviation of an array of numbers. Type independent.
93  /// \param array The array of numbers.
94  /// \param size The length of the array.
95  /// \return The rms value of the array, returned as a float
96  float mean = findMean(array,size);
97  float stddev = (array[0]-mean) * (array[0]-mean);
98  for(int i=1;i<size;i++) stddev += (array[i]-mean)*(array[i]-mean);
99  stddev = sqrt(stddev/float(size-1));
100  return stddev;
101}
102template float findStddev<int>(int *array, int size);
103template float findStddev<long>(long *array, int size);
104template float findStddev<float>(float *array, int size);
105template float findStddev<double>(double *array, int size);
106//--------------------------------------------------------------------
107
108template <class T> T findMedian(T *array, int size, bool changeArray)
109{
110  /// @details
111  /// Find the median value of an array of numbers. Type independent.
112  /// \param array The array of numbers.
113  /// \param size The length of the array.
114  /// \param changeArray [false] Whether to use the provided array in calculations. If true, the input array will be altered (ie. the order of elements will be changed).
115  /// \return The median value of the array, returned as the same type as the array.
116  T *newarray;
117  if(changeArray) newarray = array;
118  else{
119    newarray = new T[size];
120    for(int i=0;i<size;i++) newarray[i] = array[i];
121  }
122  T median;
123  bool isOdd = ((size/2)!=0);
124  std::nth_element(newarray,newarray+size/2,newarray+size);
125  median = newarray[size/2];
126  if(isOdd){
127    std::nth_element(newarray,newarray+size/2-1,newarray+size);
128    median += newarray[size/2-1];
129    median /= 2.;
130  }
131  if(!changeArray) delete [] newarray;
132  return median;
133}
134template int findMedian<int>(int *array, int size, bool changeArray);
135template long findMedian<long>(long *array, int size, bool changeArray);
136template float findMedian<float>(float *array, int size, bool changeArray);
137template double findMedian<double>(double *array, int size, bool changeArray);
138//--------------------------------------------------------------------
139
140template <class T> T findMADFM(T *array, int size, bool changeArray)
141{
142  /// @details
143  /// Find the median absolute deviation from the median value of an
144  /// array of numbers. Type independent.
145  ///
146  /// \param array The array of numbers.
147  /// \param size The length of the array.
148  /// \param changeArray [false] Whether to use the provided array in calculations. If true, the input array will be altered - both the order and values of the elements will be changed.
149  /// \return The median absolute deviation from the median value of
150  /// the array, returned as the same type as the array.
151  T *newarray;
152  if(changeArray) newarray = array;
153  else newarray = new T[size];
154
155  T median = findMedian<T>(array,size,changeArray);
156  T madfm;
157  bool isOdd = ((size/2)!=0);
158  for(int i=0;i<size;i++) newarray[i] = absval(array[i]-median);
159  std::nth_element(newarray,newarray+size/2,newarray+size);
160  madfm = newarray[size/2];
161  if(isOdd){
162    std::nth_element(newarray,newarray+size/2-1,newarray+size);
163    madfm += newarray[size/2-1];
164    madfm /= 2.;
165  }
166  if(!changeArray) delete [] newarray;
167  return madfm;
168}
169template int findMADFM<int>(int *array, int size, bool changeArray);
170template long findMADFM<long>(long *array, int size, bool changeArray);
171template float findMADFM<float>(float *array, int size, bool changeArray);
172template double findMADFM<double>(double *array, int size, bool changeArray);
173//--------------------------------------------------------------------
174
175template <class T> void findMedianStats(T *array, int size,
176                                        T &median, T &madfm)
177{
178  /// @details
179  /// Find the median and the median absolute deviation from the median
180  /// value of an array of numbers. Type independent.
181  ///
182  /// \param array The array of numbers.
183  /// \param size The length of the array.
184  /// \param median The median value of the array, returned as the same
185  /// type as the array.
186  /// \param madfm The median absolute deviation from the median value
187  /// of the array, returned as the same type as the array.
188  if(size==0){
189    std::cerr << "Error in findMedianStats: zero sized array!\n";
190    return;
191  }
192  T *newarray = new T[size];
193   for(int i=0;i<size;i++) newarray[i] = array[i];
194
195   median = findMedian(newarray,size,true);
196   madfm = findMADFM(newarray,size,true);
197
198  delete [] newarray;
199}
200template void findMedianStats<int>(int *array, int size,
201                                      int &median, int &madfm);
202template void findMedianStats<long>(long *array, int size,
203                                       long &median, long &madfm);
204template void findMedianStats<float>(float *array, int size,
205                                        float &median, float &madfm);
206template void findMedianStats<double>(double *array, int size,
207                                         double &median, double &madfm);
208//--------------------------------------------------------------------
209
210template <class T> void findMedianStats(T *array, int size, bool *mask,
211                                        T &median, T &madfm)
212{
213  /// @details
214  /// Find the median and the median absolute deviation from the median
215  /// value of a subset of an array of numbers. The subset is defined
216  /// by an array of bool variables. Type independent.
217  ///
218  /// \param array The array of numbers.
219  /// \param size The length of the array.
220  /// \param mask An array of the same length that says whether to
221  /// include each member of the array in the calculations. Only use
222  /// values where mask=true.
223  /// \param median The median value of the array, returned as the same
224  /// type as the array.
225  /// \param madfm The median absolute deviation from the median value
226  /// of the array, returned as the same type as the array.
227
228  int goodSize=0;
229  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
230  if(goodSize==0){
231    std::cerr << "Error in findMedianStats: no good values!\n";
232    return;
233  }
234  T *newarray = new T[goodSize];
235
236  for(int i=0;i<size;i++) if(mask[i]) newarray[goodSize++] = array[i];
237  median = findMedian(newarray,goodSize,true);
238  madfm = findMADFM(newarray,goodSize,true);
239
240  delete [] newarray;
241}
242template void findMedianStats<int>(int *array, int size, bool *mask,
243                                      int &median, int &madfm);
244template void findMedianStats<long>(long *array, int size, bool *mask,
245                                       long &median, long &madfm);
246template void findMedianStats<float>(float *array, int size, bool *mask,
247                                        float &median, float &madfm);
248template void findMedianStats<double>(double *array, int size, bool *mask,
249                                         double &median, double &madfm);
250//--------------------------------------------------------------------
251 
252
253template <class T> void findNormalStats(T *array, int size,
254                                        float &mean, float &stddev)
255{
256  /// @details
257  /// Find the mean and rms or standard deviation of an array of
258  /// numbers. Type independent.
259  ///
260  /// \param array The array of numbers.
261  /// \param size The length of the array.
262  /// \param mean The mean value of the array, returned as a float.
263  /// \param stddev The rms or standard deviation of the array,
264  /// returned as a float.
265
266  if(size==0){
267    std::cerr << "Error in findNormalStats: zero sized array!\n";
268    return;
269  }
270  mean = array[0];
271  for(int i=1;i<size;i++){
272    mean += array[i];
273  }
274  mean /= float(size);
275
276  stddev = (array[0]-mean) * (array[0]-mean);
277  for(int i=1;i<size;i++) stddev += (array[i]-mean)*(array[i]-mean);
278  stddev = sqrt(stddev/float(size-1));
279
280}
281template void findNormalStats<int>(int *array, int size,
282                                      float &mean, float &stddev);
283template void findNormalStats<long>(long *array, int size,
284                                       float &mean, float &stddev);
285template void findNormalStats<float>(float *array, int size,
286                                        float &mean, float &stddev);
287template void findNormalStats<double>(double *array, int size,
288                                         float &mean, float &stddev);
289//--------------------------------------------------------------------
290
291template <class T> void findNormalStats(T *array, int size, bool *mask,
292                                        float &mean, float &stddev)
293{
294  /// @details
295  /// Find the mean and rms or standard deviation of a subset of an
296  /// array of numbers. The subset is defined by an array of bool
297  /// variables. Type independent.
298  ///
299  /// \param array The array of numbers.
300  /// \param size The length of the array.
301  /// \param mask An array of the same length that says whether to
302  /// include each member of the array in the calculations. Only look
303  /// at values where mask=true.
304  /// \param mean The mean value of the array, returned as a float.
305  /// \param stddev The rms or standard deviation of the array,
306  /// returned as a float.
307
308  int goodSize=0;
309  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
310  if(goodSize==0){
311    std::cerr << "Error in findNormalStats: no good values!\n";
312    return;
313  }
314  int start=0;
315  while(!mask[start]){start++;}
316  mean = array[start];
317  for(int i=start+1;i<size;i++){
318    if(mask[i]) mean += array[i];
319  }
320  mean /= float(goodSize);
321
322  stddev = (array[start]-mean) * (array[start]-mean);
323  for(int i=1;i<size;i++){
324    if(mask[i]) stddev += (array[i]-mean)*(array[i]-mean);
325  }
326  stddev = sqrt(stddev/float(goodSize-1));
327
328}
329template void findNormalStats<int>(int *array, int size, bool *mask,
330                                      float &mean, float &stddev);
331template void findNormalStats<long>(long *array, int size, bool *mask,
332                                       float &mean, float &stddev);
333template void findNormalStats<float>(float *array, int size, bool *mask,
334                                        float &mean, float &stddev);
335template void findNormalStats<double>(double *array, int size, bool *mask,
336                                         float &mean, float &stddev);
337//--------------------------------------------------------------------
338//--------------------------------------------------------------------
339 
340
341template <class T> void findAllStats(T *array, int size,
342                                     float &mean, float &stddev,
343                                     T &median, T &madfm)
344{
345  /// @details
346  /// Find the mean,rms (or standard deviation), median AND madfm of an
347  /// array of numbers. Type independent.
348  ///
349  /// \param array The array of numbers.
350  /// \param size The length of the array.
351  /// \param mean The mean value of the array, returned as a float.
352  /// \param stddev The rms or standard deviation of the array,
353  /// returned as a float.
354  /// \param median The median value of the array, returned as the same
355  /// type as the array.
356  /// \param madfm The median absolute deviation from the median value
357  /// of the array, returned as the same type as the array.
358
359  if(size==0){
360    std::cerr << "Error in findAllStats: zero sized array!\n";
361    return;
362  }
363
364  T *newarray = new T[size];
365
366  for(int i=0;i<size;i++) newarray[i] = array[i];
367
368  mean = findMean(newarray,size);
369  stddev = findStddev(newarray,size);
370  median = findMedian(newarray,size,true);
371  madfm = findMADFM(newarray,size,true);
372
373  delete [] newarray;
374
375}
376template void findAllStats<int>(int *array, int size,
377                                float &mean, float &stddev,
378                                int &median, int &madfm);
379template void findAllStats<long>(long *array, int size,
380                                 float &mean, float &stddev,
381                                 long &median, long &madfm);
382template void findAllStats<float>(float *array, int size,
383                                  float &mean, float &stddev,
384                                  float &median, float &madfm);
385template void findAllStats<double>(double *array, int size,
386                                   float &mean, float &stddev,
387                                   double &median, double &madfm);
388//--------------------------------------------------------------------
389
390template <class T> void findAllStats(T *array, int size, bool *mask,
391                                     float &mean, float &stddev,
392                                     T &median, T &madfm)
393{
394  /// @details
395  /// Find the mean,rms (or standard deviation), median AND madfm of a
396  /// subset of an array of numbers. Type independent.The subset is
397  /// defined by an array of bool variables. Type independent.
398  ///
399  /// \param array The array of numbers.
400  /// \param size The length of the array.
401  /// \param mask An array of the same length that says whether to
402  /// include each member of the array in the calculations. Only look
403  /// at values where mask=true.
404  /// \param mean The mean value of the array, returned as a float.
405  /// \param stddev The rms or standard deviation of the array,
406  /// returned as a float.
407  /// \param median The median value of the array, returned as the same
408  /// type as the array.
409  /// \param madfm The median absolute deviation from the median value
410  /// of the array, returned as the same type as the array.
411
412  int goodSize=0;
413  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
414  if(goodSize==0){
415    std::cerr << "Error in findAllStats: no good values!\n";
416    return;
417  }
418
419  T *newarray = new T[goodSize];
420  goodSize=0;
421  for(int i=0;i<size;i++) if(mask[i]) newarray[goodSize++] = array[i];
422
423  mean = findMean(newarray,goodSize);
424  stddev = findStddev(newarray,goodSize);
425  median = findMedian(newarray,goodSize,true);
426  madfm = findMADFM(newarray,goodSize,true);
427
428  delete [] newarray;
429
430}
431template void findAllStats<int>(int *array, int size, bool *mask,
432                                float &mean, float &stddev,
433                                int &median, int &madfm);
434template void findAllStats<long>(long *array, int size, bool *mask,
435                                 float &mean, float &stddev,
436                                 long &median, long &madfm);
437template void findAllStats<float>(float *array, int size, bool *mask,
438                                  float &mean, float &stddev,
439                                  float &median, float &madfm);
440template void findAllStats<double>(double *array, int size, bool *mask,
441                                   float &mean, float &stddev,
442                                   double &median, double &madfm);
443//--------------------------------------------------------------------
Note: See TracBrowser for help on using the repository browser.