source: trunk/src/Utils/getStats.cc @ 368

Last change on this file since 368 was 301, checked in by Matthew Whiting, 17 years ago

Mostly adding the distribution text to the start of files, with a few additional comments added too.

File size: 17.1 KB
RevLine 
[301]1// -----------------------------------------------------------------------
2// getStats.cc: Basic functions to calculate statistical parameters
3//              such as mean, median, rms, madfm, min, max.
4// -----------------------------------------------------------------------
5// Copyright (C) 2006, Matthew Whiting, ATNF
6//
7// This program is free software; you can redistribute it and/or modify it
8// under the terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 2 of the License, or (at your
10// option) any later version.
11//
12// Duchamp is distributed in the hope that it will be useful, but WITHOUT
13// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15// for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with Duchamp; if not, write to the Free Software Foundation,
19// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
20//
21// Correspondence concerning Duchamp may be directed to:
22//    Internet email: Matthew.Whiting [at] atnf.csiro.au
23//    Postal address: Dr. Matthew Whiting
24//                    Australia Telescope National Facility, CSIRO
25//                    PO Box 76
26//                    Epping NSW 1710
27//                    AUSTRALIA
28// -----------------------------------------------------------------------
[3]29#include <iostream>
[204]30#include <algorithm>
[3]31#include <math.h>
32#include <Utils/utils.hh>
33
[190]34template <class T> T absval(T value)
[70]35{
[222]36  /**
37   * Type-independent way of getting the absolute value.
38   */
[190]39  if( value > 0) return value;
40  else return 0-value;
41}
[222]42template int absval<int>(int value);
43template long absval<long>(long value);
44template float absval<float>(float value);
45template double absval<double>(double value);
[190]46//--------------------------------------------------------------------
47
48template <class T> void findMinMax(const T *array, const int size,
49                                   T &min, T &max)
50{
[222]51  /**
52   * A function to find the minimum and maximum values of a set of numbers.
53   * \param array The array of data values. Type independent.
54   * \param size The length of the array
55   * \param min The returned value of the minimum value in the array.
56   * \param max The returned value of the maximum value in the array.
57   */
[70]58  min = max = array[0];
59  for(int i=1;i<size;i++) {
60    if(array[i]<min) min=array[i];
61    if(array[i]>max) max=array[i];
62  }
63}
[190]64template void findMinMax<int>(const int *array, const int size,
[222]65                                 int &min, int &max);
66template void findMinMax<long>(const long *array, const int size,
67                                  long &min, long &max);
[190]68template void findMinMax<float>(const float *array, const int size,
[222]69                                   float &min, float &max);
70template void findMinMax<double>(const double *array, const int size,
71                                    double &min, double &max);
[190]72//--------------------------------------------------------------------
[70]73
[190]74template <class T> float findMean(T *array, int size)
[53]75{
[222]76  /**
77   * Find the mean of an array of numbers. Type independent.
78   * \param array The array of numbers.
79   * \param size The length of the array.
80   * \return The mean value of the array, returned as a float
81   */
[53]82  float mean = array[0];
83  for(int i=1;i<size;i++) mean += array[i];
84  mean /= float(size);
85  return mean;
86}
[190]87template float findMean<int>(int *array, int size);
[222]88template float findMean<long>(long *array, int size);
[190]89template float findMean<float>(float *array, int size);
[222]90template float findMean<double>(double *array, int size);
[190]91//--------------------------------------------------------------------
[53]92
[190]93template <class T> float findStddev(T *array, int size)
[53]94{
[222]95  /**
96   * Find the rms or standard deviation of an array of numbers. Type independent.
97   * \param array The array of numbers.
98   * \param size The length of the array.
99   * \return The rms value of the array, returned as a float
100   */
[53]101  float mean = findMean(array,size);
[78]102  float stddev = (array[0]-mean) * (array[0]-mean);
103  for(int i=1;i<size;i++) stddev += (array[i]-mean)*(array[i]-mean);
[79]104  stddev = sqrt(stddev/float(size-1));
[78]105  return stddev;
[53]106}
[190]107template float findStddev<int>(int *array, int size);
[222]108template float findStddev<long>(long *array, int size);
[190]109template float findStddev<float>(float *array, int size);
[222]110template float findStddev<double>(double *array, int size);
[190]111//--------------------------------------------------------------------
[53]112
[190]113template <class T> T findMedian(T *array, int size)
[53]114{
[222]115  /**
116   * Find the median value of an array of numbers. Type independent.
117   * \param array The array of numbers.
118   * \param size The length of the array.
119   * \return The median value of the array, returned as the same type as the array.
120   */
[190]121  T *newarray = new T[size];
122  T median;
[53]123  for(int i=0;i<size;i++) newarray[i] = array[i];
[212]124  std::sort(newarray,newarray+size);
[190]125  if((size%2)==0) median = (newarray[size/2-1]+newarray[size/2])/2;
[53]126  else median = newarray[size/2];
127  delete [] newarray;
128  return median;
129}
[190]130template int findMedian<int>(int *array, int size);
[222]131template long findMedian<long>(long *array, int size);
[190]132template float findMedian<float>(float *array, int size);
[222]133template double findMedian<double>(double *array, int size);
[190]134//--------------------------------------------------------------------
[53]135
[190]136template <class T> T findMADFM(T *array, int size)
[53]137{
[222]138  /**
139   * Find the median absolute deviation from the median value of an
140   * array of numbers. Type independent.
141   *
142   * \param array The array of numbers.
143   * \param size The length of the array.
144   * \return The median absolute deviation from the median value of
145   * the array, returned as the same type as the array.
146   */
[190]147  T *newarray = new T[size];
[192]148  T median = findMedian<T>(array,size);
[190]149  T madfm;
150  for(int i=0;i<size;i++) newarray[i] = absval(array[i]-median);
[212]151  std::sort(newarray,newarray+size);
[190]152  if((size%2)==0) madfm = (newarray[size/2-1]+newarray[size/2])/2;
[53]153  else madfm = newarray[size/2];
154  delete [] newarray;
155  return madfm;
156}
[190]157template int findMADFM<int>(int *array, int size);
[222]158template long findMADFM<long>(long *array, int size);
[190]159template float findMADFM<float>(float *array, int size);
[222]160template double findMADFM<double>(double *array, int size);
[190]161//--------------------------------------------------------------------
162
163template <class T> void findMedianStats(T *array, int size,
164                                        T &median, T &madfm)
[3]165{
[222]166  /**
167   * Find the median and the median absolute deviation from the median
168   * value of an array of numbers. Type independent.
169   *
170   * \param array The array of numbers.
171   * \param size The length of the array.
[275]172   * \param median The median value of the array, returned as the same
173   * type as the array.
174   * \param madfm The median absolute deviation from the median value
175   * of the array, returned as the same type as the array.
[222]176   */
[190]177  if(size==0){
178    std::cerr << "Error in findMedianStats: zero sized array!\n";
179    return;
180  }
181  T *newarray = new T[size];
[3]182
183  for(int i=0;i<size;i++) newarray[i] = array[i];
[212]184  std::sort(newarray,newarray+size);
[190]185  if((size%2)==0) median = (newarray[size/2-1]+newarray[size/2])/2;
[3]186  else median = newarray[size/2];
187
[190]188  for(int i=0;i<size;i++) newarray[i] = absval(array[i]-median);
[212]189  std::sort(newarray,newarray+size);
[190]190  if((size%2)==0) madfm = (newarray[size/2-1]+newarray[size/2])/2;
[3]191  else madfm = newarray[size/2];
192
193  delete [] newarray;
194}
[190]195template void findMedianStats<int>(int *array, int size,
[222]196                                      int &median, int &madfm);
[190]197template void findMedianStats<long>(long *array, int size,
[222]198                                       long &median, long &madfm);
[190]199template void findMedianStats<float>(float *array, int size,
[222]200                                        float &median, float &madfm);
[190]201template void findMedianStats<double>(double *array, int size,
[222]202                                         double &median, double &madfm);
[190]203//--------------------------------------------------------------------
204
[275]205template <class T> void findMedianStats(T *array, int size, bool *mask,
[190]206                                        T &median, T &madfm)
[3]207{
[222]208  /**
209   * Find the median and the median absolute deviation from the median
210   * value of a subset of an array of numbers. The subset is defined
211   * by an array of bool variables. Type independent.
212   *
213   * \param array The array of numbers.
214   * \param size The length of the array.
[275]215   * \param mask An array of the same length that says whether to
216   * include each member of the array in the calculations. Only use
217   * values where mask=true.
218   * \param median The median value of the array, returned as the same
219   * type as the array.
220   * \param madfm The median absolute deviation from the median value
221   * of the array, returned as the same type as the array.
[222]222   */
[190]223  int goodSize=0;
[275]224  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
[190]225  if(goodSize==0){
226    std::cerr << "Error in findMedianStats: no good values!\n";
227    return;
228  }
229  T *newarray = new T[goodSize];
[275]230  goodSize=0;
231  for(int i=0;i<size;i++) if(mask[i]) newarray[goodSize++] = array[i];
[212]232  std::sort(newarray,newarray+goodSize);
[190]233  if((goodSize%2)==0)
234    median = (newarray[goodSize/2-1]+newarray[goodSize/2])/2;
235  else
236    median = newarray[goodSize/2];
[3]237
[190]238  for(int i=0;i<goodSize;i++) newarray[i] = absval(newarray[i]-median);
[212]239  std::sort(newarray,newarray+goodSize);
[190]240  if((goodSize%2)==0)
241    madfm = (newarray[goodSize/2-1]+newarray[goodSize/2])/2;
242  else
243    madfm = newarray[goodSize/2];
[3]244
245  delete [] newarray;
246}
[275]247template void findMedianStats<int>(int *array, int size, bool *mask,
[222]248                                      int &median, int &madfm);
[275]249template void findMedianStats<long>(long *array, int size, bool *mask,
[222]250                                       long &median, long &madfm);
[275]251template void findMedianStats<float>(float *array, int size, bool *mask,
[222]252                                        float &median, float &madfm);
[275]253template void findMedianStats<double>(double *array, int size, bool *mask,
[222]254                                         double &median, double &madfm);
[190]255//--------------------------------------------------------------------
[3]256 
257
[190]258template <class T> void findNormalStats(T *array, int size,
259                                        float &mean, float &stddev)
[3]260{
[222]261  /**
[275]262   * Find the mean and rms or standard deviation of an array of
263   * numbers. Type independent.
[222]264   *
265   * \param array The array of numbers.
266   * \param size The length of the array.
267   * \param mean The mean value of the array, returned as a float.
[275]268   * \param stddev The rms or standard deviation of the array,
269   * returned as a float.
[222]270   */
[190]271  if(size==0){
272    std::cerr << "Error in findNormalStats: zero sized array!\n";
273    return;
274  }
[3]275  mean = array[0];
276  for(int i=1;i<size;i++){
277    mean += array[i];
278  }
279  mean /= float(size);
280
[79]281  stddev = (array[0]-mean) * (array[0]-mean);
[275]282  for(int i=1;i<size;i++) stddev += (array[i]-mean)*(array[i]-mean);
[79]283  stddev = sqrt(stddev/float(size-1));
[3]284
285}
[190]286template void findNormalStats<int>(int *array, int size,
[222]287                                      float &mean, float &stddev);
[190]288template void findNormalStats<long>(long *array, int size,
[222]289                                       float &mean, float &stddev);
[190]290template void findNormalStats<float>(float *array, int size,
[222]291                                        float &mean, float &stddev);
[190]292template void findNormalStats<double>(double *array, int size,
[222]293                                         float &mean, float &stddev);
[190]294//--------------------------------------------------------------------
[3]295
[275]296template <class T> void findNormalStats(T *array, int size, bool *mask,
[190]297                                        float &mean, float &stddev)
[70]298{
[222]299  /**
300   * Find the mean and rms or standard deviation of a subset of an
301   * array of numbers. The subset is defined by an array of bool
302   * variables. Type independent.
303   *
304   * \param array The array of numbers.
305   * \param size The length of the array.
[275]306   * \param mask An array of the same length that says whether to
307   * include each member of the array in the calculations. Only look
308   * at values where mask=true.
[222]309   * \param mean The mean value of the array, returned as a float.
[275]310   * \param stddev The rms or standard deviation of the array,
311   * returned as a float.
[222]312   */
[190]313  int goodSize=0;
[275]314  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
[190]315  if(goodSize==0){
316    std::cerr << "Error in findNormalStats: no good values!\n";
317    return;
318  }
319  int start=0;
[275]320  while(!mask[start]){start++;}
[190]321  mean = array[start];
322  for(int i=start+1;i<size;i++){
[275]323    if(mask[i]) mean += array[i];
[190]324  }
325  mean /= float(goodSize);
[70]326
[190]327  stddev = (array[start]-mean) * (array[start]-mean);
328  for(int i=1;i<size;i++){
[275]329    if(mask[i]) stddev += (array[i]-mean)*(array[i]-mean);
[190]330  }
331  stddev = sqrt(stddev/float(goodSize-1));
332
333}
[275]334template void findNormalStats<int>(int *array, int size, bool *mask,
[222]335                                      float &mean, float &stddev);
[275]336template void findNormalStats<long>(long *array, int size, bool *mask,
[222]337                                       float &mean, float &stddev);
[275]338template void findNormalStats<float>(float *array, int size, bool *mask,
[222]339                                        float &mean, float &stddev);
[275]340template void findNormalStats<double>(double *array, int size, bool *mask,
[222]341                                         float &mean, float &stddev);
[190]342//--------------------------------------------------------------------
[275]343//--------------------------------------------------------------------
344 
345
346template <class T> void findAllStats(T *array, int size,
347                                     float &mean, float &stddev,
348                                     T &median, T &madfm)
349{
350  /**
351   * Find the mean,rms (or standard deviation), median AND madfm of an
352   * array of numbers. Type independent.
353   *
354   * \param array The array of numbers.
355   * \param size The length of the array.
356   * \param mean The mean value of the array, returned as a float.
357   * \param stddev The rms or standard deviation of the array,
358   * returned as a float.
359   * \param median The median value of the array, returned as the same
360   * type as the array.
361   * \param madfm The median absolute deviation from the median value
362   * of the array, returned as the same type as the array.
363   */
364  if(size==0){
[285]365    std::cerr << "Error in findAllStats: zero sized array!\n";
[275]366    return;
367  }
368
369  T *newarray = new T[size];
370
371  for(int i=0;i<size;i++) newarray[i] = array[i];
[285]372
373  mean = array[0];
374  for(int i=1;i<size;i++) mean += array[i];
375  mean /= float(size);
376
377  stddev = (array[0]-mean) * (array[0]-mean);
378  for(int i=1;i<size;i++) stddev += (array[i]-mean)*(array[i]-mean);
379  stddev = sqrt(stddev/float(size-1));
380
[275]381  std::sort(newarray,newarray+size);
382  if((size%2)==0) median = (newarray[size/2-1]+newarray[size/2])/2;
383  else median = newarray[size/2];
384
[285]385  for(int i=0;i<size;i++) newarray[i] = absval(newarray[i]-median);
[275]386  std::sort(newarray,newarray+size);
387  if((size%2)==0) madfm = (newarray[size/2-1]+newarray[size/2])/2;
388  else madfm = newarray[size/2];
389
390  delete [] newarray;
391
392}
393template void findAllStats<int>(int *array, int size,
394                                float &mean, float &stddev,
395                                int &median, int &madfm);
396template void findAllStats<long>(long *array, int size,
397                                 float &mean, float &stddev,
398                                 long &median, long &madfm);
399template void findAllStats<float>(float *array, int size,
400                                  float &mean, float &stddev,
401                                  float &median, float &madfm);
402template void findAllStats<double>(double *array, int size,
403                                   float &mean, float &stddev,
404                                   double &median, double &madfm);
405//--------------------------------------------------------------------
406
407template <class T> void findAllStats(T *array, int size, bool *mask,
408                                     float &mean, float &stddev,
409                                     T &median, T &madfm)
410{
411  /**
412   * Find the mean,rms (or standard deviation), median AND madfm of a
413   * subset of an array of numbers. Type independent.The subset is
414   * defined by an array of bool variables. Type independent.
415   *
416   * \param array The array of numbers.
417   * \param size The length of the array.
418   * \param mask An array of the same length that says whether to
419   * include each member of the array in the calculations. Only look
420   * at values where mask=true.
421   * \param mean The mean value of the array, returned as a float.
422   * \param stddev The rms or standard deviation of the array,
423   * returned as a float.
424   * \param median The median value of the array, returned as the same
425   * type as the array.
426   * \param madfm The median absolute deviation from the median value
427   * of the array, returned as the same type as the array.
428   */
429  int goodSize=0;
430  for(int i=0;i<size;i++) if(mask[i]) goodSize++;
431  if(goodSize==0){
432    std::cerr << "Error in findAllStats: no good values!\n";
433    return;
434  }
435
436  T *newarray = new T[goodSize];
437  goodSize=0;
438  for(int i=0;i<size;i++) if(mask[i]) newarray[goodSize++] = array[i];
439
[282]440  mean = 0.;
441  for(int i=0;i<goodSize;i++) mean += newarray[i];
[275]442  mean /= float(goodSize);
443
[282]444  stddev = 0.;
445  for(int i=0;i<goodSize;i++) stddev += (newarray[i]-mean)*(newarray[i]-mean);
[275]446  stddev = sqrt(stddev/float(goodSize-1));
447
[285]448  std::sort(newarray,newarray+goodSize);
449  if((goodSize%2)==0) median = (newarray[goodSize/2-1]+newarray[goodSize/2])/2;
450  else median = newarray[goodSize/2];
451
452  for(int i=0;i<goodSize;i++) newarray[i] = absval(newarray[i]-median);
453  std::sort(newarray,newarray+goodSize);
454  if((goodSize%2)==0) madfm = (newarray[goodSize/2-1]+newarray[goodSize/2])/2;
455  else madfm = newarray[goodSize/2];
456
[282]457  delete [] newarray;
458
[275]459}
460template void findAllStats<int>(int *array, int size, bool *mask,
461                                float &mean, float &stddev,
462                                int &median, int &madfm);
463template void findAllStats<long>(long *array, int size, bool *mask,
464                                 float &mean, float &stddev,
465                                 long &median, long &madfm);
466template void findAllStats<float>(float *array, int size, bool *mask,
467                                  float &mean, float &stddev,
468                                  float &median, float &madfm);
469template void findAllStats<double>(double *array, int size, bool *mask,
470                                   float &mean, float &stddev,
471                                   double &median, double &madfm);
472//--------------------------------------------------------------------
Note: See TracBrowser for help on using the repository browser.