source: trunk/src/Utils/getNormalStats.cc @ 891

Last change on this file since 891 was 891, checked in by MatthewWhiting, 12 years ago

Changing some of the non-robust stats calculations so that we do all summing using doubles. This should provide more accurate measures of the quantities being summed than using floats. Triggers errors on verification8.

File size: 17.2 KB
Line 
1// -----------------------------------------------------------------------
2// getStats.cc: Basic functions to calculate statistical parameters
3//              such as mean, median, rms, madfm, min, max.
4// -----------------------------------------------------------------------
5// Copyright (C) 2006, Matthew Whiting, ATNF
6//
7// This program is free software; you can redistribute it and/or modify it
8// under the terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 2 of the License, or (at your
10// option) any later version.
11//
12// Duchamp is distributed in the hope that it will be useful, but WITHOUT
13// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15// for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with Duchamp; if not, write to the Free Software Foundation,
19// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
20//
21// Correspondence concerning Duchamp may be directed to:
22//    Internet email: Matthew.Whiting [at] atnf.csiro.au
23//    Postal address: Dr. Matthew Whiting
24//                    Australia Telescope National Facility, CSIRO
25//                    PO Box 76
26//                    Epping NSW 1710
27//                    AUSTRALIA
28// -----------------------------------------------------------------------
29#include <iostream>
30#include <algorithm>
31#include <math.h>
32#include <duchamp/Utils/utils.hh>
33
34template <class T> float findMean(T *array, size_t size)
35{
36  /// @details
37  /// Find the mean of an array of numbers. Type independent.
38  /// \param array The array of numbers.
39  /// \param size The length of the array.
40  /// \return The mean value of the array, returned as a float
41  double mean = 0;
42  for(size_t i=0;i<size;i++) mean += double(array[i]);
43  if(size>0)
44    mean /= double(size);
45  return float(mean);
46}
47template float findMean<int>(int *array, size_t size);
48template float findMean<long>(long *array, size_t size);
49template float findMean<float>(float *array, size_t size);
50template float findMean<double>(double *array, size_t size);
51//--------------------------------------------------------------------
52
53template <class T> float findMeanDiff(T *first, T *second, size_t size)
54{
55  /// @details
56  /// Find the mean of an array of numbers. Type independent.
57  /// \param array The array of numbers.
58  /// \param size The length of the array.
59  /// \return The mean value of the array, returned as a float
60  double mean = 0;
61  for(size_t i=0;i<size;i++) mean += double(first[i]-second[i]);
62  if(size>0)
63    mean /= double(size);
64  return float(mean);
65}
66template float findMeanDiff<int>(int *first, int *second, size_t size);
67template float findMeanDiff<long>(long *first, long *second, size_t size);
68template float findMeanDiff<float>(float *first, float *second, size_t size);
69template float findMeanDiff<double>(double *first, double *second, size_t size);
70//--------------------------------------------------------------------
71
72template <class T> float findMean(T *array, bool *mask, size_t size)
73{
74  /// @details
75  /// Find the mean of an array of numbers. Type independent.
76  /// \param array The array of numbers.
77  /// \param mask An array of the same length that says whether to
78  /// include each member of the array in the calculations. Only use
79  /// values where mask=true.
80  /// \param size The length of the array.
81  /// \return The mean value of the array, returned as a float
82  double mean = 0.;
83  int ct=0;
84  for(size_t i=0;i<size;i++){
85    if(mask[i]){
86      mean += double(array[i]);
87      ct++;
88    }
89  }
90  if(ct>0) mean /= double(ct);
91  return float(mean);
92}
93template float findMean<int>(int *array, bool *mask, size_t size);
94template float findMean<long>(long *array, bool *mask, size_t size);
95template float findMean<float>(float *array, bool *mask, size_t size);
96template float findMean<double>(double *array, bool *mask, size_t size);
97//--------------------------------------------------------------------
98
99template <class T> float findMeanDiff(T *first, T *second, bool *mask, size_t size)
100{
101  /// @details
102  /// Find the mean of an array of numbers. Type independent.
103  /// \param array The array of numbers.
104  /// \param mask An array of the same length that says whether to
105  /// include each member of the array in the calculations. Only use
106  /// values where mask=true.
107  /// \param size The length of the array.
108  /// \return The mean value of the array, returned as a float
109  double mean = 0.;
110  int ct=0;
111  for(size_t i=0;i<size;i++){
112    if(mask[i]){
113      mean += double(first[i]-second[i]);
114      ct++;
115    }
116  }
117  if(ct>0) mean /= double(ct);
118  return float(mean);
119}
120template float findMeanDiff<int>(int *first, int *second, bool *mask, size_t size);
121template float findMeanDiff<long>(long *first, long *second, bool *mask, size_t size);
122template float findMeanDiff<float>(float *first, float *second, bool *mask, size_t size);
123template float findMeanDiff<double>(double *first, double *second, bool *mask, size_t size);
124//--------------------------------------------------------------------
125
126template <class T> float findStddev(T *array, size_t size)
127{
128  /// @details Find the rms or standard deviation of an array of
129  /// numbers. Type independent. Calculated by iterating only once,
130  /// using \sum x and \sum x^2 (no call to findMean)
131  /// \param array The array of numbers.
132  /// \param size The length of the array.
133  /// \return The rms value of the array, returned as a float
134
135  // double mean = double(findMean(array,size));
136  // double stddev = (double(array[0])-mean) * (double(array[0])-mean);
137  // for(size_t i=1;i<size;i++) stddev += (double(array[i])-mean)*(double(array[i])-mean);
138  // double stddev = stddev/double(size-1));
139
140  // T sumx=0,sumxx=0;
141  double sumx=0.,sumxx=0.;
142  double stddev=0.;
143  double dsize=double(size);
144  for(size_t i=0;i<size;i++){
145    sumx += array[i];
146    sumxx += (array[i]*array[i]);
147  }
148   // std::cerr << sumx << " " << sumxx << " " << size << " " << size*size <<"\n";
149   // std::cerr << double(sumx) << " " << double(sumxx) << " " << double(size) << " " << double(size*size)<< "\n";
150   //  double mean=double(sumx)/double(size);
151  double mean=sumx/dsize;
152  if(size>0)
153    // stddev = sqrt(double(sumxx)/double(size) - double(sumx*sumx)/double(size*size));
154    //    stddev = sqrt(double(sumxx)/double(size) - mean*mean);
155    stddev = sqrt(sumxx/dsize - mean*mean);
156   // std::cerr << stddev << " " << float(stddev) << "\n";
157  return float(stddev);
158}
159template float findStddev<int>(int *array, size_t size);
160template float findStddev<long>(long *array, size_t size);
161template float findStddev<float>(float *array, size_t size);
162template float findStddev<double>(double *array, size_t size);
163//--------------------------------------------------------------------
164
165template <class T> float findStddevDiff(T *first, T *second, size_t size)
166{
167  /// @details Find the rms or standard deviation of an array of
168  /// numbers. Type independent. Calculated by iterating only once,
169  /// using \sum x and \sum x^2 (no call to findMean)
170  /// \param array The array of numbers.
171  /// \param size The length of the array.
172  /// \return The rms value of the array, returned as a float
173
174  // T sumx=0,sumxx=0;
175  double sumx=0.,sumxx=0.;
176  double stddev=0.;
177  double dsize=double(size);
178  for(size_t i=0;i<size;i++){
179    sumx += (first[i]-second[i]);
180    sumxx += ((first[i]-second[i])*(first[i]-second[i]));
181  }
182  double mean=sumx/dsize;
183  if(size>0)
184    stddev = sqrt(sumxx/dsize - mean*mean);
185    // stddev = sqrt(double(sumxx)/double(size) - double(sumx*sumx)/double(size*size));
186  return float(stddev);
187}
188template float findStddevDiff<int>(int *first, int *second, size_t size);
189template float findStddevDiff<long>(long *first, long *second, size_t size);
190template float findStddevDiff<float>(float *first, float *second, size_t size);
191template float findStddevDiff<double>(double *first, double *second, size_t size);
192//--------------------------------------------------------------------
193
194template <class T> float findStddev(T *array, bool *mask, size_t size)
195{
196  /// @details Find the rms or standard deviation of an array of
197  /// numbers. Type independent. Calculated by iterating only once,
198  /// using \sum x and \sum x^2 (no call to findMean)
199  /// \param array The array of numbers.
200  /// \param mask An array of the same length that says whether to
201  /// include each member of the array in the calculations. Only use
202  /// values where mask=true.
203  /// \param size The length of the array.
204  /// \return The rms value of the array, returned as a float
205
206  // T sumx=0,sumxx=0;
207  double sumx=0.,sumxx=0.;
208  double stddev=0.;
209  int ct=0;
210  for(size_t i=0;i<size;i++){
211    if(mask[i]){
212      sumx += array[i];
213      sumxx += (array[i]*array[i]);
214      ct++;
215    }
216  }
217  double dct=double(ct);
218  double mean=sumx/dct;
219  if(ct>0)
220    stddev = sqrt(sumxx/dct - mean*mean);
221    // stddev = sqrt(double(sumxx)/double(ct) - double(sumx*sumx)/double(ct*ct));
222  return float(stddev);
223}
224template float findStddev<int>(int *array, bool *mask, size_t size);
225template float findStddev<long>(long *array, bool *mask, size_t size);
226template float findStddev<float>(float *array, bool *mask, size_t size);
227template float findStddev<double>(double *array, bool *mask, size_t size);
228//--------------------------------------------------------------------
229
230template <class T> float findStddevDiff(T *first, T *second, bool *mask, size_t size)
231{
232  /// @details Find the rms or standard deviation of an array of
233  /// numbers. Type independent. Calculated by iterating only once,
234  /// using \sum x and \sum x^2 (no call to findMean)
235  /// \param array The array of numbers.
236  /// \param mask An array of the same length that says whether to
237  /// include each member of the array in the calculations. Only use
238  /// values where mask=true.
239  /// \param size The length of the array.
240  /// \return The rms value of the array, returned as a float
241
242  // T sumx=0,sumxx=0;
243  double sumx=0,sumxx=0;
244  double stddev=0;
245  int ct=0;
246  for(size_t i=0;i<size;i++){
247    if(mask[i]){
248      sumx += (first[i]-second[i]);
249      sumxx += ((first[i]-second[i])*(first[i]-second[i]));
250      ct++;
251    }
252  }
253  double dct=double(ct);
254  double mean=sumx/dct;
255  if(ct>0)
256    stddev = sqrt(sumxx/dct - mean*mean);
257    // stddev = sqrt(double(sumxx)/double(ct) - double(sumx*sumx)/double(ct*ct));
258  return float(stddev);
259}
260template float findStddevDiff<int>(int *first, int *second, bool *mask, size_t size);
261template float findStddevDiff<long>(long *first, long *second, bool *mask, size_t size);
262template float findStddevDiff<float>(float *first, float *second, bool *mask, size_t size);
263template float findStddevDiff<double>(double *first, double *second, bool *mask, size_t size);
264//--------------------------------------------------------------------
265
266template <class T> void findNormalStats(T *array, size_t size,
267                                        float &mean, float &stddev)
268{
269  /// @details
270  /// Find the mean and rms or standard deviation of an array of
271  /// numbers. Type independent.
272  ///
273  /// \param array The array of numbers.
274  /// \param size The length of the array.
275  /// \param mean The mean value of the array, returned as a float.
276  /// \param stddev The rms or standard deviation of the array,
277  /// returned as a float.
278
279  if(size==0){
280    std::cerr << "Error in findNormalStats: zero sized array!\n";
281    return;
282  }
283  mean = array[0];
284  for(size_t i=1;i<size;i++){
285    mean += array[i];
286  }
287  mean /= float(size);
288
289  stddev = (array[0]-mean) * (array[0]-mean);
290  for(size_t i=1;i<size;i++) stddev += (array[i]-mean)*(array[i]-mean);
291  stddev = sqrt(stddev/float(size-1));
292
293}
294template void findNormalStats<int>(int *array, size_t size,
295                                      float &mean, float &stddev);
296template void findNormalStats<long>(long *array, size_t size,
297                                       float &mean, float &stddev);
298template void findNormalStats<float>(float *array, size_t size,
299                                        float &mean, float &stddev);
300template void findNormalStats<double>(double *array, size_t size,
301                                         float &mean, float &stddev);
302//--------------------------------------------------------------------
303
304template <class T> void findNormalStats(T *array, size_t size, bool *mask,
305                                        float &mean, float &stddev)
306{
307  /// @details
308  /// Find the mean and rms or standard deviation of a subset of an
309  /// array of numbers. The subset is defined by an array of bool
310  /// variables. Type independent.
311  ///
312  /// \param array The array of numbers.
313  /// \param size The length of the array.
314  /// \param mask An array of the same length that says whether to
315  /// include each member of the array in the calculations. Only look
316  /// at values where mask=true.
317  /// \param mean The mean value of the array, returned as a float.
318  /// \param stddev The rms or standard deviation of the array,
319  /// returned as a float.
320
321  int goodSize=0;
322  for(size_t i=0;i<size;i++) if(mask[i]) goodSize++;
323  if(goodSize==0){
324    std::cerr << "Error in findNormalStats: no good values!\n";
325    return;
326  }
327  int start=0;
328  while(!mask[start]){start++;}
329  mean = array[start];
330  for(size_t i=start+1;i<size;i++){
331    if(mask[i]) mean += array[i];
332  }
333  mean /= float(goodSize);
334
335  stddev = (array[start]-mean) * (array[start]-mean);
336  for(size_t i=1;i<size;i++){
337    if(mask[i]) stddev += (array[i]-mean)*(array[i]-mean);
338  }
339  stddev = sqrt(stddev/float(goodSize-1));
340
341}
342template void findNormalStats<int>(int *array, size_t size, bool *mask,
343                                      float &mean, float &stddev);
344template void findNormalStats<long>(long *array, size_t size, bool *mask,
345                                       float &mean, float &stddev);
346template void findNormalStats<float>(float *array, size_t size, bool *mask,
347                                        float &mean, float &stddev);
348template void findNormalStats<double>(double *array, size_t size, bool *mask,
349                                         float &mean, float &stddev);
350//-------------------------------------------------------------------- 
351
352template <class T> void findNormalStatsDiff(T *first, T *second, size_t size,
353                                            float &mean, float &stddev)
354{
355  /// @details
356  /// Find the mean and rms or standard deviation of  the difference between two arrays of
357  /// numbers. The difference is defined as first - second. Type independent.
358  ///
359  /// \param first The first array
360  /// \param second The second array
361  /// \param size The length of the array.
362  /// \param mean The mean value of the array, returned as a float.
363  /// \param stddev The rms or standard deviation of the array,
364  /// returned as a float.
365
366  if(size==0){
367    std::cerr << "Error in findNormalStats: zero sized array!\n";
368    return;
369  }
370  mean = first[0]-second[0];
371  for(size_t i=1;i<size;i++){
372    mean += (first[i]-second[i]);
373  }
374  mean /= float(size);
375
376  stddev = (first[0]-second[0]-mean) * (first[0]-second[0]-mean);
377  for(size_t i=1;i<size;i++) stddev += (first[i]-second[i]-mean)*(first[i]-second[i]-mean);
378  stddev = sqrt(stddev/float(size-1));
379
380}
381template void findNormalStatsDiff<int>(int *first, int *second, size_t size,
382                                       float &mean, float &stddev);
383template void findNormalStatsDiff<long>(long *first, long *second, size_t size,
384                                        float &mean, float &stddev);
385template void findNormalStatsDiff<float>(float *first, float *second, size_t size,
386                                         float &mean, float &stddev);
387template void findNormalStatsDiff<double>(double *first, double *second, size_t size,
388                                          float &mean, float &stddev);
389//--------------------------------------------------------------------
390
391template <class T> void findNormalStatsDiff(T *first, T *second, size_t size, bool *mask,
392                                            float &mean, float &stddev)
393{
394  /// @details Find the mean and rms or standard deviation of the
395  /// difference between two arrays of numbers, where some elements
396  /// are masked out. The mask is defined by an array of bool
397  /// variables, and the difference is defined as first - second. Type
398  /// independent.
399  ///
400  /// \param first The first array
401  /// \param second The second array
402  /// \param size The length of the array.
403  /// \param mask An array of the same length that says whether to
404  /// include each member of the array in the calculations. Only look
405  /// at values where mask=true.
406  /// \param mean The mean value of the array, returned as a float.
407  /// \param stddev The rms or standard deviation of the array,
408  /// returned as a float.
409
410  int goodSize=0;
411  for(size_t i=0;i<size;i++) if(mask[i]) goodSize++;
412  if(goodSize==0){
413    std::cerr << "Error in findNormalStats: no good values!\n";
414    return;
415  }
416  int start=0;
417  while(!mask[start]){start++;}
418  mean = first[start]-second[start];
419  for(size_t i=start+1;i<size;i++){
420    if(mask[i]) mean += (first[i]-second[i]);
421  }
422  mean /= float(goodSize);
423
424  stddev = (first[start]-second[start]-mean) * (first[start]-second[start]-mean);
425  for(size_t i=1;i<size;i++){
426    if(mask[i]) stddev += (first[i]-second[i]-mean)*(first[i]-second[i]-mean);
427  }
428  stddev = sqrt(stddev/float(goodSize-1));
429
430}
431template void findNormalStatsDiff<int>(int *first, int *second, size_t size, bool *mask,
432                                       float &mean, float &stddev);
433template void findNormalStatsDiff<long>(long *first, long *second, size_t size, bool *mask,
434                                        float &mean, float &stddev);
435template void findNormalStatsDiff<float>(float *first, float *second, size_t size, bool *mask,
436                                         float &mean, float &stddev);
437template void findNormalStatsDiff<double>(double *first, double *second, size_t size, bool *mask,
438                                          float &mean, float &stddev);
439//--------------------------------------------------------------------
Note: See TracBrowser for help on using the repository browser.