Ignore:
Timestamp:
06/09/10 19:03:06 (14 years ago)
Author:
Kana Sugimoto
Message:

New Development: Yes

JIRA Issue: Yes (CAS-2211)

Ready for Test: Yes

Interface Changes: Yes

What Interface Changed: ASAP 3.0.0 interface changes

Test Programs:

Put in Release Notes: Yes

Module(s): all the CASA sd tools and tasks are affected.

Description: Merged ATNF-ASAP 3.0.0 developments to CASA (alma) branch.

Note you also need to update casa/code/atnf.


Location:
branches/alma
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/alma

  • branches/alma/src/STLineFinder.cpp

    r1603 r1757  
    3434#include "STLineFinder.h"
    3535#include "STFitter.h"
     36#include "IndexedCompare.h"
    3637
    3738// STL
     
    110111
    111112protected:
    112    // supplementary function to control running mean calculations.
    113    // It adds a specified channel to the running mean box and
     113   // supplementary function to control running mean/median calculations.
     114   // It adds a specified channel to the running box and
    114115   // removes (ch-maxboxnchan+1)'th channel from there
    115116   // Channels, for which the mask is false or index is beyond the
     
    152153                                           // last point of the detected line
    153154                                           //
     155   bool itsUseMedian;                      // true if median statistics is used
     156                                           // to determine the noise level, otherwise
     157                                           // it is the mean of the lowest 80% of deviations
     158                                           // (default)
     159   int itsNoiseSampleSize;                 // sample size used to estimate the noise statistics
     160                                           // Negative value means the whole spectrum is used (default)
    154161public:
    155162
     
    157164   LFAboveThreshold(std::list<pair<int,int> > &in_lines,
    158165                    int in_min_nchan = 3,
    159                     casa::Float in_threshold = 5) throw();
     166                    casa::Float in_threshold = 5,
     167                    bool use_median = false,
     168                    int noise_sample_size = -1) throw();
    160169   virtual ~LFAboveThreshold() throw();
    161170
     
    200209///////////////////////////////////////////////////////////////////////////////
    201210
     211///////////////////////////////////////////////////////////////////////////////
     212//
     213// LFNoiseEstimator  a helper class designed to estimate off-line variance
     214//                   using statistics depending on the distribution of
     215//                   values (e.g. like a median)
     216//
     217//                   Two statistics are supported: median and an average of
     218//                   80% of smallest values.
     219//
     220
     221struct LFNoiseEstimator {
     222   // construct an object
     223   // size - maximum sample size. After a size number of elements is processed
     224   // any new samples would cause the algorithm to drop the oldest samples in the
     225   // buffer.
     226   explicit LFNoiseEstimator(size_t size);
     227
     228   // add a new sample
     229   // in - the new value
     230   void add(float in);
     231
     232   // median of the distribution
     233   float median() const;
     234
     235   // mean of lowest 80% of the samples
     236   float meanLowest80Percent() const;
     237
     238   // return true if the buffer is full (i.e. statistics are representative)
     239   inline bool filledToCapacity() const { return itsBufferFull;}
     240
     241protected:
     242   // update cache of sorted indices
     243   // (it is assumed that itsSampleNumber points to the newly
     244   // replaced element)
     245   void updateSortedCache() const;
     246
     247   // build sorted cache from the scratch
     248   void buildSortedCache() const;
     249
     250   // number of samples accumulated so far
     251   // (can be less than the buffer size)
     252   size_t numberOfSamples() const;
     253
     254   // this helper method builds the cache if
     255   // necessary using one of the methods
     256   void fillCacheIfNecessary() const;
     257
     258private:
     259   // buffer with samples (unsorted)
     260   std::vector<float> itsVariances;
     261   // current sample number (<=itsVariances.size())
     262   size_t itsSampleNumber;
     263   // true, if the buffer all values in the sample buffer are used
     264   bool itsBufferFull;
     265   // cached indices into vector of samples
     266   mutable std::vector<size_t> itsSortedIndices;
     267   // true if any of the statistics have been obtained at least
     268   // once. This flag allows to implement a more efficient way of
     269   // calculating statistics, if they are needed at once and not
     270   // after each addition of a new element
     271   mutable bool itsStatisticsAccessed;
     272};
     273
     274//
     275///////////////////////////////////////////////////////////////////////////////
     276
     277
    202278} // namespace asap
     279
     280///////////////////////////////////////////////////////////////////////////////
     281//
     282// LFNoiseEstimator  a helper class designed to estimate off-line variance
     283//                   using statistics depending on the distribution of
     284//                   values (e.g. like a median)
     285//
     286//                   Two statistics are supported: median and an average of
     287//                   80% of smallest values.
     288//
     289
     290// construct an object
     291// size - maximum sample size. After a size number of elements is processed
     292// any new samples would cause the algorithm to drop the oldest samples in the
     293// buffer.
     294LFNoiseEstimator::LFNoiseEstimator(size_t size) : itsVariances(size),
     295     itsSampleNumber(0), itsBufferFull(false), itsSortedIndices(size),
     296     itsStatisticsAccessed(false)
     297{
     298   AlwaysAssert(size>0,AipsError);
     299}
     300
     301
     302// add a new sample
     303// in - the new value
     304void LFNoiseEstimator::add(float in)
     305{
     306   if (isnan(in)) {
     307       // normally it shouldn't happen
     308       return;
     309   }
     310   itsVariances[itsSampleNumber] = in;
     311
     312   if (itsStatisticsAccessed) {
     313       // only do element by element addition if on-the-fly
     314       // statistics are needed
     315       updateSortedCache();
     316   }
     317
     318   // advance itsSampleNumber now
     319   ++itsSampleNumber;
     320   if (itsSampleNumber == itsVariances.size()) {
     321       itsSampleNumber = 0;
     322       itsBufferFull = true;
     323   }
     324   AlwaysAssert(itsSampleNumber<itsVariances.size(),AipsError);
     325}
     326
     327// number of samples accumulated so far
     328// (can be less than the buffer size)
     329size_t LFNoiseEstimator::numberOfSamples() const
     330{
     331  // the number of samples accumulated so far may be less than the
     332  // buffer size
     333  const size_t nSamples = itsBufferFull ? itsVariances.size(): itsSampleNumber;
     334  AlwaysAssert( (nSamples > 0) && (nSamples <= itsVariances.size()), AipsError);
     335  return nSamples;
     336}
     337
     338// this helper method builds the cache if
     339// necessary using one of the methods
     340void LFNoiseEstimator::fillCacheIfNecessary() const
     341{
     342  if (!itsStatisticsAccessed) {
     343      if ((itsSampleNumber!=0) || itsBufferFull) {
     344          // build the whole cache efficiently
     345          buildSortedCache();
     346      } else {
     347          updateSortedCache();
     348      }
     349      itsStatisticsAccessed = true;
     350  } // otherwise, it is updated in 'add' using on-the-fly method
     351}
     352
     353// median of the distribution
     354float LFNoiseEstimator::median() const
     355{
     356  fillCacheIfNecessary();
     357  // the number of samples accumulated so far may be less than the
     358  // buffer size
     359  const size_t nSamples = numberOfSamples();
     360  const size_t medSample = nSamples / 2;
     361  AlwaysAssert(medSample < itsSortedIndices.size(), AipsError);
     362  return itsVariances[itsSortedIndices[medSample]];
     363}
     364
     365// mean of lowest 80% of the samples
     366float LFNoiseEstimator::meanLowest80Percent() const
     367{
     368  fillCacheIfNecessary();
     369  // the number of samples accumulated so far may be less than the
     370  // buffer size
     371  const size_t nSamples = numberOfSamples();
     372  float result = 0;
     373  size_t numpt=size_t(0.8*nSamples);
     374  if (!numpt) {
     375      numpt=nSamples; // no much else left,
     376                     // although it is very inaccurate
     377  }
     378  AlwaysAssert( (numpt > 0) && (numpt<itsSortedIndices.size()), AipsError);
     379  for (size_t ch=0; ch<numpt; ++ch) {
     380       result += itsVariances[itsSortedIndices[ch]];
     381  }
     382  result /= float(numpt);
     383  return result;
     384}
     385
     386// update cache of sorted indices
     387// (it is assumed that itsSampleNumber points to the newly
     388// replaced element)
     389void LFNoiseEstimator::updateSortedCache() const
     390{
     391  // the number of samples accumulated so far may be less than the
     392  // buffer size
     393  const size_t nSamples = numberOfSamples();
     394
     395  if (itsBufferFull) {
     396      // first find the index of the element which is being replaced
     397      size_t index = nSamples;
     398      for (size_t i=0; i<nSamples; ++i) {
     399           AlwaysAssert(i < itsSortedIndices.size(), AipsError);
     400           if (itsSortedIndices[i] == itsSampleNumber) {
     401               index = i;
     402               break;
     403           }
     404      }
     405      AlwaysAssert( index < nSamples, AipsError);
     406
     407      const vector<size_t>::iterator indStart = itsSortedIndices.begin();
     408      // merge this element with preceeding block first
     409      if (index != 0) {
     410          // merge indices on the basis of variances
     411          inplace_merge(indStart,indStart+index,indStart+index+1,
     412                        indexedCompare<size_t>(itsVariances.begin()));
     413      }
     414      // merge with the following block
     415      if (index + 1 != nSamples) {
     416          // merge indices on the basis of variances
     417          inplace_merge(indStart,indStart+index+1,indStart+nSamples,
     418                        indexedCompare<size_t>(itsVariances.begin()));
     419      }
     420  } else {
     421      // itsSampleNumber is the index of the new element
     422      AlwaysAssert(itsSampleNumber < itsSortedIndices.size(), AipsError);
     423      itsSortedIndices[itsSampleNumber] = itsSampleNumber;
     424      if (itsSampleNumber >= 1) {
     425          // we have to place this new sample in
     426          const vector<size_t>::iterator indStart = itsSortedIndices.begin();
     427          // merge indices on the basis of variances
     428          inplace_merge(indStart,indStart+itsSampleNumber,indStart+itsSampleNumber+1,
     429                        indexedCompare<size_t>(itsVariances.begin()));
     430      }
     431  }
     432}
     433
     434// build sorted cache from the scratch
     435void LFNoiseEstimator::buildSortedCache() const
     436{
     437  // the number of samples accumulated so far may be less than the
     438  // buffer size
     439  const size_t nSamples = numberOfSamples();
     440  AlwaysAssert(nSamples <= itsSortedIndices.size(), AipsError);
     441  for (size_t i=0; i<nSamples; ++i) {
     442       itsSortedIndices[i]=i;
     443  }
     444
     445  // sort indices, but check the array of variances
     446  const vector<size_t>::iterator indStart = itsSortedIndices.begin();
     447  stable_sort(indStart,indStart+nSamples, indexedCompare<size_t>(itsVariances.begin()));
     448}
     449
     450//
     451///////////////////////////////////////////////////////////////////////////////
    203452
    204453///////////////////////////////////////////////////////////////////////////////
     
    275524}
    276525
    277 // supplementary function to control running mean calculations.
    278 // It adds a specified channel to the running mean box and
     526// supplementary function to control running mean/median calculations.
     527// It adds a specified channel to the running box and
    279528// removes (ch-max_box_nchan+1)'th channel from there
    280529// Channels, for which the mask is false or index is beyond the
     
    339588                (meanch2-square(meanch));
    340589      linmean=coeff*(Float(cur_channel)-meanch)+mean;
    341       linvariance=sqrt(sumf2/Float(box_chan_cntr)-square(mean)-
    342                     square(coeff)*(meanch2-square(meanch)));
     590      linvariance=sumf2/Float(box_chan_cntr)-square(mean)-
     591                    square(coeff)*(meanch2-square(meanch));
     592      if (linvariance<0.) {
     593          // this shouldn't happen normally, but could be due to round-off error
     594          linvariance = 0;
     595      } else {
     596          linvariance = sqrt(linvariance);
     597      }
    343598  }
    344599  need2recalculate=False;
     
    351606///////////////////////////////////////////////////////////////////////////////
    352607//
    353 // LFAboveThreshold - a running mean algorithm for line detection
     608// LFAboveThreshold - a running mean/median algorithm for line detection
    354609//
    355610//
     
    359614LFAboveThreshold::LFAboveThreshold(std::list<pair<int,int> > &in_lines,
    360615                                   int in_min_nchan,
    361                                    casa::Float in_threshold) throw() :
     616                                   casa::Float in_threshold,
     617                                   bool use_median,
     618                                   int noise_sample_size) throw() :
    362619             min_nchan(in_min_nchan), threshold(in_threshold),
    363              lines(in_lines), running_box(NULL) {}
     620             lines(in_lines), running_box(NULL), itsUseMedian(use_median),
     621             itsNoiseSampleSize(noise_sample_size) {}
    364622
    365623LFAboveThreshold::~LFAboveThreshold() throw()
     
    474732      running_box=new RunningBox(spectrum,mask,edge,max_box_nchan);
    475733
    476 
    477734      // determine the off-line variance first
    478735      // an assumption made: lines occupy a small part of the spectrum
    479736
    480       std::vector<float> variances(edge.second-edge.first);
    481       DebugAssert(variances.size(),AipsError);
    482 
    483       for (;running_box->haveMore();running_box->next())
    484            variances[running_box->getChannel()-edge.first]=
    485                                 running_box->getLinVariance();
    486 
    487       // in the future we probably should do a proper Chi^2 estimation
    488       // now a simple 80% of smaller values will be used.
    489       // it may degrade the performance of the algorithm for weak lines
    490       // due to a bias of the Chi^2 distribution.
    491       stable_sort(variances.begin(),variances.end());
    492 
    493       Float offline_variance=0;
    494       uInt offline_cnt=uInt(0.8*variances.size());
    495       if (!offline_cnt) offline_cnt=variances.size(); // no much else left,
    496                                     // although it is very inaccurate
    497       for (uInt n=0;n<offline_cnt;++n)
    498            offline_variance+=variances[n];
    499       offline_variance/=Float(offline_cnt);
     737      const size_t noiseSampleSize = itsNoiseSampleSize<0 ? size_t(edge.second-edge.first) :
     738                      std::min(size_t(itsNoiseSampleSize), size_t(edge.second-edge.first));
     739      DebugAssert(noiseSampleSize,AipsError);
     740      const bool globalNoise = (size_t(edge.second - edge.first) == noiseSampleSize);
     741      LFNoiseEstimator ne(noiseSampleSize);
     742
     743      for (;running_box->haveMore();running_box->next()) {
     744           ne.add(running_box->getLinVariance());
     745           if (ne.filledToCapacity()) {
     746               break;
     747           }
     748      }
     749
     750      Float offline_variance = -1; // just a flag that it is unset
     751           
     752      if (globalNoise) {
     753          offline_variance = itsUseMedian ? ne.median() : ne.meanLowest80Percent();
     754      }
    500755
    501756      // actual search algorithm
     
    510765                                 running_box->next()) {
    511766           const int ch=running_box->getChannel();
    512            if (running_box->getNumberOfBoxPoints()>=minboxnchan)
     767           if (!globalNoise) {
     768               // add a next point for a local noise estimate
     769               ne.add(running_box->getLinVariance());
     770           }   
     771           if (running_box->getNumberOfBoxPoints()>=minboxnchan) {
     772               if (!globalNoise) {
     773                   offline_variance = itsUseMedian ? ne.median() : ne.meanLowest80Percent();
     774               }
     775               AlwaysAssert(offline_variance>0.,AipsError);
    513776               processChannel(mask[ch] && (fabs(running_box->aboveMean()) >=
    514777                  threshold*offline_variance), mask);
    515            else processCurLine(mask); // just finish what was accumulated before
     778           } else processCurLine(mask); // just finish what was accumulated before
    516779
    517780           signs[ch]=getAboveMeanSign();
    518            // os<<ch<<" "<<spectrum[ch]<<" "<<fabs(running_box->aboveMean())<<" "<<
    519            // threshold*offline_variance<<endl;
    520 
    521            const Float buf=running_box->aboveMean();
    522            if (buf>0) signs[ch]=1;
    523            else if (buf<0) signs[ch]=-1;
    524            else if (buf==0) signs[ch]=0;
    525            //os<<ch<<" "<<spectrum[ch]<<" "<<running_box->getLinMean()<<" "<<
    526            //             threshold*offline_variance<<endl;
     781            //os<<ch<<" "<<spectrum[ch]<<" "<<fabs(running_box->aboveMean())<<" "<<
     782            //threshold*offline_variance<<endl;
    527783      }
    528784      if (lines.size())
     
    649905//              Setting a very large value doesn't usually provide
    650906//              valid detections.
    651 // in_box_size  the box size for running mean calculation. Default is
     907// in_box_size  the box size for running mean/median calculation. Default is
    652908//              1./5. of the whole spectrum size
     909// in_noise_box the box size for off-line noise estimation (if working with
     910//              local noise. Negative value means use global noise estimate
     911//              Default is -1 (i.e. estimate using the whole spectrum)
     912// in_median    true if median statistics is used as opposed to average of
     913//              the lowest 80% of deviations (default)
    653914void STLineFinder::setOptions(const casa::Float &in_threshold,
    654915                              const casa::Int &in_min_nchan,
    655916                              const casa::Int &in_avg_limit,
    656                               const casa::Float &in_box_size) throw()
     917                              const casa::Float &in_box_size,
     918                              const casa::Float &in_noise_box,
     919                              const casa::Bool &in_median) throw()
    657920{
    658921  threshold=in_threshold;
     
    660923  avg_limit=in_avg_limit;
    661924  box_size=in_box_size;
     925  itsNoiseBox = in_noise_box;
     926  itsUseMedian = in_median;
    662927}
    663928
     
    683948                const casa::uInt &whichRow) throw(casa::AipsError)
    684949{
    685   //const int minboxnchan=4;
    686950  if (scan.null())
    687951      throw AipsError("STLineFinder::findLines - a scan should be set first,"
     
    700964      throw AipsError("STLineFinder::findLines - in_scan and in_mask have different"
    701965            "number of spectral channels.");
     966
     967  // taking flagged channels into account
     968  vector<bool> flaggedChannels = scan->getMask(whichRow);
     969  if (flaggedChannels.size()) {
     970      // there is a mask set for this row
     971      if (flaggedChannels.size() != mask.nelements()) {
     972          throw AipsError("STLineFinder::findLines - internal inconsistency: number of mask elements do not match the number of channels");
     973      }
     974      for (size_t ch = 0; ch<mask.nelements(); ++ch) {
     975           mask[ch] &= flaggedChannels[ch];
     976      }
     977  }
     978
    702979  // number of elements in in_edge
    703980  if (in_edge.size()>2)
     
    7321009      throw AipsError("STLineFinder::findLines - box_size is too small");
    7331010
     1011  // number of elements in the sample for noise estimate
     1012  const int noise_box = itsNoiseBox<0 ? -1 : int(nchan * itsNoiseBox);
     1013
     1014  if ((noise_box!= -1) and (noise_box<2))
     1015      throw AipsError("STLineFinder::findLines - noise_box is supposed to be at least 2 elements");
     1016
    7341017  spectrum.resize();
    7351018  spectrum = Vector<Float>(scan->getSpectrum(whichRow));
     
    7551038     try {
    7561039         // line find algorithm
    757          LFAboveThreshold lfalg(new_lines,avg_factor*min_nchan, threshold);
     1040         LFAboveThreshold lfalg(new_lines,avg_factor*min_nchan, threshold, itsUseMedian,noise_box);
    7581041         lfalg.findLines(spectrum,temp_mask,edge,max_box_nchan);
    7591042         signs.resize(lfalg.getSigns().nelements());
Note: See TracChangeset for help on using the changeset viewer.