Changeset 351 for trunk/src


Ignore:
Timestamp:
02/02/05 15:42:50 (20 years ago)
Author:
vor010
Message:

SDLineFinder - interface of auxiliary classes
has been changed to simplify experiments with algorithms. Functionality is
the same as before

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/SDLineFinder.cc

    r344 r351  
    3838#include <algorithm>
    3939#include <iostream>
     40#include <fstream>
    4041
    4142using namespace asap;
     
    101102///////////////////////////////////////////////////////////////////////////////
    102103//
    103 // LFRunningMean - a running mean algorithm for line detection
    104 //
    105 //
    106 
    107 // An auxiliary class implementing one pass of the line search algorithm,
    108 // which uses a running mean. We define this class here because it is
    109 // used in SDLineFinder only. The incapsulation of this code into a separate
    110 // class will provide a possibility to add new algorithms with minor changes
    111 class LFRunningMean {
     104// RunningBox -    a running box calculator. This class implements
     105//                 interations over the specified spectrum and calculates
     106//                 running box filter statistics.
     107//
     108
     109class RunningBox {
    112110   // The input data to work with. Use reference symantics to avoid
    113111   // an unnecessary copying   
     
    117115                                           // to work with
    118116   
    119    // statistics for running mean filtering
    120    casa::Float sum;       // sum of fluxes
    121    casa::Float sumsq;     // sum of squares of fluxes
     117   // statistics for running box filtering
     118   casa::Float sumf;       // sum of fluxes
     119   casa::Float sumf2;     // sum of squares of fluxes
     120   casa::Float sumch;       // sum of channel numbers (for linear fit)
     121   casa::Float sumch2;     // sum of squares of channel numbers (for linear fit)
     122   casa::Float sumfch;     // sum of flux*(channel number) (for linear fit)
     123   
    122124   int box_chan_cntr;     // actual number of channels in the box
    123125   int max_box_nchan;     // maximum allowed number of channels in the box
    124126                          // (calculated from boxsize and actual spectrum size)
    125 
     127   // cache for derivative statistics
     128   mutable casa::Bool need2recalculate; // if true, values of the statistics
     129                                       // below are invalid
     130   mutable casa::Float linmean;  // a value of the linear fit to the
     131                                 // points in the running box
     132   mutable casa::Float linvariance; // the same for variance
     133   int cur_channel;       // the number of the current channel
     134   int start_advance;     // number of channel from which the box can
     135                          // be moved (the middle of the box, if there is no
     136                          // masking)
     137public:
     138   // set up the object with the references to actual data
     139   // as well as the number of channels in the running box
     140   RunningBox(const casa::Vector<casa::Float>  &in_spectrum,
     141                 const casa::Vector<casa::Bool>   &in_mask,
     142                 const std::pair<int,int>         &in_edge,
     143                 int in_max_box_nchan) throw(AipsError);
     144                 
     145   // access to the statistics
     146   const casa::Float& getLinMean() const throw(AipsError);
     147   const casa::Float& getLinVariance() const throw(AipsError);
     148   const casa::Float aboveMean() const throw(AipsError);
     149   int getChannel() const throw();
     150   
     151   // actual number of channels in the box (max_box_nchan, if no channels
     152   // are masked)
     153   int getNumberOfBoxPoints() const throw();
     154
     155   // next channel
     156   void next() throw(AipsError);
     157
     158   // checking whether there are still elements
     159   casa::Bool haveMore() const throw();
     160
     161   // go to start
     162   void rewind() throw(AipsError);
     163 
     164protected:
     165   // supplementary function to control running mean calculations.
     166   // It adds a specified channel to the running mean box and
     167   // removes (ch-maxboxnchan+1)'th channel from there
     168   // Channels, for which the mask is false or index is beyond the
     169   // allowed range, are ignored
     170   void advanceRunningBox(int ch) throw(casa::AipsError);
     171
     172   // calculate derivative statistics. This function is const, because
     173   // it updates the cache only
     174   void updateDerivativeStatistics() const throw(AipsError);
     175};
     176
     177//
     178///////////////////////////////////////////////////////////////////////////////
     179
     180///////////////////////////////////////////////////////////////////////////////
     181//
     182// LFAboveThreshold   An algorithm for line detection using running box
     183//                    statistics.  Line is detected if it is above the
     184//                    specified threshold at the specified number of
     185//                    consequtive channels. Prefix LF stands for Line Finder
     186//
     187class LFAboveThreshold {
    126188   // temporary line edge channels and flag, which is True if the line
    127189   // was detected in the previous channels.
     
    134196   casa::Float threshold;                  // detection threshold - the
    135197                                           // minimal signal to noise ratio
     198   std::list<pair<int,int> > &lines;       // list where detections are saved
     199                                           // (pair: start and stop+1 channel)
     200   RunningBox *running_box;                // running box filter
    136201public:
    137    // set up the object with the references to actual data
    138    // as well as the detection criterion (min_nchan and threshold, see above)
    139    // and the number of channels in the running box
    140    LFRunningMean(const casa::Vector<casa::Float>  &in_spectrum,
    141                  const casa::Vector<casa::Bool>   &in_mask,
    142                  const std::pair<int,int>         &in_edge,
    143                  int in_max_box_nchan,
    144                  int in_min_nchan = 3,
    145                  casa::Float in_threshold = 5);
    146                  
     202
     203   // set up the detection criterion
     204   LFAboveThreshold(std::list<pair<int,int> > &in_lines,
     205                    int in_min_nchan = 3,
     206                    casa::Float in_threshold = 5) throw();
     207   virtual ~LFAboveThreshold() throw();
     208   
    147209   // replace the detection criterion
    148210   void setCriterion(int in_min_nchan, casa::Float in_threshold) throw();
     
    151213   // if statholder is not NULL, the accumulate function of it will be
    152214   // called for each channel to save statistics
    153    void findLines(std::list<pair<int,int> > &lines,
     215   //    spectrum, mask and edge - reference to the data
     216   //    max_box_nchan  - number of channels in the running box
     217   void findLines(const casa::Vector<casa::Float> &spectrum,
     218                  const casa::Vector<casa::Bool> &mask,
     219                  const std::pair<int,int> &edge,
     220                  int max_box_nchan,
    154221                  IStatHolder* statholder = NULL) throw(casa::AipsError);
    155    
     222
    156223protected:
    157    // supplementary function to control running mean calculations.
    158    // It adds a specified channel to the running mean box and
    159    // removes (ch-maxboxnchan+1)'th channel from there
    160    // Channels, for which the mask is false or index is beyond the
    161    // allowed range, are ignored
    162    void advanceRunningBox(int ch) throw(casa::AipsError);
    163    
    164 
    165    // test a channel against current running mean & rms
    166    // if channel specified is masked out or beyond the allowed indexes,
    167    // false is returned
    168    casa::Bool testChannel(int ch) const
    169                      throw(std::exception, casa::AipsError);
    170224
    171225   // process a channel: update curline and is_detected before and
    172226   // add a new line to the list, if necessary using processCurLine()
    173    void processChannel(std::list<pair<int,int> > &lines,
    174                        int ch) throw(casa::AipsError);
     227   // detect=true indicates that the current channel satisfies the criterion
     228   void processChannel(Bool detect, const casa::Vector<casa::Bool> &mask)
     229                                                  throw(casa::AipsError);
    175230
    176231   // process the interval of channels stored in curline
    177232   // if it satisfies the criterion, add this interval as a new line
    178    void processCurLine(std::list<pair<int,int> > &lines) throw(casa::AipsError);
    179 
     233   void processCurLine(const casa::Vector<casa::Bool> &mask)
     234                                                 throw(casa::AipsError);
    180235};
    181236
    182237//
    183238///////////////////////////////////////////////////////////////////////////////
     239
    184240} // namespace asap
    185241
     
    203259// box_nchan - number of channels in the box
    204260//
    205 void SignAccumulator::accumulate(int ch, Float sum, Float, int box_nchan)
     261void SignAccumulator::accumulate(int ch, Float sum, Float sum2, int box_nchan)
    206262                   throw(AipsError)
    207263{
     
    210266   DebugAssert(ch<=spectrum.nelements(), AipsError);
    211267   if (box_nchan) {
    212        Float buf=spectrum[ch]-sum/Float(box_nchan);
     268       Float buf=spectrum[ch]-sum; ///Float(box_nchan);
    213269       if (buf>0) sign[ch]=1;
    214270       else if (buf<0) sign[ch]=-1;
     
    227283///////////////////////////////////////////////////////////////////////////////
    228284
    229 
    230 ///////////////////////////////////////////////////////////////////////////////
    231 //
    232 // LFRunningMean - a running mean algorithm for line detection
    233 //
     285///////////////////////////////////////////////////////////////////////////////
     286//
     287// RunningBox -    a running box calculator. This class implements
     288//                 interations over the specified spectrum and calculates
     289//                 running box filter statistics.
    234290//
    235291
    236292// set up the object with the references to actual data
    237 // as well as the detection criterion (min_nchan and threshold, see above)
    238293// and the number of channels in the running box
    239 LFRunningMean::LFRunningMean(const casa::Vector<casa::Float>  &in_spectrum,
    240                              const casa::Vector<casa::Bool>   &in_mask,
    241                              const std::pair<int,int>         &in_edge,
    242                              int in_max_box_nchan,
    243                              int in_min_nchan, casa::Float in_threshold) :
     294RunningBox::RunningBox(const casa::Vector<casa::Float>  &in_spectrum,
     295                       const casa::Vector<casa::Bool>   &in_mask,
     296                       const std::pair<int,int>         &in_edge,
     297                       int in_max_box_nchan) throw(AipsError) :
    244298        spectrum(in_spectrum), mask(in_mask), edge(in_edge),
    245         max_box_nchan(in_max_box_nchan),
    246         min_nchan(in_min_nchan),threshold(in_threshold) {}
    247 
    248 // replace the detection criterion
    249 void LFRunningMean::setCriterion(int in_min_nchan, casa::Float in_threshold)
    250                                  throw()
    251 {
    252   min_nchan=in_min_nchan;
    253   threshold=in_threshold;
    254 }
    255 
     299        max_box_nchan(in_max_box_nchan)
     300{
     301  rewind();
     302}
     303
     304void RunningBox::rewind() throw(AipsError) {
     305  // fill statistics for initial box
     306  box_chan_cntr=0; // no channels are currently in the box
     307  sumf=0.;           // initialize statistics
     308  sumf2=0.;
     309  sumch=0.;
     310  sumch2=0.;
     311  sumfch=0.;
     312  int initial_box_ch=edge.first;
     313  for (;initial_box_ch<edge.second && box_chan_cntr<max_box_nchan;
     314        ++initial_box_ch)
     315       advanceRunningBox(initial_box_ch);
     316 
     317  if (initial_box_ch==edge.second)       
     318      throw AipsError("RunningBox::rewind - too much channels are masked");
     319
     320  cur_channel=edge.first;
     321  start_advance=initial_box_ch-max_box_nchan/2; 
     322}
     323
     324// access to the statistics
     325const casa::Float& RunningBox::getLinMean() const throw(AipsError)
     326{
     327  DebugAssert(cur_channel<edge.second, AipsError);
     328  if (need2recalculate) updateDerivativeStatistics();
     329  return linmean;
     330}
     331
     332const casa::Float& RunningBox::getLinVariance() const throw(AipsError)
     333{
     334  DebugAssert(cur_channel<edge.second, AipsError);
     335  if (need2recalculate) updateDerivativeStatistics();
     336  return linvariance;
     337}
     338
     339const casa::Float RunningBox::aboveMean() const throw(AipsError)
     340{
     341  DebugAssert(cur_channel<edge.second, AipsError);
     342  if (need2recalculate) updateDerivativeStatistics();
     343  return spectrum[cur_channel]-linmean;
     344}
     345
     346int RunningBox::getChannel() const throw()
     347{
     348  return cur_channel;
     349}
     350
     351// actual number of channels in the box (max_box_nchan, if no channels
     352// are masked)
     353int RunningBox::getNumberOfBoxPoints() const throw()
     354{
     355  return box_chan_cntr;
     356}
    256357
    257358// supplementary function to control running mean calculations.
     
    260361// Channels, for which the mask is false or index is beyond the
    261362// allowed range, are ignored
    262 void LFRunningMean::advanceRunningBox(int ch) throw(AipsError)
     363void RunningBox::advanceRunningBox(int ch) throw(AipsError)
    263364{
    264365  if (ch>=edge.first && ch<edge.second)
    265366      if (mask[ch]) { // ch is a valid channel
    266367          ++box_chan_cntr;
    267           sum+=spectrum[ch];
    268           sumsq+=square(spectrum[ch]);         
     368          sumf+=spectrum[ch];
     369          sumf2+=square(spectrum[ch]);
     370          sumch+=Float(ch);
     371          sumch2+=square(Float(ch));
     372          sumfch+=spectrum[ch]*Float(ch);
     373          need2recalculate=True;
    269374      }
    270375  int ch2remove=ch-max_box_nchan;
     
    272377      if (mask[ch2remove]) { // ch2remove is a valid channel
    273378          --box_chan_cntr;
    274           sum-=spectrum[ch2remove];
    275           sumsq-=square(spectrum[ch2remove]); 
     379          sumf-=spectrum[ch2remove];
     380          sumf2-=square(spectrum[ch2remove]); 
     381          sumch-=Float(ch2remove);
     382          sumch2-=square(Float(ch2remove));
     383          sumfch-=spectrum[ch2remove]*Float(ch2remove);
     384          need2recalculate=True;
    276385      }
    277386}
    278387
    279 // test a channel against current running mean & rms
    280 // if channel specified is masked out or beyond the allowed indexes,
    281 // false is returned
    282 Bool LFRunningMean::testChannel(int ch) const throw(exception, AipsError)
    283 {
    284   if (ch<edge.first || ch>=edge.second) return False;
    285   if (!mask[ch]) return False;
    286   DebugAssert(box_chan_cntr, AipsError);
    287   Float mean=sum/Float(box_chan_cntr);
    288   Float variance=sqrt(sumsq/Float(box_chan_cntr)-square(mean));
    289   /*
    290   if (ch>3900 && ch<4100)
    291     cout<<"Tested "<<ch<<" mean="<<mean<<" variance="<<variance<<" sp-mean="<<spectrum[ch]-mean<<endl;
    292   */
    293   return fabs(spectrum[ch]-mean)>=threshold*variance;
    294 }
     388// next channel
     389void RunningBox::next() throw(AipsError)
     390{
     391   AlwaysAssert(cur_channel<edge.second,AipsError);
     392   ++cur_channel;
     393   if (cur_channel+max_box_nchan/2<edge.second && cur_channel>=start_advance)
     394       advanceRunningBox(cur_channel+max_box_nchan/2); // update statistics
     395}
     396
     397// checking whether there are still elements
     398casa::Bool RunningBox::haveMore() const throw()
     399{
     400   return cur_channel<edge.second;
     401}
     402
     403// calculate derivative statistics. This function is const, because
     404// it updates the cache only
     405void RunningBox::updateDerivativeStatistics() const throw(AipsError)
     406{
     407  AlwaysAssert(box_chan_cntr, AipsError);
     408 
     409  Float mean=sumf/Float(box_chan_cntr);
     410
     411  // linear LSF formulae
     412  Float meanch=sumch/Float(box_chan_cntr);
     413  Float meanch2=sumch2/Float(box_chan_cntr);
     414  if (meanch==meanch2 || box_chan_cntr<3) {
     415      // vertical line in the spectrum, can't calculate linmean and linvariance
     416      linmean=0.;
     417      linvariance=0.;
     418  } else {
     419      Float coeff=(sumfch/Float(box_chan_cntr)-meanch*mean)/
     420                (meanch2-square(meanch));
     421      linmean=coeff*(Float(cur_channel)-meanch)+mean;
     422      linvariance=sqrt(sumf2/Float(box_chan_cntr)-square(mean)-
     423                    square(coeff)*(meanch2-square(meanch)));
     424  }
     425  need2recalculate=False;
     426}
     427
     428
     429//
     430///////////////////////////////////////////////////////////////////////////////
     431
     432///////////////////////////////////////////////////////////////////////////////
     433//
     434// LFAboveThreshold - a running mean algorithm for line detection
     435//
     436//
     437
     438
     439// set up the detection criterion
     440LFAboveThreshold::LFAboveThreshold(std::list<pair<int,int> > &in_lines,
     441                                   int in_min_nchan,
     442                                   casa::Float in_threshold) throw() :
     443             min_nchan(in_min_nchan), threshold(in_threshold),
     444             lines(in_lines), running_box(NULL) {}
     445
     446LFAboveThreshold::~LFAboveThreshold() throw()
     447{
     448  if (running_box!=NULL) delete running_box;
     449}
     450
     451// replace the detection criterion
     452void LFAboveThreshold::setCriterion(int in_min_nchan, casa::Float in_threshold)
     453                                 throw()
     454{
     455  min_nchan=in_min_nchan;
     456  threshold=in_threshold;
     457}
     458
    295459
    296460// process a channel: update cur_line and is_detected before and
    297461// add a new line to the list, if necessary
    298 void LFRunningMean::processChannel(std::list<pair<int,int> > &lines,
    299                                    int ch) throw(casa::AipsError)
     462void LFAboveThreshold::processChannel(Bool detect,
     463                 const casa::Vector<casa::Bool> &mask) throw(casa::AipsError)
    300464{
    301465  try {
    302        if (testChannel(ch)) {
     466       if (detect) {
    303467           if (is_detected_before)
    304                cur_line.second=ch+1;
     468               cur_line.second=running_box->getChannel()+1;
    305469           else {
    306470               is_detected_before=True;
    307                cur_line.first=ch;
    308                cur_line.second=ch+1;
     471               cur_line.first=running_box->getChannel();
     472               cur_line.second=running_box->getChannel()+1;
    309473           }
    310        } else processCurLine(lines);   
     474       } else processCurLine(mask);   
    311475  }
    312476  catch (const AipsError &ae) {
     
    314478  } 
    315479  catch (const exception &ex) {
    316       throw AipsError(String("SDLineFinder::processChannel - STL error: ")+ex.what());
     480      throw AipsError(String("LFAboveThreshold::processChannel - STL error: ")+ex.what());
    317481  }
    318482}
     
    320484// process the interval of channels stored in cur_line
    321485// if it satisfies the criterion, add this interval as a new line
    322 void LFRunningMean::processCurLine(std::list<pair<int,int> > &lines)
     486void LFAboveThreshold::processCurLine(const casa::Vector<casa::Bool> &mask)
    323487                                   throw(casa::AipsError)
    324488{
     
    347511  } 
    348512  catch (const exception &ex) {
    349       throw AipsError(String("SDLineFinder::processCurLine - STL error: ")+ex.what());
     513      throw AipsError(String("LFAboveThreshold::processCurLine - STL error: ")+ex.what());
    350514  }
    351515}
    352516
    353517// find spectral lines and add them into list
    354 void LFRunningMean::findLines(std::list<pair<int,int> > &lines,
     518void LFAboveThreshold::findLines(const casa::Vector<casa::Float> &spectrum,
     519                              const casa::Vector<casa::Bool> &mask,
     520                              const std::pair<int,int> &edge,
     521                              int max_box_nchan,
    355522                              IStatHolder* statholder)
    356523                        throw(casa::AipsError)
    357524{
    358525  const int minboxnchan=4;
    359 
    360   // fill statistics for initial box
    361   box_chan_cntr=0; // no channels are currently in the box
    362   sum=0;           // initialize statistics
    363   sumsq=0;
    364   int initial_box_ch=edge.first;
    365   for (;initial_box_ch<edge.second && box_chan_cntr<max_box_nchan;
    366         ++initial_box_ch)
    367        advanceRunningBox(initial_box_ch);
     526  try {
     527
     528      if (running_box!=NULL) delete running_box;
     529      running_box=new RunningBox(spectrum,mask,edge,max_box_nchan);
    368530   
    369   if (initial_box_ch==edge.second)       
    370       throw AipsError("LFRunningMean::findLines - too much channels are masked");
    371 
    372   // actual search algorithm
    373   is_detected_before=False;
    374 
    375   if (statholder!=NULL)
    376       for (int n=0;n<initial_box_ch-max_box_nchan/2;++n)
    377            statholder->accumulate(n,sum,sumsq,box_chan_cntr);
    378 
    379   if (box_chan_cntr>=minboxnchan)
    380       // there is a minimum amount of data. We can search in the
    381       // half of the initial box   
    382       for (int n=edge.first;n<initial_box_ch-max_box_nchan/2;++n)
    383            processChannel(lines,n);               
    384  
    385   // now the box can be moved. n+max_box_nchan/2 is a new index which haven't
    386   // yet been included in the running mean.
    387   for (int n=initial_box_ch-max_box_nchan/2;n<edge.second;++n) {
    388       advanceRunningBox(n+max_box_nchan/2); // update running mean & variance
    389       if (statholder!=NULL)
    390           statholder->accumulate(n,sum,sumsq,box_chan_cntr);
    391       if (box_chan_cntr>=minboxnchan) // have enough data to process
    392           processChannel(lines,n);
    393       else processCurLine(lines); // just finish what was accumulated before
    394   }
    395   if (statholder!=NULL)
    396       for (int n=edge.second;n<spectrum.nelements();++n)
    397            statholder->accumulate(n,sum,sumsq,box_chan_cntr);
     531      // actual search algorithm
     532      is_detected_before=False;
     533   
     534      for (;running_box->haveMore();running_box->next()) {
     535           const int ch=running_box->getChannel();
     536           if (running_box->getNumberOfBoxPoints()>=minboxnchan)
     537               processChannel(mask[ch] && (fabs(spectrum[ch]-
     538                  running_box->getLinMean()) >=
     539                  threshold*running_box->getLinVariance()), mask);
     540           else processCurLine(mask); // just finish what was accumulated before
     541           if (statholder!=NULL)
     542              statholder->accumulate(running_box->getChannel(),
     543                                     running_box->getLinMean(),
     544                                     running_box->getLinVariance(),
     545                                     running_box->getNumberOfBoxPoints());
     546      }
     547  }
     548  catch (const AipsError &ae) {
     549      throw;
     550  } 
     551  catch (const exception &ex) {
     552      throw AipsError(String("LFAboveThreshold::findLines - STL error: ")+ex.what());
     553  }
    398554}
    399555
     
    493649  // detection threshold - the minimal signal to noise ratio
    494650  threshold=3.; // 3 sigma is a default
    495   box_size=1./16.; // default box size for running mean calculations is
    496                   // 1/16 of the whole spectrum
     651  box_size=1./5.; // default box size for running mean calculations is
     652                  // 1/5 of the whole spectrum
    497653  // A minimum number of consequtive channels, which should satisfy
    498654  // the detection criterion, to be a detection
     
    577733  lines.resize(0); // search from the scratch
    578734  Vector<Bool> temp_mask(mask);
    579  
     735
     736  Bool first_pass=True;
    580737  while (true) {
    581      // line find algorithm
    582      LFRunningMean lfalg(spectrum,temp_mask,edge,max_box_nchan,min_nchan,
    583                          threshold);
    584      SignAccumulator sacc(spectrum.nelements(),spectrum);
    585 
    586738     // a buffer for new lines found at this iteration
    587739     std::list<pair<int,int> > new_lines;
    588      
    589      lfalg.findLines(new_lines,&sacc);
     740
     741     // line find algorithm
     742     LFAboveThreshold lfalg(new_lines,min_nchan, threshold);
     743
     744     SignAccumulator sacc(spectrum.nelements(),spectrum);
     745
     746
     747     try {
     748         lfalg.findLines(spectrum,temp_mask,edge,max_box_nchan,&sacc);
     749     }
     750     catch(const AipsError &ae) {
     751         if (first_pass) throw;
     752         break; // nothing new
     753     }
     754     first_pass=False;
    590755     if (!new_lines.size()) break; // nothing new
    591756
Note: See TracChangeset for help on using the changeset viewer.