source: trunk/src/SDLineFinder.cc @ 342

Last change on this file since 342 was 331, checked in by vor010, 19 years ago

Line searching algorithm is now in the separate
class LFRunningMean. It allows to run the same code several passes for different
masks, criteria, etc. In the future this interface may be more readily modified
to have multiple algorithms

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.3 KB
Line 
1//#---------------------------------------------------------------------------
2//# SDLineFinder.cc: A class for automated spectral line search
3//#--------------------------------------------------------------------------
4//# Copyright (C) 2004
5//# ATNF
6//#
7//# This program is free software; you can redistribute it and/or modify it
8//# under the terms of the GNU General Public License as published by the Free
9//# Software Foundation; either version 2 of the License, or (at your option)
10//# any later version.
11//#
12//# This program is distributed in the hope that it will be useful, but
13//# WITHOUT ANY WARRANTY; without even the implied warranty of
14//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
15//# Public License for more details.
16//#
17//# You should have received a copy of the GNU General Public License along
18//# with this program; if not, write to the Free Software Foundation, Inc.,
19//# 675 Massachusetts Ave, Cambridge, MA 02139, USA.
20//#
21//# Correspondence concerning this software should be addressed as follows:
22//#        Internet email: Malte.Marquarding@csiro.au
23//#        Postal address: Malte Marquarding,
24//#                        Australia Telescope National Facility,
25//#                        P.O. Box 76,
26//#                        Epping, NSW, 2121,
27//#                        AUSTRALIA
28//#
29//# $Id:
30//#---------------------------------------------------------------------------
31
32
33// ASAP
34#include "SDLineFinder.h"
35
36// STL
37#include <iostream>
38
39using namespace asap;
40using namespace casa;
41using namespace std;
42using namespace boost::python;
43
44namespace asap {
45
46// An auxiliary class implementing one pass of the line search algorithm,
47// which uses a running mean. We define this class here because it is
48// used in SDLineFinder only. The incapsulation of this code into a separate
49// class will provide a possibility to add new algorithms with minor changes
50class LFRunningMean {
51   // The input data to work with. Use reference symantics to avoid
52   // an unnecessary copying   
53   const casa::Vector<casa::Float>  &spectrum; // a buffer for the spectrum
54   const casa::Vector<casa::Bool>   &mask; // associated mask
55   const std::pair<int,int>         &edge; // start and stop+1 channels
56                                           // to work with
57   
58   // statistics for running mean filtering
59   casa::Float sum;       // sum of fluxes
60   casa::Float sumsq;     // sum of squares of fluxes
61   int box_chan_cntr;     // actual number of channels in the box
62   int max_box_nchan;     // maximum allowed number of channels in the box
63                          // (calculated from boxsize and actual spectrum size)
64
65   // temporary line edge channels and flag, which is True if the line
66   // was detected in the previous channels.
67   std::pair<int,int> cur_line;
68   casa::Bool is_detected_before;
69   int  min_nchan;                         // A minimum number of consequtive
70                                           // channels, which should satisfy
71                                           // the detection criterion, to be
72                                           // a detection
73   casa::Float threshold;                  // detection threshold - the
74                                           // minimal signal to noise ratio
75public:
76   // set up the object with the references to actual data
77   // as well as the detection criterion (min_nchan and threshold, see above)
78   // and the number of channels in the running box
79   LFRunningMean(const casa::Vector<casa::Float>  &in_spectrum,
80                 const casa::Vector<casa::Bool>   &in_mask,
81                 const std::pair<int,int>         &in_edge,
82                 int in_max_box_nchan,
83                 int in_min_nchan = 3,
84                 casa::Float in_threshold = 5);
85                 
86   // replace the detection criterion
87   void setCriterion(int in_min_nchan, casa::Float in_threshold) throw();
88
89   // find spectral lines and add them into list
90   void findLines(std::list<pair<int,int> > &lines) throw(casa::AipsError);
91   
92protected:
93   // supplementary function to control running mean calculations.
94   // It adds a specified channel to the running mean box and
95   // removes (ch-maxboxnchan+1)'th channel from there
96   // Channels, for which the mask is false or index is beyond the
97   // allowed range, are ignored
98   void advanceRunningBox(int ch) throw(casa::AipsError);
99   
100
101   // test a channel against current running mean & rms
102   // if channel specified is masked out or beyond the allowed indexes,
103   // false is returned
104   casa::Bool testChannel(int ch) const
105                     throw(std::exception, casa::AipsError);
106
107   // process a channel: update curline and is_detected before and
108   // add a new line to the list, if necessary using processCurLine()
109   void processChannel(std::list<pair<int,int> > &lines,
110                       int ch) throw(casa::AipsError);
111
112   // process the interval of channels stored in curline
113   // if it satisfies the criterion, add this interval as a new line
114   void processCurLine(std::list<pair<int,int> > &lines) throw(casa::AipsError);
115
116};
117} // namespace asap
118
119///////////////////////////////////////////////////////////////////////////////
120//
121// LFRunningMean - a running mean algorithm for line detection
122//
123//
124
125// set up the object with the references to actual data
126// as well as the detection criterion (min_nchan and threshold, see above)
127// and the number of channels in the running box
128LFRunningMean::LFRunningMean(const casa::Vector<casa::Float>  &in_spectrum,
129                             const casa::Vector<casa::Bool>   &in_mask,
130                             const std::pair<int,int>         &in_edge,
131                             int in_max_box_nchan,
132                             int in_min_nchan, casa::Float in_threshold) :
133        spectrum(in_spectrum), mask(in_mask), edge(in_edge),
134        max_box_nchan(in_max_box_nchan),
135        min_nchan(in_min_nchan),threshold(in_threshold) {}
136
137// replace the detection criterion
138void LFRunningMean::setCriterion(int in_min_nchan, casa::Float in_threshold)
139                                 throw()
140{
141  min_nchan=in_min_nchan;
142  threshold=in_threshold;
143}
144
145
146// supplementary function to control running mean calculations.
147// It adds a specified channel to the running mean box and
148// removes (ch-max_box_nchan+1)'th channel from there
149// Channels, for which the mask is false or index is beyond the
150// allowed range, are ignored
151void LFRunningMean::advanceRunningBox(int ch) throw(AipsError)
152{
153  if (ch>=edge.first && ch<edge.second)
154      if (mask[ch]) { // ch is a valid channel
155          ++box_chan_cntr;
156          sum+=spectrum[ch];
157          sumsq+=square(spectrum[ch]);         
158      }
159  int ch2remove=ch-max_box_nchan;
160  if (ch2remove>=edge.first && ch2remove<edge.second)
161      if (mask[ch2remove]) { // ch2remove is a valid channel
162          --box_chan_cntr;
163          sum-=spectrum[ch2remove];
164          sumsq-=square(spectrum[ch2remove]); 
165      }
166}
167
168// test a channel against current running mean & rms
169// if channel specified is masked out or beyond the allowed indexes,
170// false is returned
171Bool LFRunningMean::testChannel(int ch) const throw(exception, AipsError)
172{
173  if (ch<edge.first || ch>=edge.second) return False;
174  if (!mask[ch]) return False;
175  DebugAssert(box_chan_cntr, AipsError);
176  Float mean=sum/Float(box_chan_cntr);
177  Float variance=sqrt(sumsq/Float(box_chan_cntr)-square(mean));
178  /*
179  if (ch>3900 && ch<4100)
180    cout<<"Tested "<<ch<<" mean="<<mean<<" variance="<<variance<<" sp-mean="<<spectrum[ch]-mean<<endl;
181  */
182  return fabs(spectrum[ch]-mean)>=threshold*variance;
183}
184
185// process a channel: update cur_line and is_detected before and
186// add a new line to the list, if necessary
187void LFRunningMean::processChannel(std::list<pair<int,int> > &lines,
188                                   int ch) throw(casa::AipsError)
189{
190  try {
191       if (testChannel(ch)) {
192           if (is_detected_before)
193               cur_line.second=ch+1;
194           else {
195               is_detected_before=True;
196               cur_line.first=ch;
197               cur_line.second=ch+1;
198           }
199       } else processCurLine(lines);   
200  }
201  catch (const AipsError &ae) {
202      throw;
203  } 
204  catch (const exception &ex) {
205      throw AipsError(String("SDLineFinder::processChannel - STL error: ")+ex.what());
206  }
207}
208
209// process the interval of channels stored in cur_line
210// if it satisfies the criterion, add this interval as a new line
211void LFRunningMean::processCurLine(std::list<pair<int,int> > &lines)
212                                   throw(casa::AipsError)
213{
214  try {
215       if (is_detected_before) {                     
216           if (cur_line.second-cur_line.first>min_nchan) {
217               // it was a detection. We need to change the list
218               Bool add_new_line=False;
219               if (lines.size()) {
220                   for (int i=lines.back().second;i<cur_line.first;++i)
221                        if (mask[i]) { // one valid channel in between
222                                //  means that we deal with a separate line
223                            add_new_line=True;
224                            break;
225                        }
226               } else add_new_line=True;
227               if (add_new_line)
228                   lines.push_back(cur_line);
229               else lines.back().second=cur_line.second;                   
230           }
231           is_detected_before=False;
232       }     
233  }
234  catch (const AipsError &ae) {
235      throw;
236  } 
237  catch (const exception &ex) {
238      throw AipsError(String("SDLineFinder::processCurLine - STL error: ")+ex.what());
239  }
240}
241
242// find spectral lines and add them into list
243void LFRunningMean::findLines(std::list<pair<int,int> > &lines)
244                        throw(casa::AipsError)
245{
246  const int minboxnchan=4;
247
248  // fill statistics for initial box
249  box_chan_cntr=0; // no channels are currently in the box
250  sum=0;           // initialize statistics
251  sumsq=0;
252  int initial_box_ch=edge.first;
253  for (;initial_box_ch<edge.second && box_chan_cntr<max_box_nchan;
254        ++initial_box_ch)
255       advanceRunningBox(initial_box_ch);
256   
257  if (initial_box_ch==edge.second)       
258      throw AipsError("LFRunningMean::findLines - too much channels are masked");
259
260  // actual search algorithm
261  is_detected_before=False;
262
263  if (box_chan_cntr>=minboxnchan)
264      // there is a minimum amount of data. We can search in the
265      // half of the initial box   
266      for (int n=edge.first;n<initial_box_ch-max_box_nchan/2;++n)
267           processChannel(lines,n);               
268 
269  // now the box can be moved. n+max_box_nchan/2 is a new index which haven't
270  // yet been included in the running mean.
271  for (int n=initial_box_ch-max_box_nchan/2;n<edge.second;++n) {
272      advanceRunningBox(n+max_box_nchan/2); // update running mean & variance
273      if (box_chan_cntr>=minboxnchan) // have enough data to process
274          processChannel(lines,n);
275      else processCurLine(lines); // just finish what was accumulated before
276  } 
277}
278
279//
280///////////////////////////////////////////////////////////////////////////////
281
282
283// SDLineFinder  -  a class for automated spectral line search
284
285SDLineFinder::SDLineFinder() throw() : edge(0,0)
286{
287  // detection threshold - the minimal signal to noise ratio
288  threshold=3.; // 3 sigma is a default
289  box_size=1./16.; // default box size for running mean calculations is
290                  // 1/16 of the whole spectrum
291  // A minimum number of consequtive channels, which should satisfy
292  // the detection criterion, to be a detection
293  min_nchan=3;     // default is 3 channels
294}
295
296SDLineFinder::~SDLineFinder() throw(AipsError) {}
297
298// set scan to work with (in_scan parameter), associated mask (in_mask
299// parameter) and the edge channel rejection (in_edge parameter)
300//   if in_edge has zero length, all channels chosen by mask will be used
301//   if in_edge has one element only, it represents the number of
302//      channels to drop from both sides of the spectrum
303//   in_edge is introduced for convinience, although all functionality
304//   can be achieved using a spectrum mask only   
305void SDLineFinder::setScan(const SDMemTableWrapper &in_scan,
306               const std::vector<bool> &in_mask,
307               const boost::python::tuple &in_edge) throw(AipsError)
308{
309  try {
310       scan=in_scan.getCP();
311       AlwaysAssert(!scan.null(),AipsError);
312       if (scan->nRow()!=1)
313           throw AipsError("SDLineFinder::setScan - in_scan contains more than 1 row."
314                           "Choose one first.");       
315       mask=in_mask;
316       if (mask.nelements()!=scan->nChan())
317           throw AipsError("SDLineFinder::setScan - in_scan and in_mask have different"
318                           "number of spectral channels.");
319
320       // number of elements in the in_edge tuple
321       int n=extract<int>(in_edge.attr("__len__")());
322       if (n>2 || n<0)
323           throw AipsError("SDLineFinder::setScan - the length of the in_edge parameter"
324                           "should not exceed 2");
325       if (!n) {
326           // all spectrum, no rejection
327           edge.first=0;
328           edge.second=scan->nChan();
329       } else {
330           edge.first=extract<int>(in_edge.attr("__getitem__")(0));
331           if (edge.first<0)
332               throw AipsError("SDLineFinder::setScan - the in_edge parameter has a negative"
333                                "number of channels to drop");
334           if (edge.first>=scan->nChan())
335               throw AipsError("SDLineFinder::setScan - all channels are rejected by the in_edge parameter");
336           if (n==2) {
337               edge.second=extract<int>(in_edge.attr("__getitem__")(1));
338               if (edge.second<0)
339                   throw AipsError("SDLineFinder::setScan - the in_edge parameter has a negative"
340                                   "number of channels to drop");
341               edge.second=scan->nChan()-edge.second;
342           } else edge.second=scan->nChan()-edge.first;
343           if (edge.second<0 || (edge.second+edge.first)>scan->nChan())
344               throw AipsError("SDLineFinder::setScan - all channels are rejected by the in_edge parameter");
345       }       
346  }
347  catch(const AipsError &ae) {
348       // setScan is unsuccessfull, reset scan/mask/edge
349       scan=CountedConstPtr<SDMemTable>(); // null pointer
350       mask.resize(0);
351       edge=pair<int,int>(0,0);
352       throw;
353  }
354}
355
356// search for spectral lines. Number of lines found is returned
357int SDLineFinder::findLines() throw(casa::AipsError)
358{
359  const int minboxnchan=4;
360  if (scan.null())
361      throw AipsError("SDLineFinder::findLines - a scan should be set first,"
362                      " use set_scan");
363  DebugAssert(mask.nelements()==scan->nChan(), AipsError);
364  int max_box_nchan=int(scan->nChan()*box_size); // number of channels in running
365                                                 // box
366  if (max_box_nchan<2)
367      throw AipsError("SDLineFinder::findLines - box_size is too small");
368
369  scan->getSpectrum(spectrum);
370
371  lines.resize(0); // search from the scratch
372  Vector<Bool> temp_mask(mask);
373  size_t cursz;
374  do {
375     cursz=lines.size();
376     // line find algorithm
377     LFRunningMean lfalg(spectrum,temp_mask,edge,max_box_nchan,min_nchan,threshold);
378     lfalg.findLines(lines);
379     temp_mask=getMask();
380  } while (cursz!=lines.size());
381  return int(lines.size());
382}
383
384
385// get the mask to mask out all lines that have been found (default)
386// if invert=true, only channels belong to lines will be unmasked
387// Note: all channels originally masked by the input mask (in_mask
388//       in setScan) or dropped out by the edge parameter (in_edge
389//       in setScan) are still excluded regardless on the invert option
390std::vector<bool> SDLineFinder::getMask(bool invert)
391                                        const throw(casa::AipsError)
392{
393  try {
394       if (scan.null())
395           throw AipsError("SDLineFinder::getMask - a scan should be set first,"
396                      " use set_scan followed by find_lines");
397       DebugAssert(mask.nelements()==scan->nChan(), AipsError);
398       /*
399       if (!lines.size())
400           throw AipsError("SDLineFinder::getMask - one have to search for "
401                           "lines first, use find_lines");
402       */                         
403       std::vector<bool> res_mask(mask.nelements());
404       // iterator through lines
405       std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
406       for (int ch=0;ch<res_mask.size();++ch)
407            if (ch<edge.first || ch>=edge.second) res_mask[ch]=false;
408            else if (!mask[ch]) res_mask[ch]=false;
409            else {           
410                    res_mask[ch]=!invert; // no line by default
411                    if (cli==lines.end()) continue;
412                    if (ch>=cli->first && ch<cli->second)
413                        res_mask[ch]=invert; // this is a line
414                    if (ch>=cli->second)
415                        ++cli; // next line in the list
416                 }
417       
418       return res_mask;
419  }
420  catch (const AipsError &ae) {
421      throw;
422  } 
423  catch (const exception &ex) {
424      throw AipsError(String("SDLineFinder::getMask - STL error: ")+ex.what());
425  }
426}
427
428// get range for all lines found. If defunits is true (default), the
429// same units as used in the scan will be returned (e.g. velocity
430// instead of channels). If defunits is false, channels will be returned
431std::vector<int> SDLineFinder::getLineRanges(bool defunits)
432                             const throw(casa::AipsError)
433{
434  try {
435       if (scan.null())
436           throw AipsError("SDLineFinder::getLineRanges - a scan should be set first,"
437                      " use set_scan followed by find_lines");
438       DebugAssert(mask.nelements()==scan->nChan(), AipsError);
439       
440       if (!lines.size())
441           throw AipsError("SDLineFinder::getLineRanges - one have to search for "
442                           "lines first, use find_lines");
443                           
444       // temporary
445       if (defunits)
446           throw AipsError("SDLineFinder::getLineRanges - sorry, defunits=true have not "
447                           "yet been implemented");
448       //
449       std::vector<int> res(2*lines.size());
450       // iterator through lines & result
451       std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
452       std::vector<int>::iterator ri=res.begin();
453       for (;cli!=lines.end() && ri!=res.end();++cli,++ri) {           
454            *ri=cli->first;
455            if (++ri!=res.end())
456                *ri=cli->second-1;         
457       }
458       return res;
459  }
460  catch (const AipsError &ae) {
461      throw;
462  } 
463  catch (const exception &ex) {
464      throw AipsError(String("SDLineFinder::getLineRanges - STL error: ")+ex.what());
465  }
466}
467
468// concatenate two lists preserving the order. If two lines appear to
469// be adjacent, they are joined into the new one
470void SDLineFinder::addNewSearchResult(const std::list<pair<int, int> > &newlines)
471                        throw(AipsError)
472{
473  try {
474      for (std::list<pair<int,int> >::const_iterator cli=newlines.begin();
475           cli!=newlines.end();++cli) {
476           // search for a right place for the new line
477           //TODO
478      }
479  }
480  catch (const AipsError &ae) {
481      throw;
482  } 
483  catch (const exception &ex) {
484      throw AipsError(String("SDLineFinder::addNewSearchResult - STL error: ")+ex.what());
485  }
486
487}
Note: See TracBrowser for help on using the repository browser.