source: trunk/src/SDLineFinder.cc @ 343

Last change on this file since 343 was 343, checked in by vor010, 19 years ago

SDLineFinder - now algorithm does many iterations
to ensure that weak lines, which are close to the strong ones, are also found

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 22.9 KB
Line 
1//#---------------------------------------------------------------------------
2//# SDLineFinder.cc: A class for automated spectral line search
3//#--------------------------------------------------------------------------
4//# Copyright (C) 2004
5//# ATNF
6//#
7//# This program is free software; you can redistribute it and/or modify it
8//# under the terms of the GNU General Public License as published by the Free
9//# Software Foundation; either version 2 of the License, or (at your option)
10//# any later version.
11//#
12//# This program is distributed in the hope that it will be useful, but
13//# WITHOUT ANY WARRANTY; without even the implied warranty of
14//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
15//# Public License for more details.
16//#
17//# You should have received a copy of the GNU General Public License along
18//# with this program; if not, write to the Free Software Foundation, Inc.,
19//# 675 Massachusetts Ave, Cambridge, MA 02139, USA.
20//#
21//# Correspondence concerning this software should be addressed as follows:
22//#        Internet email: Malte.Marquarding@csiro.au
23//#        Postal address: Malte Marquarding,
24//#                        Australia Telescope National Facility,
25//#                        P.O. Box 76,
26//#                        Epping, NSW, 2121,
27//#                        AUSTRALIA
28//#
29//# $Id:
30//#---------------------------------------------------------------------------
31
32
33// ASAP
34#include "SDLineFinder.h"
35
36// STL
37#include <functional>
38#include <algorithm>
39#include <iostream>
40
41using namespace asap;
42using namespace casa;
43using namespace std;
44using namespace boost::python;
45
46///////////////////////////////////////////////////////////////////////////////
47//
48// LFRunningMean - a running mean algorithm for line detection
49//
50//
51
52namespace asap {
53
54// An auxiliary class implementing one pass of the line search algorithm,
55// which uses a running mean. We define this class here because it is
56// used in SDLineFinder only. The incapsulation of this code into a separate
57// class will provide a possibility to add new algorithms with minor changes
58class LFRunningMean {
59   // The input data to work with. Use reference symantics to avoid
60   // an unnecessary copying   
61   const casa::Vector<casa::Float>  &spectrum; // a buffer for the spectrum
62   const casa::Vector<casa::Bool>   &mask; // associated mask
63   const std::pair<int,int>         &edge; // start and stop+1 channels
64                                           // to work with
65   
66   // statistics for running mean filtering
67   casa::Float sum;       // sum of fluxes
68   casa::Float sumsq;     // sum of squares of fluxes
69   int box_chan_cntr;     // actual number of channels in the box
70   int max_box_nchan;     // maximum allowed number of channels in the box
71                          // (calculated from boxsize and actual spectrum size)
72
73   // temporary line edge channels and flag, which is True if the line
74   // was detected in the previous channels.
75   std::pair<int,int> cur_line;
76   casa::Bool is_detected_before;
77   int  min_nchan;                         // A minimum number of consequtive
78                                           // channels, which should satisfy
79                                           // the detection criterion, to be
80                                           // a detection
81   casa::Float threshold;                  // detection threshold - the
82                                           // minimal signal to noise ratio
83public:
84   // set up the object with the references to actual data
85   // as well as the detection criterion (min_nchan and threshold, see above)
86   // and the number of channels in the running box
87   LFRunningMean(const casa::Vector<casa::Float>  &in_spectrum,
88                 const casa::Vector<casa::Bool>   &in_mask,
89                 const std::pair<int,int>         &in_edge,
90                 int in_max_box_nchan,
91                 int in_min_nchan = 3,
92                 casa::Float in_threshold = 5);
93                 
94   // replace the detection criterion
95   void setCriterion(int in_min_nchan, casa::Float in_threshold) throw();
96
97   // find spectral lines and add them into list
98   void findLines(std::list<pair<int,int> > &lines) throw(casa::AipsError);
99   
100protected:
101   // supplementary function to control running mean calculations.
102   // It adds a specified channel to the running mean box and
103   // removes (ch-maxboxnchan+1)'th channel from there
104   // Channels, for which the mask is false or index is beyond the
105   // allowed range, are ignored
106   void advanceRunningBox(int ch) throw(casa::AipsError);
107   
108
109   // test a channel against current running mean & rms
110   // if channel specified is masked out or beyond the allowed indexes,
111   // false is returned
112   casa::Bool testChannel(int ch) const
113                     throw(std::exception, casa::AipsError);
114
115   // process a channel: update curline and is_detected before and
116   // add a new line to the list, if necessary using processCurLine()
117   void processChannel(std::list<pair<int,int> > &lines,
118                       int ch) throw(casa::AipsError);
119
120   // process the interval of channels stored in curline
121   // if it satisfies the criterion, add this interval as a new line
122   void processCurLine(std::list<pair<int,int> > &lines) throw(casa::AipsError);
123
124};
125} // namespace asap
126
127//
128///////////////////////////////////////////////////////////////////////////////
129
130
131///////////////////////////////////////////////////////////////////////////////
132//
133// LFRunningMean - a running mean algorithm for line detection
134//
135//
136
137// set up the object with the references to actual data
138// as well as the detection criterion (min_nchan and threshold, see above)
139// and the number of channels in the running box
140LFRunningMean::LFRunningMean(const casa::Vector<casa::Float>  &in_spectrum,
141                             const casa::Vector<casa::Bool>   &in_mask,
142                             const std::pair<int,int>         &in_edge,
143                             int in_max_box_nchan,
144                             int in_min_nchan, casa::Float in_threshold) :
145        spectrum(in_spectrum), mask(in_mask), edge(in_edge),
146        max_box_nchan(in_max_box_nchan),
147        min_nchan(in_min_nchan),threshold(in_threshold) {}
148
149// replace the detection criterion
150void LFRunningMean::setCriterion(int in_min_nchan, casa::Float in_threshold)
151                                 throw()
152{
153  min_nchan=in_min_nchan;
154  threshold=in_threshold;
155}
156
157
158// supplementary function to control running mean calculations.
159// It adds a specified channel to the running mean box and
160// removes (ch-max_box_nchan+1)'th channel from there
161// Channels, for which the mask is false or index is beyond the
162// allowed range, are ignored
163void LFRunningMean::advanceRunningBox(int ch) throw(AipsError)
164{
165  if (ch>=edge.first && ch<edge.second)
166      if (mask[ch]) { // ch is a valid channel
167          ++box_chan_cntr;
168          sum+=spectrum[ch];
169          sumsq+=square(spectrum[ch]);         
170      }
171  int ch2remove=ch-max_box_nchan;
172  if (ch2remove>=edge.first && ch2remove<edge.second)
173      if (mask[ch2remove]) { // ch2remove is a valid channel
174          --box_chan_cntr;
175          sum-=spectrum[ch2remove];
176          sumsq-=square(spectrum[ch2remove]); 
177      }
178}
179
180// test a channel against current running mean & rms
181// if channel specified is masked out or beyond the allowed indexes,
182// false is returned
183Bool LFRunningMean::testChannel(int ch) const throw(exception, AipsError)
184{
185  if (ch<edge.first || ch>=edge.second) return False;
186  if (!mask[ch]) return False;
187  DebugAssert(box_chan_cntr, AipsError);
188  Float mean=sum/Float(box_chan_cntr);
189  Float variance=sqrt(sumsq/Float(box_chan_cntr)-square(mean));
190  /*
191  if (ch>3900 && ch<4100)
192    cout<<"Tested "<<ch<<" mean="<<mean<<" variance="<<variance<<" sp-mean="<<spectrum[ch]-mean<<endl;
193  */
194  return fabs(spectrum[ch]-mean)>=threshold*variance;
195}
196
197// process a channel: update cur_line and is_detected before and
198// add a new line to the list, if necessary
199void LFRunningMean::processChannel(std::list<pair<int,int> > &lines,
200                                   int ch) throw(casa::AipsError)
201{
202  try {
203       if (testChannel(ch)) {
204           if (is_detected_before)
205               cur_line.second=ch+1;
206           else {
207               is_detected_before=True;
208               cur_line.first=ch;
209               cur_line.second=ch+1;
210           }
211       } else processCurLine(lines);   
212  }
213  catch (const AipsError &ae) {
214      throw;
215  } 
216  catch (const exception &ex) {
217      throw AipsError(String("SDLineFinder::processChannel - STL error: ")+ex.what());
218  }
219}
220
221// process the interval of channels stored in cur_line
222// if it satisfies the criterion, add this interval as a new line
223void LFRunningMean::processCurLine(std::list<pair<int,int> > &lines)
224                                   throw(casa::AipsError)
225{
226  try {
227       if (is_detected_before) {                     
228           if (cur_line.second-cur_line.first>min_nchan) {
229               // it was a detection. We need to change the list
230               Bool add_new_line=False;
231               if (lines.size()) {
232                   for (int i=lines.back().second;i<cur_line.first;++i)
233                        if (mask[i]) { // one valid channel in between
234                                //  means that we deal with a separate line
235                            add_new_line=True;
236                            break;
237                        }
238               } else add_new_line=True;
239               if (add_new_line)
240                   lines.push_back(cur_line);
241               else lines.back().second=cur_line.second;                   
242           }
243           is_detected_before=False;
244       }     
245  }
246  catch (const AipsError &ae) {
247      throw;
248  } 
249  catch (const exception &ex) {
250      throw AipsError(String("SDLineFinder::processCurLine - STL error: ")+ex.what());
251  }
252}
253
254// find spectral lines and add them into list
255void LFRunningMean::findLines(std::list<pair<int,int> > &lines)
256                        throw(casa::AipsError)
257{
258  const int minboxnchan=4;
259
260  // fill statistics for initial box
261  box_chan_cntr=0; // no channels are currently in the box
262  sum=0;           // initialize statistics
263  sumsq=0;
264  int initial_box_ch=edge.first;
265  for (;initial_box_ch<edge.second && box_chan_cntr<max_box_nchan;
266        ++initial_box_ch)
267       advanceRunningBox(initial_box_ch);
268   
269  if (initial_box_ch==edge.second)       
270      throw AipsError("LFRunningMean::findLines - too much channels are masked");
271
272  // actual search algorithm
273  is_detected_before=False;
274
275  if (box_chan_cntr>=minboxnchan)
276      // there is a minimum amount of data. We can search in the
277      // half of the initial box   
278      for (int n=edge.first;n<initial_box_ch-max_box_nchan/2;++n)
279           processChannel(lines,n);               
280 
281  // now the box can be moved. n+max_box_nchan/2 is a new index which haven't
282  // yet been included in the running mean.
283  for (int n=initial_box_ch-max_box_nchan/2;n<edge.second;++n) {
284      advanceRunningBox(n+max_box_nchan/2); // update running mean & variance
285      if (box_chan_cntr>=minboxnchan) // have enough data to process
286          processChannel(lines,n);
287      else processCurLine(lines); // just finish what was accumulated before
288  } 
289}
290
291//
292///////////////////////////////////////////////////////////////////////////////
293
294///////////////////////////////////////////////////////////////////////////////
295//
296// SDLineFinder::IntersectsWith  -  An auxiliary object function to test
297// whether two lines have a non-void intersection
298//
299
300
301// line1 - range of the first line: start channel and stop+1
302SDLineFinder::IntersectsWith::IntersectsWith(const std::pair<int,int> &in_line1) :
303                          line1(in_line1) {}
304
305
306// return true if line2 intersects with line1 with at least one
307// common channel, and false otherwise
308// line2 - range of the second line: start channel and stop+1
309bool SDLineFinder::IntersectsWith::operator()(const std::pair<int,int> &line2)
310                          const throw()
311{
312  if (line2.second<line1.first) return false; // line2 is at lower channels
313  if (line2.first>line1.second) return false; // line2 is at upper channels
314  return true; // line2 has an intersection or is adjacent to line1
315}
316
317//
318///////////////////////////////////////////////////////////////////////////////
319
320///////////////////////////////////////////////////////////////////////////////
321//
322// SDLineFinder::BuildUnion - An auxiliary object function to build a union
323// of several lines to account for a possibility of merging the nearby lines
324//
325
326// set an initial line (can be a first line in the sequence)
327SDLineFinder::BuildUnion::BuildUnion(const std::pair<int,int> &line1) :
328                             temp_line(line1) {}
329
330// update temp_line with a union of temp_line and new_line
331// provided there is no gap between the lines
332void SDLineFinder::BuildUnion::operator()(const std::pair<int,int> &new_line)
333                                   throw()
334{
335  if (new_line.first<temp_line.first) temp_line.first=new_line.first;
336  if (new_line.second>temp_line.second) temp_line.second=new_line.second;
337}
338
339// return the result (temp_line)
340const std::pair<int,int>& SDLineFinder::BuildUnion::result() const throw()
341{
342  return temp_line;
343}
344
345//
346///////////////////////////////////////////////////////////////////////////////
347
348///////////////////////////////////////////////////////////////////////////////
349//
350// SDLineFinder::LaterThan - An auxiliary object function to test whether a
351// specified line is at lower spectral channels (to preserve the order in
352// the line list)
353//
354
355// setup the line to compare with
356SDLineFinder::LaterThan::LaterThan(const std::pair<int,int> &in_line1) :
357                         line1(in_line1) {}
358
359// return true if line2 should be placed later than line1
360// in the ordered list (so, it is at greater channel numbers)
361bool SDLineFinder::LaterThan::operator()(const std::pair<int,int> &line2)
362                          const throw()
363{
364  if (line2.second<line1.first) return false; // line2 is at lower channels
365  if (line2.first>line1.second) return true; // line2 is at upper channels
366 
367  // line2 intersects with line1. We should have no such situation in
368  // practice
369  return line2.first>line1.first;
370}
371
372//
373///////////////////////////////////////////////////////////////////////////////
374
375
376///////////////////////////////////////////////////////////////////////////////
377//
378// SDLineFinder  -  a class for automated spectral line search
379//
380//
381
382SDLineFinder::SDLineFinder() throw() : edge(0,0)
383{
384  // detection threshold - the minimal signal to noise ratio
385  threshold=3.; // 3 sigma is a default
386  box_size=1./16.; // default box size for running mean calculations is
387                  // 1/16 of the whole spectrum
388  // A minimum number of consequtive channels, which should satisfy
389  // the detection criterion, to be a detection
390  min_nchan=3;     // default is 3 channels
391}
392
393SDLineFinder::~SDLineFinder() throw(AipsError) {}
394
395// set scan to work with (in_scan parameter), associated mask (in_mask
396// parameter) and the edge channel rejection (in_edge parameter)
397//   if in_edge has zero length, all channels chosen by mask will be used
398//   if in_edge has one element only, it represents the number of
399//      channels to drop from both sides of the spectrum
400//   in_edge is introduced for convinience, although all functionality
401//   can be achieved using a spectrum mask only   
402void SDLineFinder::setScan(const SDMemTableWrapper &in_scan,
403               const std::vector<bool> &in_mask,
404               const boost::python::tuple &in_edge) throw(AipsError)
405{
406  try {
407       scan=in_scan.getCP();
408       AlwaysAssert(!scan.null(),AipsError);
409       if (scan->nRow()!=1)
410           throw AipsError("SDLineFinder::setScan - in_scan contains more than 1 row."
411                           "Choose one first.");       
412       mask=in_mask;
413       if (mask.nelements()!=scan->nChan())
414           throw AipsError("SDLineFinder::setScan - in_scan and in_mask have different"
415                           "number of spectral channels.");
416
417       // number of elements in the in_edge tuple
418       int n=extract<int>(in_edge.attr("__len__")());
419       if (n>2 || n<0)
420           throw AipsError("SDLineFinder::setScan - the length of the in_edge parameter"
421                           "should not exceed 2");
422       if (!n) {
423           // all spectrum, no rejection
424           edge.first=0;
425           edge.second=scan->nChan();
426       } else {
427           edge.first=extract<int>(in_edge.attr("__getitem__")(0));
428           if (edge.first<0)
429               throw AipsError("SDLineFinder::setScan - the in_edge parameter has a negative"
430                                "number of channels to drop");
431           if (edge.first>=scan->nChan())
432               throw AipsError("SDLineFinder::setScan - all channels are rejected by the in_edge parameter");
433           if (n==2) {
434               edge.second=extract<int>(in_edge.attr("__getitem__")(1));
435               if (edge.second<0)
436                   throw AipsError("SDLineFinder::setScan - the in_edge parameter has a negative"
437                                   "number of channels to drop");
438               edge.second=scan->nChan()-edge.second;
439           } else edge.second=scan->nChan()-edge.first;
440           if (edge.second<0 || (edge.second+edge.first)>scan->nChan())
441               throw AipsError("SDLineFinder::setScan - all channels are rejected by the in_edge parameter");
442       }       
443  }
444  catch(const AipsError &ae) {
445       // setScan is unsuccessfull, reset scan/mask/edge
446       scan=CountedConstPtr<SDMemTable>(); // null pointer
447       mask.resize(0);
448       edge=pair<int,int>(0,0);
449       throw;
450  }
451}
452
453// search for spectral lines. Number of lines found is returned
454int SDLineFinder::findLines() throw(casa::AipsError)
455{
456  const int minboxnchan=4;
457  if (scan.null())
458      throw AipsError("SDLineFinder::findLines - a scan should be set first,"
459                      " use set_scan");
460  DebugAssert(mask.nelements()==scan->nChan(), AipsError);
461  int max_box_nchan=int(scan->nChan()*box_size); // number of channels in running
462                                                 // box
463  if (max_box_nchan<2)
464      throw AipsError("SDLineFinder::findLines - box_size is too small");
465
466  scan->getSpectrum(spectrum);
467
468  lines.resize(0); // search from the scratch
469  Vector<Bool> temp_mask(mask);
470  Bool need2iterate=True;
471  while (need2iterate) {
472     // line find algorithm
473     LFRunningMean lfalg(spectrum,temp_mask,edge,max_box_nchan,min_nchan,
474                         threshold);
475     std::list<pair<int,int> > new_lines;
476     lfalg.findLines(new_lines);
477     if (!new_lines.size()) need2iterate=False;
478     temp_mask=getMask();
479     // update the list (lines) merging intervals, if necessary
480     addNewSearchResult(new_lines);
481  }
482  return int(lines.size());
483}
484
485
486// get the mask to mask out all lines that have been found (default)
487// if invert=true, only channels belong to lines will be unmasked
488// Note: all channels originally masked by the input mask (in_mask
489//       in setScan) or dropped out by the edge parameter (in_edge
490//       in setScan) are still excluded regardless on the invert option
491std::vector<bool> SDLineFinder::getMask(bool invert)
492                                        const throw(casa::AipsError)
493{
494  try {
495       if (scan.null())
496           throw AipsError("SDLineFinder::getMask - a scan should be set first,"
497                      " use set_scan followed by find_lines");
498       DebugAssert(mask.nelements()==scan->nChan(), AipsError);
499       /*
500       if (!lines.size())
501           throw AipsError("SDLineFinder::getMask - one have to search for "
502                           "lines first, use find_lines");
503       */                         
504       std::vector<bool> res_mask(mask.nelements());
505       // iterator through lines
506       std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
507       for (int ch=0;ch<res_mask.size();++ch)
508            if (ch<edge.first || ch>=edge.second) res_mask[ch]=false;
509            else if (!mask[ch]) res_mask[ch]=false;
510            else {           
511                    res_mask[ch]=!invert; // no line by default
512                    if (cli==lines.end()) continue;
513                    if (ch>=cli->first && ch<cli->second)
514                        res_mask[ch]=invert; // this is a line
515                    if (ch>=cli->second)
516                        ++cli; // next line in the list
517                 }
518       
519       return res_mask;
520  }
521  catch (const AipsError &ae) {
522      throw;
523  } 
524  catch (const exception &ex) {
525      throw AipsError(String("SDLineFinder::getMask - STL error: ")+ex.what());
526  }
527}
528
529// get range for all lines found. If defunits is true (default), the
530// same units as used in the scan will be returned (e.g. velocity
531// instead of channels). If defunits is false, channels will be returned
532std::vector<int> SDLineFinder::getLineRanges(bool defunits)
533                             const throw(casa::AipsError)
534{
535  try {
536       if (scan.null())
537           throw AipsError("SDLineFinder::getLineRanges - a scan should be set first,"
538                      " use set_scan followed by find_lines");
539       DebugAssert(mask.nelements()==scan->nChan(), AipsError);
540       
541       if (!lines.size())
542           throw AipsError("SDLineFinder::getLineRanges - one have to search for "
543                           "lines first, use find_lines");
544                           
545       // temporary
546       if (defunits)
547           throw AipsError("SDLineFinder::getLineRanges - sorry, defunits=true have not "
548                           "yet been implemented");
549       //
550       std::vector<int> res(2*lines.size());
551       // iterator through lines & result
552       std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
553       std::vector<int>::iterator ri=res.begin();
554       for (;cli!=lines.end() && ri!=res.end();++cli,++ri) {           
555            *ri=cli->first;
556            if (++ri!=res.end())
557                *ri=cli->second-1;         
558       }
559       return res;
560  }
561  catch (const AipsError &ae) {
562      throw;
563  } 
564  catch (const exception &ex) {
565      throw AipsError(String("SDLineFinder::getLineRanges - STL error: ")+ex.what());
566  }
567}
568
569// concatenate two lists preserving the order. If two lines appear to
570// be adjacent, they are joined into the new one
571void SDLineFinder::addNewSearchResult(const std::list<pair<int, int> > &newlines)
572                        throw(AipsError)
573{
574  try {
575      for (std::list<pair<int,int> >::const_iterator cli=newlines.begin();
576           cli!=newlines.end();++cli) {
577           
578           // the first item, which has a non-void intersection or touches
579           // the new line
580           std::list<pair<int,int> >::iterator pos_beg=find_if(lines.begin(),
581                          lines.end(), IntersectsWith(*cli));           
582           // the last such item         
583           std::list<pair<int,int> >::iterator pos_end=find_if(pos_beg,
584                          lines.end(), not1(IntersectsWith(*cli)));
585
586           // extract all lines which intersect or touch a new one into
587           // a temporary buffer. This may invalidate the iterators
588           // line_buffer may be empty, if no lines intersects with a new
589           // one.
590           std::list<pair<int,int> > lines_buffer;
591           lines_buffer.splice(lines_buffer.end(),lines, pos_beg, pos_end);
592
593           // build a union of all intersecting lines
594           pair<int,int> union_line=for_each(lines_buffer.begin(),
595                   lines_buffer.end(),BuildUnion(*cli)).result();
596           
597           // search for a right place for the new line (union_line) and add
598           std::list<pair<int,int> >::iterator pos2insert=find_if(lines.begin(),
599                          lines.end(), LaterThan(union_line));
600           lines.insert(pos2insert,union_line);
601      }
602  }
603  catch (const AipsError &ae) {
604      throw;
605  } 
606  catch (const exception &ex) {
607      throw AipsError(String("SDLineFinder::addNewSearchResult - STL error: ")+ex.what());
608  }
609}
Note: See TracBrowser for help on using the repository browser.