source: trunk/src/SDLineFinder.cc@ 343

Last change on this file since 343 was 343, checked in by vor010, 20 years ago

SDLineFinder - now algorithm does many iterations
to ensure that weak lines, which are close to the strong ones, are also found

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 22.9 KB
Line 
1//#---------------------------------------------------------------------------
2//# SDLineFinder.cc: A class for automated spectral line search
3//#--------------------------------------------------------------------------
4//# Copyright (C) 2004
5//# ATNF
6//#
7//# This program is free software; you can redistribute it and/or modify it
8//# under the terms of the GNU General Public License as published by the Free
9//# Software Foundation; either version 2 of the License, or (at your option)
10//# any later version.
11//#
12//# This program is distributed in the hope that it will be useful, but
13//# WITHOUT ANY WARRANTY; without even the implied warranty of
14//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
15//# Public License for more details.
16//#
17//# You should have received a copy of the GNU General Public License along
18//# with this program; if not, write to the Free Software Foundation, Inc.,
19//# 675 Massachusetts Ave, Cambridge, MA 02139, USA.
20//#
21//# Correspondence concerning this software should be addressed as follows:
22//# Internet email: Malte.Marquarding@csiro.au
23//# Postal address: Malte Marquarding,
24//# Australia Telescope National Facility,
25//# P.O. Box 76,
26//# Epping, NSW, 2121,
27//# AUSTRALIA
28//#
29//# $Id:
30//#---------------------------------------------------------------------------
31
32
33// ASAP
34#include "SDLineFinder.h"
35
36// STL
37#include <functional>
38#include <algorithm>
39#include <iostream>
40
41using namespace asap;
42using namespace casa;
43using namespace std;
44using namespace boost::python;
45
46///////////////////////////////////////////////////////////////////////////////
47//
48// LFRunningMean - a running mean algorithm for line detection
49//
50//
51
52namespace asap {
53
54// An auxiliary class implementing one pass of the line search algorithm,
55// which uses a running mean. We define this class here because it is
56// used in SDLineFinder only. The incapsulation of this code into a separate
57// class will provide a possibility to add new algorithms with minor changes
58class LFRunningMean {
59 // The input data to work with. Use reference symantics to avoid
60 // an unnecessary copying
61 const casa::Vector<casa::Float> &spectrum; // a buffer for the spectrum
62 const casa::Vector<casa::Bool> &mask; // associated mask
63 const std::pair<int,int> &edge; // start and stop+1 channels
64 // to work with
65
66 // statistics for running mean filtering
67 casa::Float sum; // sum of fluxes
68 casa::Float sumsq; // sum of squares of fluxes
69 int box_chan_cntr; // actual number of channels in the box
70 int max_box_nchan; // maximum allowed number of channels in the box
71 // (calculated from boxsize and actual spectrum size)
72
73 // temporary line edge channels and flag, which is True if the line
74 // was detected in the previous channels.
75 std::pair<int,int> cur_line;
76 casa::Bool is_detected_before;
77 int min_nchan; // A minimum number of consequtive
78 // channels, which should satisfy
79 // the detection criterion, to be
80 // a detection
81 casa::Float threshold; // detection threshold - the
82 // minimal signal to noise ratio
83public:
84 // set up the object with the references to actual data
85 // as well as the detection criterion (min_nchan and threshold, see above)
86 // and the number of channels in the running box
87 LFRunningMean(const casa::Vector<casa::Float> &in_spectrum,
88 const casa::Vector<casa::Bool> &in_mask,
89 const std::pair<int,int> &in_edge,
90 int in_max_box_nchan,
91 int in_min_nchan = 3,
92 casa::Float in_threshold = 5);
93
94 // replace the detection criterion
95 void setCriterion(int in_min_nchan, casa::Float in_threshold) throw();
96
97 // find spectral lines and add them into list
98 void findLines(std::list<pair<int,int> > &lines) throw(casa::AipsError);
99
100protected:
101 // supplementary function to control running mean calculations.
102 // It adds a specified channel to the running mean box and
103 // removes (ch-maxboxnchan+1)'th channel from there
104 // Channels, for which the mask is false or index is beyond the
105 // allowed range, are ignored
106 void advanceRunningBox(int ch) throw(casa::AipsError);
107
108
109 // test a channel against current running mean & rms
110 // if channel specified is masked out or beyond the allowed indexes,
111 // false is returned
112 casa::Bool testChannel(int ch) const
113 throw(std::exception, casa::AipsError);
114
115 // process a channel: update curline and is_detected before and
116 // add a new line to the list, if necessary using processCurLine()
117 void processChannel(std::list<pair<int,int> > &lines,
118 int ch) throw(casa::AipsError);
119
120 // process the interval of channels stored in curline
121 // if it satisfies the criterion, add this interval as a new line
122 void processCurLine(std::list<pair<int,int> > &lines) throw(casa::AipsError);
123
124};
125} // namespace asap
126
127//
128///////////////////////////////////////////////////////////////////////////////
129
130
131///////////////////////////////////////////////////////////////////////////////
132//
133// LFRunningMean - a running mean algorithm for line detection
134//
135//
136
137// set up the object with the references to actual data
138// as well as the detection criterion (min_nchan and threshold, see above)
139// and the number of channels in the running box
140LFRunningMean::LFRunningMean(const casa::Vector<casa::Float> &in_spectrum,
141 const casa::Vector<casa::Bool> &in_mask,
142 const std::pair<int,int> &in_edge,
143 int in_max_box_nchan,
144 int in_min_nchan, casa::Float in_threshold) :
145 spectrum(in_spectrum), mask(in_mask), edge(in_edge),
146 max_box_nchan(in_max_box_nchan),
147 min_nchan(in_min_nchan),threshold(in_threshold) {}
148
149// replace the detection criterion
150void LFRunningMean::setCriterion(int in_min_nchan, casa::Float in_threshold)
151 throw()
152{
153 min_nchan=in_min_nchan;
154 threshold=in_threshold;
155}
156
157
158// supplementary function to control running mean calculations.
159// It adds a specified channel to the running mean box and
160// removes (ch-max_box_nchan+1)'th channel from there
161// Channels, for which the mask is false or index is beyond the
162// allowed range, are ignored
163void LFRunningMean::advanceRunningBox(int ch) throw(AipsError)
164{
165 if (ch>=edge.first && ch<edge.second)
166 if (mask[ch]) { // ch is a valid channel
167 ++box_chan_cntr;
168 sum+=spectrum[ch];
169 sumsq+=square(spectrum[ch]);
170 }
171 int ch2remove=ch-max_box_nchan;
172 if (ch2remove>=edge.first && ch2remove<edge.second)
173 if (mask[ch2remove]) { // ch2remove is a valid channel
174 --box_chan_cntr;
175 sum-=spectrum[ch2remove];
176 sumsq-=square(spectrum[ch2remove]);
177 }
178}
179
180// test a channel against current running mean & rms
181// if channel specified is masked out or beyond the allowed indexes,
182// false is returned
183Bool LFRunningMean::testChannel(int ch) const throw(exception, AipsError)
184{
185 if (ch<edge.first || ch>=edge.second) return False;
186 if (!mask[ch]) return False;
187 DebugAssert(box_chan_cntr, AipsError);
188 Float mean=sum/Float(box_chan_cntr);
189 Float variance=sqrt(sumsq/Float(box_chan_cntr)-square(mean));
190 /*
191 if (ch>3900 && ch<4100)
192 cout<<"Tested "<<ch<<" mean="<<mean<<" variance="<<variance<<" sp-mean="<<spectrum[ch]-mean<<endl;
193 */
194 return fabs(spectrum[ch]-mean)>=threshold*variance;
195}
196
197// process a channel: update cur_line and is_detected before and
198// add a new line to the list, if necessary
199void LFRunningMean::processChannel(std::list<pair<int,int> > &lines,
200 int ch) throw(casa::AipsError)
201{
202 try {
203 if (testChannel(ch)) {
204 if (is_detected_before)
205 cur_line.second=ch+1;
206 else {
207 is_detected_before=True;
208 cur_line.first=ch;
209 cur_line.second=ch+1;
210 }
211 } else processCurLine(lines);
212 }
213 catch (const AipsError &ae) {
214 throw;
215 }
216 catch (const exception &ex) {
217 throw AipsError(String("SDLineFinder::processChannel - STL error: ")+ex.what());
218 }
219}
220
221// process the interval of channels stored in cur_line
222// if it satisfies the criterion, add this interval as a new line
223void LFRunningMean::processCurLine(std::list<pair<int,int> > &lines)
224 throw(casa::AipsError)
225{
226 try {
227 if (is_detected_before) {
228 if (cur_line.second-cur_line.first>min_nchan) {
229 // it was a detection. We need to change the list
230 Bool add_new_line=False;
231 if (lines.size()) {
232 for (int i=lines.back().second;i<cur_line.first;++i)
233 if (mask[i]) { // one valid channel in between
234 // means that we deal with a separate line
235 add_new_line=True;
236 break;
237 }
238 } else add_new_line=True;
239 if (add_new_line)
240 lines.push_back(cur_line);
241 else lines.back().second=cur_line.second;
242 }
243 is_detected_before=False;
244 }
245 }
246 catch (const AipsError &ae) {
247 throw;
248 }
249 catch (const exception &ex) {
250 throw AipsError(String("SDLineFinder::processCurLine - STL error: ")+ex.what());
251 }
252}
253
254// find spectral lines and add them into list
255void LFRunningMean::findLines(std::list<pair<int,int> > &lines)
256 throw(casa::AipsError)
257{
258 const int minboxnchan=4;
259
260 // fill statistics for initial box
261 box_chan_cntr=0; // no channels are currently in the box
262 sum=0; // initialize statistics
263 sumsq=0;
264 int initial_box_ch=edge.first;
265 for (;initial_box_ch<edge.second && box_chan_cntr<max_box_nchan;
266 ++initial_box_ch)
267 advanceRunningBox(initial_box_ch);
268
269 if (initial_box_ch==edge.second)
270 throw AipsError("LFRunningMean::findLines - too much channels are masked");
271
272 // actual search algorithm
273 is_detected_before=False;
274
275 if (box_chan_cntr>=minboxnchan)
276 // there is a minimum amount of data. We can search in the
277 // half of the initial box
278 for (int n=edge.first;n<initial_box_ch-max_box_nchan/2;++n)
279 processChannel(lines,n);
280
281 // now the box can be moved. n+max_box_nchan/2 is a new index which haven't
282 // yet been included in the running mean.
283 for (int n=initial_box_ch-max_box_nchan/2;n<edge.second;++n) {
284 advanceRunningBox(n+max_box_nchan/2); // update running mean & variance
285 if (box_chan_cntr>=minboxnchan) // have enough data to process
286 processChannel(lines,n);
287 else processCurLine(lines); // just finish what was accumulated before
288 }
289}
290
291//
292///////////////////////////////////////////////////////////////////////////////
293
294///////////////////////////////////////////////////////////////////////////////
295//
296// SDLineFinder::IntersectsWith - An auxiliary object function to test
297// whether two lines have a non-void intersection
298//
299
300
301// line1 - range of the first line: start channel and stop+1
302SDLineFinder::IntersectsWith::IntersectsWith(const std::pair<int,int> &in_line1) :
303 line1(in_line1) {}
304
305
306// return true if line2 intersects with line1 with at least one
307// common channel, and false otherwise
308// line2 - range of the second line: start channel and stop+1
309bool SDLineFinder::IntersectsWith::operator()(const std::pair<int,int> &line2)
310 const throw()
311{
312 if (line2.second<line1.first) return false; // line2 is at lower channels
313 if (line2.first>line1.second) return false; // line2 is at upper channels
314 return true; // line2 has an intersection or is adjacent to line1
315}
316
317//
318///////////////////////////////////////////////////////////////////////////////
319
320///////////////////////////////////////////////////////////////////////////////
321//
322// SDLineFinder::BuildUnion - An auxiliary object function to build a union
323// of several lines to account for a possibility of merging the nearby lines
324//
325
326// set an initial line (can be a first line in the sequence)
327SDLineFinder::BuildUnion::BuildUnion(const std::pair<int,int> &line1) :
328 temp_line(line1) {}
329
330// update temp_line with a union of temp_line and new_line
331// provided there is no gap between the lines
332void SDLineFinder::BuildUnion::operator()(const std::pair<int,int> &new_line)
333 throw()
334{
335 if (new_line.first<temp_line.first) temp_line.first=new_line.first;
336 if (new_line.second>temp_line.second) temp_line.second=new_line.second;
337}
338
339// return the result (temp_line)
340const std::pair<int,int>& SDLineFinder::BuildUnion::result() const throw()
341{
342 return temp_line;
343}
344
345//
346///////////////////////////////////////////////////////////////////////////////
347
348///////////////////////////////////////////////////////////////////////////////
349//
350// SDLineFinder::LaterThan - An auxiliary object function to test whether a
351// specified line is at lower spectral channels (to preserve the order in
352// the line list)
353//
354
355// setup the line to compare with
356SDLineFinder::LaterThan::LaterThan(const std::pair<int,int> &in_line1) :
357 line1(in_line1) {}
358
359// return true if line2 should be placed later than line1
360// in the ordered list (so, it is at greater channel numbers)
361bool SDLineFinder::LaterThan::operator()(const std::pair<int,int> &line2)
362 const throw()
363{
364 if (line2.second<line1.first) return false; // line2 is at lower channels
365 if (line2.first>line1.second) return true; // line2 is at upper channels
366
367 // line2 intersects with line1. We should have no such situation in
368 // practice
369 return line2.first>line1.first;
370}
371
372//
373///////////////////////////////////////////////////////////////////////////////
374
375
376///////////////////////////////////////////////////////////////////////////////
377//
378// SDLineFinder - a class for automated spectral line search
379//
380//
381
382SDLineFinder::SDLineFinder() throw() : edge(0,0)
383{
384 // detection threshold - the minimal signal to noise ratio
385 threshold=3.; // 3 sigma is a default
386 box_size=1./16.; // default box size for running mean calculations is
387 // 1/16 of the whole spectrum
388 // A minimum number of consequtive channels, which should satisfy
389 // the detection criterion, to be a detection
390 min_nchan=3; // default is 3 channels
391}
392
393SDLineFinder::~SDLineFinder() throw(AipsError) {}
394
395// set scan to work with (in_scan parameter), associated mask (in_mask
396// parameter) and the edge channel rejection (in_edge parameter)
397// if in_edge has zero length, all channels chosen by mask will be used
398// if in_edge has one element only, it represents the number of
399// channels to drop from both sides of the spectrum
400// in_edge is introduced for convinience, although all functionality
401// can be achieved using a spectrum mask only
402void SDLineFinder::setScan(const SDMemTableWrapper &in_scan,
403 const std::vector<bool> &in_mask,
404 const boost::python::tuple &in_edge) throw(AipsError)
405{
406 try {
407 scan=in_scan.getCP();
408 AlwaysAssert(!scan.null(),AipsError);
409 if (scan->nRow()!=1)
410 throw AipsError("SDLineFinder::setScan - in_scan contains more than 1 row."
411 "Choose one first.");
412 mask=in_mask;
413 if (mask.nelements()!=scan->nChan())
414 throw AipsError("SDLineFinder::setScan - in_scan and in_mask have different"
415 "number of spectral channels.");
416
417 // number of elements in the in_edge tuple
418 int n=extract<int>(in_edge.attr("__len__")());
419 if (n>2 || n<0)
420 throw AipsError("SDLineFinder::setScan - the length of the in_edge parameter"
421 "should not exceed 2");
422 if (!n) {
423 // all spectrum, no rejection
424 edge.first=0;
425 edge.second=scan->nChan();
426 } else {
427 edge.first=extract<int>(in_edge.attr("__getitem__")(0));
428 if (edge.first<0)
429 throw AipsError("SDLineFinder::setScan - the in_edge parameter has a negative"
430 "number of channels to drop");
431 if (edge.first>=scan->nChan())
432 throw AipsError("SDLineFinder::setScan - all channels are rejected by the in_edge parameter");
433 if (n==2) {
434 edge.second=extract<int>(in_edge.attr("__getitem__")(1));
435 if (edge.second<0)
436 throw AipsError("SDLineFinder::setScan - the in_edge parameter has a negative"
437 "number of channels to drop");
438 edge.second=scan->nChan()-edge.second;
439 } else edge.second=scan->nChan()-edge.first;
440 if (edge.second<0 || (edge.second+edge.first)>scan->nChan())
441 throw AipsError("SDLineFinder::setScan - all channels are rejected by the in_edge parameter");
442 }
443 }
444 catch(const AipsError &ae) {
445 // setScan is unsuccessfull, reset scan/mask/edge
446 scan=CountedConstPtr<SDMemTable>(); // null pointer
447 mask.resize(0);
448 edge=pair<int,int>(0,0);
449 throw;
450 }
451}
452
453// search for spectral lines. Number of lines found is returned
454int SDLineFinder::findLines() throw(casa::AipsError)
455{
456 const int minboxnchan=4;
457 if (scan.null())
458 throw AipsError("SDLineFinder::findLines - a scan should be set first,"
459 " use set_scan");
460 DebugAssert(mask.nelements()==scan->nChan(), AipsError);
461 int max_box_nchan=int(scan->nChan()*box_size); // number of channels in running
462 // box
463 if (max_box_nchan<2)
464 throw AipsError("SDLineFinder::findLines - box_size is too small");
465
466 scan->getSpectrum(spectrum);
467
468 lines.resize(0); // search from the scratch
469 Vector<Bool> temp_mask(mask);
470 Bool need2iterate=True;
471 while (need2iterate) {
472 // line find algorithm
473 LFRunningMean lfalg(spectrum,temp_mask,edge,max_box_nchan,min_nchan,
474 threshold);
475 std::list<pair<int,int> > new_lines;
476 lfalg.findLines(new_lines);
477 if (!new_lines.size()) need2iterate=False;
478 temp_mask=getMask();
479 // update the list (lines) merging intervals, if necessary
480 addNewSearchResult(new_lines);
481 }
482 return int(lines.size());
483}
484
485
486// get the mask to mask out all lines that have been found (default)
487// if invert=true, only channels belong to lines will be unmasked
488// Note: all channels originally masked by the input mask (in_mask
489// in setScan) or dropped out by the edge parameter (in_edge
490// in setScan) are still excluded regardless on the invert option
491std::vector<bool> SDLineFinder::getMask(bool invert)
492 const throw(casa::AipsError)
493{
494 try {
495 if (scan.null())
496 throw AipsError("SDLineFinder::getMask - a scan should be set first,"
497 " use set_scan followed by find_lines");
498 DebugAssert(mask.nelements()==scan->nChan(), AipsError);
499 /*
500 if (!lines.size())
501 throw AipsError("SDLineFinder::getMask - one have to search for "
502 "lines first, use find_lines");
503 */
504 std::vector<bool> res_mask(mask.nelements());
505 // iterator through lines
506 std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
507 for (int ch=0;ch<res_mask.size();++ch)
508 if (ch<edge.first || ch>=edge.second) res_mask[ch]=false;
509 else if (!mask[ch]) res_mask[ch]=false;
510 else {
511 res_mask[ch]=!invert; // no line by default
512 if (cli==lines.end()) continue;
513 if (ch>=cli->first && ch<cli->second)
514 res_mask[ch]=invert; // this is a line
515 if (ch>=cli->second)
516 ++cli; // next line in the list
517 }
518
519 return res_mask;
520 }
521 catch (const AipsError &ae) {
522 throw;
523 }
524 catch (const exception &ex) {
525 throw AipsError(String("SDLineFinder::getMask - STL error: ")+ex.what());
526 }
527}
528
529// get range for all lines found. If defunits is true (default), the
530// same units as used in the scan will be returned (e.g. velocity
531// instead of channels). If defunits is false, channels will be returned
532std::vector<int> SDLineFinder::getLineRanges(bool defunits)
533 const throw(casa::AipsError)
534{
535 try {
536 if (scan.null())
537 throw AipsError("SDLineFinder::getLineRanges - a scan should be set first,"
538 " use set_scan followed by find_lines");
539 DebugAssert(mask.nelements()==scan->nChan(), AipsError);
540
541 if (!lines.size())
542 throw AipsError("SDLineFinder::getLineRanges - one have to search for "
543 "lines first, use find_lines");
544
545 // temporary
546 if (defunits)
547 throw AipsError("SDLineFinder::getLineRanges - sorry, defunits=true have not "
548 "yet been implemented");
549 //
550 std::vector<int> res(2*lines.size());
551 // iterator through lines & result
552 std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
553 std::vector<int>::iterator ri=res.begin();
554 for (;cli!=lines.end() && ri!=res.end();++cli,++ri) {
555 *ri=cli->first;
556 if (++ri!=res.end())
557 *ri=cli->second-1;
558 }
559 return res;
560 }
561 catch (const AipsError &ae) {
562 throw;
563 }
564 catch (const exception &ex) {
565 throw AipsError(String("SDLineFinder::getLineRanges - STL error: ")+ex.what());
566 }
567}
568
569// concatenate two lists preserving the order. If two lines appear to
570// be adjacent, they are joined into the new one
571void SDLineFinder::addNewSearchResult(const std::list<pair<int, int> > &newlines)
572 throw(AipsError)
573{
574 try {
575 for (std::list<pair<int,int> >::const_iterator cli=newlines.begin();
576 cli!=newlines.end();++cli) {
577
578 // the first item, which has a non-void intersection or touches
579 // the new line
580 std::list<pair<int,int> >::iterator pos_beg=find_if(lines.begin(),
581 lines.end(), IntersectsWith(*cli));
582 // the last such item
583 std::list<pair<int,int> >::iterator pos_end=find_if(pos_beg,
584 lines.end(), not1(IntersectsWith(*cli)));
585
586 // extract all lines which intersect or touch a new one into
587 // a temporary buffer. This may invalidate the iterators
588 // line_buffer may be empty, if no lines intersects with a new
589 // one.
590 std::list<pair<int,int> > lines_buffer;
591 lines_buffer.splice(lines_buffer.end(),lines, pos_beg, pos_end);
592
593 // build a union of all intersecting lines
594 pair<int,int> union_line=for_each(lines_buffer.begin(),
595 lines_buffer.end(),BuildUnion(*cli)).result();
596
597 // search for a right place for the new line (union_line) and add
598 std::list<pair<int,int> >::iterator pos2insert=find_if(lines.begin(),
599 lines.end(), LaterThan(union_line));
600 lines.insert(pos2insert,union_line);
601 }
602 }
603 catch (const AipsError &ae) {
604 throw;
605 }
606 catch (const exception &ex) {
607 throw AipsError(String("SDLineFinder::addNewSearchResult - STL error: ")+ex.what());
608 }
609}
Note: See TracBrowser for help on using the repository browser.