source: trunk/src/STLineFinder.cpp@ 1564

Last change on this file since 1564 was 1497, checked in by Max Voronkov, 16 years ago

fixed #149. The problem was inside getMask method. In some rare circumstances largely when single channel detections are allowed and with spectral averaging enabled, the method failed to flag the lines which have already been detected and went into infinite loop by detecting them again. The condition of leaving the loop (no new lines found) was never satisfied as a result of this.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 42.2 KB
RevLine 
[297]1//#---------------------------------------------------------------------------
[881]2//# STLineFinder.cc: A class for automated spectral line search
[297]3//#--------------------------------------------------------------------------
4//# Copyright (C) 2004
5//# ATNF
6//#
7//# This program is free software; you can redistribute it and/or modify it
8//# under the terms of the GNU General Public License as published by the Free
9//# Software Foundation; either version 2 of the License, or (at your option)
10//# any later version.
11//#
12//# This program is distributed in the hope that it will be useful, but
13//# WITHOUT ANY WARRANTY; without even the implied warranty of
14//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
15//# Public License for more details.
16//#
17//# You should have received a copy of the GNU General Public License along
18//# with this program; if not, write to the Free Software Foundation, Inc.,
19//# 675 Massachusetts Ave, Cambridge, MA 02139, USA.
20//#
21//# Correspondence concerning this software should be addressed as follows:
22//# Internet email: Malte.Marquarding@csiro.au
23//# Postal address: Malte Marquarding,
24//# Australia Telescope National Facility,
25//# P.O. Box 76,
26//# Epping, NSW, 2121,
27//# AUSTRALIA
28//#
[890]29//# $Id: STLineFinder.cpp 1497 2009-02-04 06:42:20Z MaximVoronkov $
[297]30//#---------------------------------------------------------------------------
31
32
33// ASAP
[894]34#include "STLineFinder.h"
35#include "STFitter.h"
[297]36
37// STL
[343]38#include <functional>
39#include <algorithm>
[297]40#include <iostream>
[351]41#include <fstream>
[297]42
43using namespace asap;
44using namespace casa;
45using namespace std;
46
[344]47namespace asap {
48
[343]49///////////////////////////////////////////////////////////////////////////////
50//
[881]51// RunningBox - a running box calculator. This class implements
[1315]52// iterations over the specified spectrum and calculates
[351]53// running box filter statistics.
[343]54//
55
[351]56class RunningBox {
[331]57 // The input data to work with. Use reference symantics to avoid
[881]58 // an unnecessary copying
[331]59 const casa::Vector<casa::Float> &spectrum; // a buffer for the spectrum
60 const casa::Vector<casa::Bool> &mask; // associated mask
61 const std::pair<int,int> &edge; // start and stop+1 channels
62 // to work with
[881]63
[351]64 // statistics for running box filtering
65 casa::Float sumf; // sum of fluxes
66 casa::Float sumf2; // sum of squares of fluxes
67 casa::Float sumch; // sum of channel numbers (for linear fit)
68 casa::Float sumch2; // sum of squares of channel numbers (for linear fit)
69 casa::Float sumfch; // sum of flux*(channel number) (for linear fit)
[881]70
[331]71 int box_chan_cntr; // actual number of channels in the box
72 int max_box_nchan; // maximum allowed number of channels in the box
73 // (calculated from boxsize and actual spectrum size)
[351]74 // cache for derivative statistics
75 mutable casa::Bool need2recalculate; // if true, values of the statistics
76 // below are invalid
77 mutable casa::Float linmean; // a value of the linear fit to the
78 // points in the running box
79 mutable casa::Float linvariance; // the same for variance
80 int cur_channel; // the number of the current channel
81 int start_advance; // number of channel from which the box can
82 // be moved (the middle of the box, if there is no
[996]83 // masking)
[351]84public:
85 // set up the object with the references to actual data
86 // as well as the number of channels in the running box
87 RunningBox(const casa::Vector<casa::Float> &in_spectrum,
88 const casa::Vector<casa::Bool> &in_mask,
[996]89 const std::pair<int,int> &in_edge,
90 int in_max_box_nchan) throw(AipsError);
[881]91
[351]92 // access to the statistics
93 const casa::Float& getLinMean() const throw(AipsError);
94 const casa::Float& getLinVariance() const throw(AipsError);
95 const casa::Float aboveMean() const throw(AipsError);
96 int getChannel() const throw();
[881]97
[351]98 // actual number of channels in the box (max_box_nchan, if no channels
99 // are masked)
100 int getNumberOfBoxPoints() const throw();
[297]101
[351]102 // next channel
103 void next() throw(AipsError);
104
105 // checking whether there are still elements
106 casa::Bool haveMore() const throw();
107
108 // go to start
109 void rewind() throw(AipsError);
[881]110
[351]111protected:
112 // supplementary function to control running mean calculations.
113 // It adds a specified channel to the running mean box and
114 // removes (ch-maxboxnchan+1)'th channel from there
115 // Channels, for which the mask is false or index is beyond the
116 // allowed range, are ignored
117 void advanceRunningBox(int ch) throw(casa::AipsError);
118
119 // calculate derivative statistics. This function is const, because
120 // it updates the cache only
121 void updateDerivativeStatistics() const throw(AipsError);
122};
123
124//
125///////////////////////////////////////////////////////////////////////////////
126
127///////////////////////////////////////////////////////////////////////////////
128//
129// LFAboveThreshold An algorithm for line detection using running box
130// statistics. Line is detected if it is above the
131// specified threshold at the specified number of
132// consequtive channels. Prefix LF stands for Line Finder
133//
[352]134class LFAboveThreshold : protected LFLineListOperations {
[331]135 // temporary line edge channels and flag, which is True if the line
136 // was detected in the previous channels.
137 std::pair<int,int> cur_line;
138 casa::Bool is_detected_before;
139 int min_nchan; // A minimum number of consequtive
140 // channels, which should satisfy
[996]141 // the detection criterion, to be
142 // a detection
[881]143 casa::Float threshold; // detection threshold - the
[331]144 // minimal signal to noise ratio
[351]145 std::list<pair<int,int> > &lines; // list where detections are saved
146 // (pair: start and stop+1 channel)
147 RunningBox *running_box; // running box filter
[551]148 casa::Vector<Int> signs; // An array to store the signs of
149 // the value - current mean
[996]150 // (used to search wings)
[907]151 casa::Int last_sign; // a sign (+1, -1 or 0) of the
152 // last point of the detected line
153 //
[331]154public:
[351]155
156 // set up the detection criterion
157 LFAboveThreshold(std::list<pair<int,int> > &in_lines,
158 int in_min_nchan = 3,
[996]159 casa::Float in_threshold = 5) throw();
[351]160 virtual ~LFAboveThreshold() throw();
[881]161
[331]162 // replace the detection criterion
163 void setCriterion(int in_min_nchan, casa::Float in_threshold) throw();
[297]164
[551]165 // return the array with signs of the value-current mean
166 // An element is +1 if value>mean, -1 if less, 0 if equal.
167 // This array is updated each time the findLines method is called and
168 // is used to search the line wings
169 const casa::Vector<Int>& getSigns() const throw();
170
[331]171 // find spectral lines and add them into list
[344]172 // if statholder is not NULL, the accumulate function of it will be
173 // called for each channel to save statistics
[351]174 // spectrum, mask and edge - reference to the data
175 // max_box_nchan - number of channels in the running box
176 void findLines(const casa::Vector<casa::Float> &spectrum,
[996]177 const casa::Vector<casa::Bool> &mask,
178 const std::pair<int,int> &edge,
179 int max_box_nchan) throw(casa::AipsError);
[351]180
[331]181protected:
[297]182
[331]183 // process a channel: update curline and is_detected before and
184 // add a new line to the list, if necessary using processCurLine()
[351]185 // detect=true indicates that the current channel satisfies the criterion
186 void processChannel(Bool detect, const casa::Vector<casa::Bool> &mask)
187 throw(casa::AipsError);
[297]188
[331]189 // process the interval of channels stored in curline
190 // if it satisfies the criterion, add this interval as a new line
[351]191 void processCurLine(const casa::Vector<casa::Bool> &mask)
192 throw(casa::AipsError);
[924]193
[907]194 // get the sign of runningBox->aboveMean(). The RunningBox pointer
195 // should be defined
196 casa::Int getAboveMeanSign() const throw();
[331]197};
[344]198
199//
200///////////////////////////////////////////////////////////////////////////////
[351]201
[331]202} // namespace asap
[297]203
[344]204///////////////////////////////////////////////////////////////////////////////
[343]205//
[881]206// RunningBox - a running box calculator. This class implements
[351]207// interations over the specified spectrum and calculates
208// running box filter statistics.
[331]209//
[297]210
[331]211// set up the object with the references to actual data
212// and the number of channels in the running box
[351]213RunningBox::RunningBox(const casa::Vector<casa::Float> &in_spectrum,
214 const casa::Vector<casa::Bool> &in_mask,
[996]215 const std::pair<int,int> &in_edge,
216 int in_max_box_nchan) throw(AipsError) :
[331]217 spectrum(in_spectrum), mask(in_mask), edge(in_edge),
[996]218 max_box_nchan(in_max_box_nchan)
[351]219{
220 rewind();
221}
[331]222
[351]223void RunningBox::rewind() throw(AipsError) {
224 // fill statistics for initial box
225 box_chan_cntr=0; // no channels are currently in the box
226 sumf=0.; // initialize statistics
227 sumf2=0.;
228 sumch=0.;
229 sumch2=0.;
230 sumfch=0.;
231 int initial_box_ch=edge.first;
232 for (;initial_box_ch<edge.second && box_chan_cntr<max_box_nchan;
233 ++initial_box_ch)
234 advanceRunningBox(initial_box_ch);
[881]235
236 if (initial_box_ch==edge.second)
[351]237 throw AipsError("RunningBox::rewind - too much channels are masked");
238
239 cur_channel=edge.first;
[881]240 start_advance=initial_box_ch-max_box_nchan/2;
[351]241}
242
243// access to the statistics
244const casa::Float& RunningBox::getLinMean() const throw(AipsError)
[331]245{
[351]246 DebugAssert(cur_channel<edge.second, AipsError);
247 if (need2recalculate) updateDerivativeStatistics();
248 return linmean;
[297]249}
250
[351]251const casa::Float& RunningBox::getLinVariance() const throw(AipsError)
252{
253 DebugAssert(cur_channel<edge.second, AipsError);
254 if (need2recalculate) updateDerivativeStatistics();
255 return linvariance;
256}
[331]257
[351]258const casa::Float RunningBox::aboveMean() const throw(AipsError)
259{
260 DebugAssert(cur_channel<edge.second, AipsError);
261 if (need2recalculate) updateDerivativeStatistics();
262 return spectrum[cur_channel]-linmean;
263}
264
265int RunningBox::getChannel() const throw()
266{
267 return cur_channel;
268}
269
270// actual number of channels in the box (max_box_nchan, if no channels
271// are masked)
272int RunningBox::getNumberOfBoxPoints() const throw()
273{
274 return box_chan_cntr;
275}
276
[297]277// supplementary function to control running mean calculations.
278// It adds a specified channel to the running mean box and
279// removes (ch-max_box_nchan+1)'th channel from there
280// Channels, for which the mask is false or index is beyond the
281// allowed range, are ignored
[351]282void RunningBox::advanceRunningBox(int ch) throw(AipsError)
[297]283{
284 if (ch>=edge.first && ch<edge.second)
285 if (mask[ch]) { // ch is a valid channel
286 ++box_chan_cntr;
[351]287 sumf+=spectrum[ch];
288 sumf2+=square(spectrum[ch]);
[996]289 sumch+=Float(ch);
290 sumch2+=square(Float(ch));
291 sumfch+=spectrum[ch]*Float(ch);
292 need2recalculate=True;
[297]293 }
294 int ch2remove=ch-max_box_nchan;
295 if (ch2remove>=edge.first && ch2remove<edge.second)
296 if (mask[ch2remove]) { // ch2remove is a valid channel
297 --box_chan_cntr;
[351]298 sumf-=spectrum[ch2remove];
[881]299 sumf2-=square(spectrum[ch2remove]);
[996]300 sumch-=Float(ch2remove);
301 sumch2-=square(Float(ch2remove));
302 sumfch-=spectrum[ch2remove]*Float(ch2remove);
303 need2recalculate=True;
[297]304 }
305}
306
[351]307// next channel
308void RunningBox::next() throw(AipsError)
[297]309{
[351]310 AlwaysAssert(cur_channel<edge.second,AipsError);
311 ++cur_channel;
312 if (cur_channel+max_box_nchan/2<edge.second && cur_channel>=start_advance)
313 advanceRunningBox(cur_channel+max_box_nchan/2); // update statistics
[297]314}
315
[351]316// checking whether there are still elements
317casa::Bool RunningBox::haveMore() const throw()
318{
319 return cur_channel<edge.second;
320}
321
322// calculate derivative statistics. This function is const, because
323// it updates the cache only
324void RunningBox::updateDerivativeStatistics() const throw(AipsError)
325{
326 AlwaysAssert(box_chan_cntr, AipsError);
[881]327
[351]328 Float mean=sumf/Float(box_chan_cntr);
329
330 // linear LSF formulae
331 Float meanch=sumch/Float(box_chan_cntr);
332 Float meanch2=sumch2/Float(box_chan_cntr);
333 if (meanch==meanch2 || box_chan_cntr<3) {
334 // vertical line in the spectrum, can't calculate linmean and linvariance
335 linmean=0.;
336 linvariance=0.;
337 } else {
338 Float coeff=(sumfch/Float(box_chan_cntr)-meanch*mean)/
339 (meanch2-square(meanch));
340 linmean=coeff*(Float(cur_channel)-meanch)+mean;
341 linvariance=sqrt(sumf2/Float(box_chan_cntr)-square(mean)-
342 square(coeff)*(meanch2-square(meanch)));
343 }
344 need2recalculate=False;
345}
346
347
348//
349///////////////////////////////////////////////////////////////////////////////
350
351///////////////////////////////////////////////////////////////////////////////
352//
353// LFAboveThreshold - a running mean algorithm for line detection
354//
355//
356
357
358// set up the detection criterion
359LFAboveThreshold::LFAboveThreshold(std::list<pair<int,int> > &in_lines,
360 int in_min_nchan,
361 casa::Float in_threshold) throw() :
362 min_nchan(in_min_nchan), threshold(in_threshold),
[996]363 lines(in_lines), running_box(NULL) {}
[351]364
365LFAboveThreshold::~LFAboveThreshold() throw()
366{
367 if (running_box!=NULL) delete running_box;
368}
369
370// replace the detection criterion
371void LFAboveThreshold::setCriterion(int in_min_nchan, casa::Float in_threshold)
372 throw()
373{
374 min_nchan=in_min_nchan;
375 threshold=in_threshold;
376}
377
[907]378// get the sign of runningBox->aboveMean(). The RunningBox pointer
379// should be defined
380casa::Int LFAboveThreshold::getAboveMeanSign() const throw()
381{
382 const Float buf=running_box->aboveMean();
383 if (buf>0) return 1;
384 if (buf<0) return -1;
385 return 0;
386}
[351]387
[907]388
[297]389// process a channel: update cur_line and is_detected before and
390// add a new line to the list, if necessary
[351]391void LFAboveThreshold::processChannel(Bool detect,
392 const casa::Vector<casa::Bool> &mask) throw(casa::AipsError)
[297]393{
394 try {
[907]395 if (is_detected_before) {
396 // we have to check that the current detection has the
397 // same sign of running_box->aboveMean
398 // otherwise it could be a spurious detection
399 if (last_sign && last_sign!=getAboveMeanSign())
400 detect=False;
[1315]401 }
402 if (detect) {
403 last_sign=getAboveMeanSign();
404 if (is_detected_before)
405 cur_line.second=running_box->getChannel()+1;
406 else {
407 is_detected_before=True;
408 cur_line.first=running_box->getChannel();
409 cur_line.second=running_box->getChannel()+1;
410 }
411 } else processCurLine(mask);
[297]412 }
413 catch (const AipsError &ae) {
414 throw;
[881]415 }
[297]416 catch (const exception &ex) {
[351]417 throw AipsError(String("LFAboveThreshold::processChannel - STL error: ")+ex.what());
[297]418 }
419}
420
421// process the interval of channels stored in cur_line
422// if it satisfies the criterion, add this interval as a new line
[351]423void LFAboveThreshold::processCurLine(const casa::Vector<casa::Bool> &mask)
[331]424 throw(casa::AipsError)
[297]425{
426 try {
[881]427 if (is_detected_before) {
[1315]428 if (cur_line.second-cur_line.first>=min_nchan) {
[996]429 // it was a detection. We need to change the list
430 Bool add_new_line=False;
431 if (lines.size()) {
432 for (int i=lines.back().second;i<cur_line.first;++i)
433 if (mask[i]) { // one valid channel in between
434 // means that we deal with a separate line
435 add_new_line=True;
436 break;
437 }
438 } else add_new_line=True;
439 if (add_new_line)
440 lines.push_back(cur_line);
[881]441 else lines.back().second=cur_line.second;
[996]442 }
443 is_detected_before=False;
[881]444 }
[297]445 }
446 catch (const AipsError &ae) {
447 throw;
[881]448 }
[297]449 catch (const exception &ex) {
[351]450 throw AipsError(String("LFAboveThreshold::processCurLine - STL error: ")+ex.what());
[297]451 }
452}
453
[551]454// return the array with signs of the value-current mean
455// An element is +1 if value>mean, -1 if less, 0 if equal.
456// This array is updated each time the findLines method is called and
457// is used to search the line wings
458const casa::Vector<Int>& LFAboveThreshold::getSigns() const throw()
459{
460 return signs;
461}
462
[331]463// find spectral lines and add them into list
[351]464void LFAboveThreshold::findLines(const casa::Vector<casa::Float> &spectrum,
[996]465 const casa::Vector<casa::Bool> &mask,
466 const std::pair<int,int> &edge,
467 int max_box_nchan)
[331]468 throw(casa::AipsError)
469{
470 const int minboxnchan=4;
[351]471 try {
[331]472
[351]473 if (running_box!=NULL) delete running_box;
474 running_box=new RunningBox(spectrum,mask,edge,max_box_nchan);
[368]475
[881]476
[368]477 // determine the off-line variance first
478 // an assumption made: lines occupy a small part of the spectrum
[881]479
[368]480 std::vector<float> variances(edge.second-edge.first);
481 DebugAssert(variances.size(),AipsError);
[881]482
483 for (;running_box->haveMore();running_box->next())
[368]484 variances[running_box->getChannel()-edge.first]=
[996]485 running_box->getLinVariance();
[881]486
[368]487 // in the future we probably should do a proper Chi^2 estimation
488 // now a simple 80% of smaller values will be used.
489 // it may degrade the performance of the algorithm for weak lines
490 // due to a bias of the Chi^2 distribution.
491 stable_sort(variances.begin(),variances.end());
[881]492
[368]493 Float offline_variance=0;
[881]494 uInt offline_cnt=uInt(0.8*variances.size());
[368]495 if (!offline_cnt) offline_cnt=variances.size(); // no much else left,
496 // although it is very inaccurate
[881]497 for (uInt n=0;n<offline_cnt;++n)
498 offline_variance+=variances[n];
499 offline_variance/=Float(offline_cnt);
500
[351]501 // actual search algorithm
502 is_detected_before=False;
[368]503
[551]504 // initiate the signs array
505 signs.resize(spectrum.nelements());
506 signs=Vector<Int>(spectrum.nelements(),0);
507
[369]508 //ofstream os("dbg.dat");
[368]509 for (running_box->rewind();running_box->haveMore();
510 running_box->next()) {
[351]511 const int ch=running_box->getChannel();
512 if (running_box->getNumberOfBoxPoints()>=minboxnchan)
[996]513 processChannel(mask[ch] && (fabs(running_box->aboveMean()) >=
514 threshold*offline_variance), mask);
515 else processCurLine(mask); // just finish what was accumulated before
[907]516
[996]517 signs[ch]=getAboveMeanSign();
518 // os<<ch<<" "<<spectrum[ch]<<" "<<fabs(running_box->aboveMean())<<" "<<
[1315]519 // threshold*offline_variance<<endl;
[924]520
[996]521 const Float buf=running_box->aboveMean();
522 if (buf>0) signs[ch]=1;
523 else if (buf<0) signs[ch]=-1;
524 else if (buf==0) signs[ch]=0;
[1315]525 //os<<ch<<" "<<spectrum[ch]<<" "<<running_box->getLinMean()<<" "<<
526 // threshold*offline_variance<<endl;
[351]527 }
[352]528 if (lines.size())
529 searchForWings(lines,signs,mask,edge);
[344]530 }
[351]531 catch (const AipsError &ae) {
532 throw;
[881]533 }
[351]534 catch (const exception &ex) {
535 throw AipsError(String("LFAboveThreshold::findLines - STL error: ")+ex.what());
536 }
[331]537}
538
539//
540///////////////////////////////////////////////////////////////////////////////
541
[343]542///////////////////////////////////////////////////////////////////////////////
543//
[352]544// LFLineListOperations::IntersectsWith - An auxiliary object function
545// to test whether two lines have a non-void intersection
[343]546//
[331]547
[343]548
549// line1 - range of the first line: start channel and stop+1
[352]550LFLineListOperations::IntersectsWith::IntersectsWith(const std::pair<int,int> &in_line1) :
[343]551 line1(in_line1) {}
552
553
554// return true if line2 intersects with line1 with at least one
555// common channel, and false otherwise
556// line2 - range of the second line: start channel and stop+1
[352]557bool LFLineListOperations::IntersectsWith::operator()(const std::pair<int,int> &line2)
[343]558 const throw()
559{
560 if (line2.second<line1.first) return false; // line2 is at lower channels
561 if (line2.first>line1.second) return false; // line2 is at upper channels
562 return true; // line2 has an intersection or is adjacent to line1
563}
564
565//
566///////////////////////////////////////////////////////////////////////////////
567
568///////////////////////////////////////////////////////////////////////////////
569//
[352]570// LFLineListOperations::BuildUnion - An auxiliary object function to build a union
[343]571// of several lines to account for a possibility of merging the nearby lines
572//
573
574// set an initial line (can be a first line in the sequence)
[352]575LFLineListOperations::BuildUnion::BuildUnion(const std::pair<int,int> &line1) :
[343]576 temp_line(line1) {}
577
578// update temp_line with a union of temp_line and new_line
579// provided there is no gap between the lines
[352]580void LFLineListOperations::BuildUnion::operator()(const std::pair<int,int> &new_line)
[343]581 throw()
582{
583 if (new_line.first<temp_line.first) temp_line.first=new_line.first;
584 if (new_line.second>temp_line.second) temp_line.second=new_line.second;
585}
586
587// return the result (temp_line)
[352]588const std::pair<int,int>& LFLineListOperations::BuildUnion::result() const throw()
[343]589{
590 return temp_line;
591}
592
593//
594///////////////////////////////////////////////////////////////////////////////
595
596///////////////////////////////////////////////////////////////////////////////
597//
[352]598// LFLineListOperations::LaterThan - An auxiliary object function to test whether a
[343]599// specified line is at lower spectral channels (to preserve the order in
600// the line list)
601//
602
603// setup the line to compare with
[352]604LFLineListOperations::LaterThan::LaterThan(const std::pair<int,int> &in_line1) :
[343]605 line1(in_line1) {}
606
607// return true if line2 should be placed later than line1
608// in the ordered list (so, it is at greater channel numbers)
[352]609bool LFLineListOperations::LaterThan::operator()(const std::pair<int,int> &line2)
[343]610 const throw()
611{
612 if (line2.second<line1.first) return false; // line2 is at lower channels
613 if (line2.first>line1.second) return true; // line2 is at upper channels
[881]614
[343]615 // line2 intersects with line1. We should have no such situation in
616 // practice
617 return line2.first>line1.first;
618}
619
620//
621///////////////////////////////////////////////////////////////////////////////
622
623
624///////////////////////////////////////////////////////////////////////////////
625//
[881]626// STLineFinder - a class for automated spectral line search
[343]627//
628//
[331]629
[881]630STLineFinder::STLineFinder() throw() : edge(0,0)
[331]631{
[369]632 setOptions();
[331]633}
634
[369]635// set the parameters controlling algorithm
636// in_threshold a single channel threshold default is sqrt(3), which
637// means together with 3 minimum channels at least 3 sigma
638// detection criterion
639// For bad baseline shape, in_threshold may need to be
640// increased
641// in_min_nchan minimum number of channels above the threshold to report
642// a detection, default is 3
643// in_avg_limit perform the averaging of no more than in_avg_limit
644// adjacent channels to search for broad lines
[881]645// Default is 8, but for a bad baseline shape this
[369]646// parameter should be decreased (may be even down to a
647// minimum of 1 to disable this option) to avoid
648// confusing of baseline undulations with a real line.
[881]649// Setting a very large value doesn't usually provide
650// valid detections.
[369]651// in_box_size the box size for running mean calculation. Default is
652// 1./5. of the whole spectrum size
[881]653void STLineFinder::setOptions(const casa::Float &in_threshold,
[369]654 const casa::Int &in_min_nchan,
[996]655 const casa::Int &in_avg_limit,
[369]656 const casa::Float &in_box_size) throw()
657{
658 threshold=in_threshold;
659 min_nchan=in_min_nchan;
660 avg_limit=in_avg_limit;
661 box_size=in_box_size;
662}
663
[881]664STLineFinder::~STLineFinder() throw(AipsError) {}
[331]665
[907]666// set scan to work with (in_scan parameter)
667void STLineFinder::setScan(const ScantableWrapper &in_scan) throw(AipsError)
668{
669 scan=in_scan.getCP();
670 AlwaysAssert(!scan.null(),AipsError);
[924]671
[907]672}
673
674// search for spectral lines. Number of lines found is returned
675// in_edge and in_mask control channel rejection for a given row
[331]676// if in_edge has zero length, all channels chosen by mask will be used
677// if in_edge has one element only, it represents the number of
678// channels to drop from both sides of the spectrum
679// in_edge is introduced for convinience, although all functionality
[881]680// can be achieved using a spectrum mask only
[907]681int STLineFinder::findLines(const std::vector<bool> &in_mask,
[996]682 const std::vector<int> &in_edge,
683 const casa::uInt &whichRow) throw(casa::AipsError)
[331]684{
[996]685 //const int minboxnchan=4;
[907]686 if (scan.null())
687 throw AipsError("STLineFinder::findLines - a scan should be set first,"
688 " use set_scan");
[924]689
690 uInt nchan = scan->nchan(scan->getIF(whichRow));
[907]691 // set up mask and edge rejection
[924]692 // no mask given...
693 if (in_mask.size() == 0) {
694 mask = Vector<Bool>(nchan,True);
695 } else {
696 // use provided mask
697 mask=Vector<Bool>(in_mask);
698 }
699 if (mask.nelements()!=nchan)
[907]700 throw AipsError("STLineFinder::findLines - in_scan and in_mask have different"
701 "number of spectral channels.");
702 // number of elements in in_edge
703 if (in_edge.size()>2)
704 throw AipsError("STLineFinder::findLines - the length of the in_edge parameter"
[996]705 "should not exceed 2");
[907]706 if (!in_edge.size()) {
[881]707 // all spectra, no rejection
[331]708 edge.first=0;
[996]709 edge.second=nchan;
[907]710 } else {
711 edge.first=in_edge[0];
[996]712 if (edge.first<0)
713 throw AipsError("STLineFinder::findLines - the in_edge parameter has a negative"
714 "number of channels to drop");
715 if (edge.first>=int(nchan))
716 throw AipsError("STLineFinder::findLines - all channels are rejected by the in_edge parameter");
[907]717 if (in_edge.size()==2) {
[996]718 edge.second=in_edge[1];
719 if (edge.second<0)
720 throw AipsError("STLineFinder::findLines - the in_edge parameter has a negative"
721 "number of channels to drop");
[924]722 edge.second=nchan-edge.second;
[996]723 } else edge.second=nchan-edge.first;
[369]724 if (edge.second<0 || (edge.first>=edge.second))
[996]725 throw AipsError("STLineFinder::findLines - all channels are rejected by the in_edge parameter");
[881]726 }
[924]727
[907]728 //
[924]729 int max_box_nchan=int(nchan*box_size); // number of channels in running
[331]730 // box
731 if (max_box_nchan<2)
[881]732 throw AipsError("STLineFinder::findLines - box_size is too small");
[331]733
[881]734 spectrum.resize();
735 spectrum = Vector<Float>(scan->getSpectrum(whichRow));
[331]736
737 lines.resize(0); // search from the scratch
[370]738 last_row_used=whichRow;
[331]739 Vector<Bool> temp_mask(mask);
[351]740
741 Bool first_pass=True;
[368]742 Int avg_factor=1; // this number of adjacent channels is averaged together
743 // the total number of the channels is not altered
[996]744 // instead, min_nchan is also scaled
745 // it helps to search for broad lines
[551]746 Vector<Int> signs; // a buffer for signs of the value - mean quantity
747 // see LFAboveThreshold for details
[996]748 // We need only signs resulted from last iteration
749 // because all previous values may be corrupted by the
750 // presence of spectral lines
[344]751 while (true) {
[351]752 // a buffer for new lines found at this iteration
[881]753 std::list<pair<int,int> > new_lines;
[351]754
755 try {
[369]756 // line find algorithm
757 LFAboveThreshold lfalg(new_lines,avg_factor*min_nchan, threshold);
[352]758 lfalg.findLines(spectrum,temp_mask,edge,max_box_nchan);
[996]759 signs.resize(lfalg.getSigns().nelements());
760 signs=lfalg.getSigns();
[368]761 first_pass=False;
762 if (!new_lines.size())
[996]763 throw AipsError("spurious"); // nothing new - use the same
764 // code as for a real exception
[351]765 }
766 catch(const AipsError &ae) {
767 if (first_pass) throw;
[368]768 // nothing new - proceed to the next step of averaging, if any
[996]769 // (to search for broad lines)
[1315]770 if (avg_factor>=avg_limit) break; // averaging up to avg_limit
[996]771 // adjacent channels,
772 // stop after that
773 avg_factor*=2; // twice as more averaging
774 subtractBaseline(temp_mask,9);
775 averageAdjacentChannels(temp_mask,avg_factor);
776 continue;
[1315]777 }
[368]778 keepStrongestOnly(temp_mask,new_lines,max_box_nchan);
[343]779 // update the list (lines) merging intervals, if necessary
[344]780 addNewSearchResult(new_lines,lines);
781 // get a new mask
[881]782 temp_mask=getMask();
[343]783 }
[881]784
[551]785 // an additional search for wings because in the presence of very strong
786 // lines temporary mean used at each iteration will be higher than
787 // the true mean
[881]788
[551]789 if (lines.size())
790 LFLineListOperations::searchForWings(lines,signs,mask,edge);
[881]791
[331]792 return int(lines.size());
793}
794
[369]795// auxiliary function to fit and subtract a polynomial from the current
[890]796// spectrum. It uses the Fitter class. This action is required before
[369]797// reducing the spectral resolution if the baseline shape is bad
[881]798void STLineFinder::subtractBaseline(const casa::Vector<casa::Bool> &temp_mask,
[369]799 const casa::Int &order) throw(casa::AipsError)
800{
801 AlwaysAssert(spectrum.nelements(),AipsError);
802 // use the fact that temp_mask excludes channels rejected at the edge
[890]803 Fitter sdf;
[369]804 std::vector<float> absc(spectrum.nelements());
[996]805 for (unsigned int i=0;i<absc.size();++i)
[369]806 absc[i]=float(i)/float(spectrum.nelements());
807 std::vector<float> spec;
808 spectrum.tovector(spec);
809 std::vector<bool> std_mask;
810 temp_mask.tovector(std_mask);
811 sdf.setData(absc,spec,std_mask);
812 sdf.setExpression("poly",order);
813 if (!sdf.fit()) return; // fit failed, use old spectrum
[881]814 spectrum=casa::Vector<casa::Float>(sdf.getResidual());
[369]815}
816
[368]817// auxiliary function to average adjacent channels and update the mask
818// if at least one channel involved in summation is masked, all
819// output channels will be masked. This function works with the
820// spectrum and edge fields of this class, but updates the mask
821// array specified, rather than the field of this class
822// boxsize - a number of adjacent channels to average
[881]823void STLineFinder::averageAdjacentChannels(casa::Vector<casa::Bool> &mask2update,
[368]824 const casa::Int &boxsize)
825 throw(casa::AipsError)
826{
827 DebugAssert(mask2update.nelements()==spectrum.nelements(), AipsError);
828 DebugAssert(boxsize!=0,AipsError);
[881]829
[368]830 for (int n=edge.first;n<edge.second;n+=boxsize) {
831 DebugAssert(n<spectrum.nelements(),AipsError);
832 int nboxch=0; // number of channels currently in the box
833 Float mean=0; // buffer for mean calculations
834 for (int k=n;k<n+boxsize && k<edge.second;++k)
835 if (mask2update[k]) { // k is a valid channel
[996]836 mean+=spectrum[k];
837 ++nboxch;
[881]838 }
[368]839 if (nboxch<boxsize) // mask these channels
840 for (int k=n;k<n+boxsize && k<edge.second;++k)
[996]841 mask2update[k]=False;
[368]842 else {
843 mean/=Float(boxsize);
[996]844 for (int k=n;k<n+boxsize && k<edge.second;++k)
845 spectrum[k]=mean;
[368]846 }
847 }
848}
[331]849
[368]850
[297]851// get the mask to mask out all lines that have been found (default)
852// if invert=true, only channels belong to lines will be unmasked
853// Note: all channels originally masked by the input mask (in_mask
854// in setScan) or dropped out by the edge parameter (in_edge
855// in setScan) are still excluded regardless on the invert option
[881]856std::vector<bool> STLineFinder::getMask(bool invert)
[297]857 const throw(casa::AipsError)
858{
859 try {
860 if (scan.null())
[881]861 throw AipsError("STLineFinder::getMask - a scan should be set first,"
[297]862 " use set_scan followed by find_lines");
[924]863 DebugAssert(mask.nelements()==scan->getChannels(last_row_used), AipsError);
[297]864 /*
865 if (!lines.size())
[881]866 throw AipsError("STLineFinder::getMask - one have to search for "
[996]867 "lines first, use find_lines");
[881]868 */
[297]869 std::vector<bool> res_mask(mask.nelements());
870 // iterator through lines
871 std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
[1497]872 for (int ch=0;ch<int(res_mask.size());++ch) {
[297]873 if (ch<edge.first || ch>=edge.second) res_mask[ch]=false;
[996]874 else if (!mask[ch]) res_mask[ch]=false;
875 else {
876 res_mask[ch]=!invert; // no line by default
[1497]877 if (cli!=lines.end())
878 if (ch>=cli->first && ch<cli->second)
879 res_mask[ch]=invert; // this is a line
880 }
881 if (cli!=lines.end())
882 if (ch>=cli->second) {
883 ++cli; // next line in the list
884 }
885 }
[297]886 return res_mask;
887 }
888 catch (const AipsError &ae) {
889 throw;
[881]890 }
[297]891 catch (const exception &ex) {
[881]892 throw AipsError(String("STLineFinder::getMask - STL error: ")+ex.what());
[297]893 }
894}
895
[370]896// get range for all lines found. The same units as used in the scan
897// will be returned (e.g. velocity instead of channels).
[881]898std::vector<double> STLineFinder::getLineRanges()
[297]899 const throw(casa::AipsError)
900{
[370]901 // convert to required abscissa units
902 std::vector<double> vel=scan->getAbcissa(last_row_used);
903 std::vector<int> ranges=getLineRangesInChannels();
904 std::vector<double> res(ranges.size());
905
906 std::vector<int>::const_iterator cri=ranges.begin();
907 std::vector<double>::iterator outi=res.begin();
908 for (;cri!=ranges.end() && outi!=res.end();++cri,++outi)
909 if (uInt(*cri)>=vel.size())
[881]910 throw AipsError("STLineFinder::getLineRanges - getAbcissa provided less channels than reqired");
[370]911 else *outi=vel[*cri];
912 return res;
913}
914
915// The same as getLineRanges, but channels are always used to specify
916// the range
[881]917std::vector<int> STLineFinder::getLineRangesInChannels()
[370]918 const throw(casa::AipsError)
919{
[297]920 try {
921 if (scan.null())
[881]922 throw AipsError("STLineFinder::getLineRangesInChannels - a scan should be set first,"
[297]923 " use set_scan followed by find_lines");
[924]924 DebugAssert(mask.nelements()==scan->getChannels(last_row_used), AipsError);
[881]925
[297]926 if (!lines.size())
[881]927 throw AipsError("STLineFinder::getLineRangesInChannels - one have to search for "
[996]928 "lines first, use find_lines");
[881]929
[297]930 std::vector<int> res(2*lines.size());
931 // iterator through lines & result
932 std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
933 std::vector<int>::iterator ri=res.begin();
[881]934 for (;cli!=lines.end() && ri!=res.end();++cli,++ri) {
[996]935 *ri=cli->first;
936 if (++ri!=res.end())
937 *ri=cli->second-1;
[881]938 }
[297]939 return res;
940 }
941 catch (const AipsError &ae) {
942 throw;
[881]943 }
[297]944 catch (const exception &ex) {
[881]945 throw AipsError(String("STLineFinder::getLineRanges - STL error: ")+ex.what());
[297]946 }
947}
[331]948
[370]949
950
[368]951// an auxiliary function to remove all lines from the list, except the
952// strongest one (by absolute value). If the lines removed are real,
[881]953// they will be find again at the next iteration. This approach
954// increases the number of iterations required, but is able to remove
[1315]955// spurious detections likely to occur near strong lines.
[368]956// Later a better criterion may be implemented, e.g.
957// taking into consideration the brightness of different lines. Now
[881]958// use the simplest solution
[368]959// temp_mask - mask to work with (may be different from original mask as
960// the lines previously found may be masked)
961// lines2update - a list of lines to work with
962// nothing will be done if it is empty
963// max_box_nchan - channels in the running box for baseline filtering
[881]964void STLineFinder::keepStrongestOnly(const casa::Vector<casa::Bool> &temp_mask,
[996]965 std::list<std::pair<int, int> > &lines2update,
966 int max_box_nchan)
[368]967 throw (casa::AipsError)
968{
969 try {
970 if (!lines2update.size()) return; // ignore an empty list
971
972 // current line
973 std::list<std::pair<int,int> >::iterator li=lines2update.begin();
974 // strongest line
975 std::list<std::pair<int,int> >::iterator strongli=lines2update.begin();
976 // the flux (absolute value) of the strongest line
977 Float peak_flux=-1; // negative value - a flag showing uninitialized
978 // value
979 // the algorithm below relies on the list being ordered
980 Float tmp_flux=-1; // a temporary peak
981 for (RunningBox running_box(spectrum,temp_mask,edge,max_box_nchan);
982 running_box.haveMore(); running_box.next()) {
983
984 if (li==lines2update.end()) break; // no more lines
[996]985 const int ch=running_box.getChannel();
986 if (ch>=li->first && ch<li->second)
987 if (temp_mask[ch] && tmp_flux<fabs(running_box.aboveMean()))
988 tmp_flux=fabs(running_box.aboveMean());
989 if (ch==li->second-1) {
990 if (peak_flux<tmp_flux) { // if peak_flux=-1, this condition
991 peak_flux=tmp_flux; // will be satisfied
992 strongli=li;
993 }
994 ++li;
995 tmp_flux=-1;
996 }
[881]997 }
[368]998 std::list<std::pair<int,int> > res;
999 res.splice(res.end(),lines2update,strongli);
1000 lines2update.clear();
1001 lines2update.splice(lines2update.end(),res);
1002 }
1003 catch (const AipsError &ae) {
1004 throw;
[881]1005 }
[368]1006 catch (const exception &ex) {
[881]1007 throw AipsError(String("STLineFinder::keepStrongestOnly - STL error: ")+ex.what());
[368]1008 }
1009
1010}
1011
[352]1012//
1013///////////////////////////////////////////////////////////////////////////////
1014
1015
1016///////////////////////////////////////////////////////////////////////////////
1017//
1018// LFLineListOperations - a class incapsulating operations with line lists
1019// The LF prefix stands for Line Finder
1020//
1021
[331]1022// concatenate two lists preserving the order. If two lines appear to
1023// be adjacent, they are joined into the new one
[352]1024void LFLineListOperations::addNewSearchResult(const std::list<pair<int, int> > &newlines,
[881]1025 std::list<std::pair<int, int> > &lines_list)
[331]1026 throw(AipsError)
1027{
1028 try {
1029 for (std::list<pair<int,int> >::const_iterator cli=newlines.begin();
1030 cli!=newlines.end();++cli) {
[881]1031
[996]1032 // the first item, which has a non-void intersection or touches
1033 // the new line
1034 std::list<pair<int,int> >::iterator pos_beg=find_if(lines_list.begin(),
1035 lines_list.end(), IntersectsWith(*cli));
1036 // the last such item
1037 std::list<pair<int,int> >::iterator pos_end=find_if(pos_beg,
1038 lines_list.end(), not1(IntersectsWith(*cli)));
[343]1039
1040 // extract all lines which intersect or touch a new one into
[996]1041 // a temporary buffer. This may invalidate the iterators
1042 // line_buffer may be empty, if no lines intersects with a new
1043 // one.
1044 std::list<pair<int,int> > lines_buffer;
1045 lines_buffer.splice(lines_buffer.end(),lines_list, pos_beg, pos_end);
[343]1046
[996]1047 // build a union of all intersecting lines
1048 pair<int,int> union_line=for_each(lines_buffer.begin(),
1049 lines_buffer.end(),BuildUnion(*cli)).result();
[881]1050
[996]1051 // search for a right place for the new line (union_line) and add
1052 std::list<pair<int,int> >::iterator pos2insert=find_if(lines_list.begin(),
1053 lines_list.end(), LaterThan(union_line));
1054 lines_list.insert(pos2insert,union_line);
[331]1055 }
1056 }
1057 catch (const AipsError &ae) {
1058 throw;
[881]1059 }
[331]1060 catch (const exception &ex) {
[352]1061 throw AipsError(String("LFLineListOperations::addNewSearchResult - STL error: ")+ex.what());
[331]1062 }
1063}
[344]1064
1065// extend all line ranges to the point where a value stored in the
1066// specified vector changes (e.g. value-mean change its sign)
1067// This operation is necessary to include line wings, which are below
1068// the detection threshold. If lines becomes adjacent, they are
1069// merged together. Any masked channel stops the extension
[352]1070void LFLineListOperations::searchForWings(std::list<std::pair<int, int> > &newlines,
1071 const casa::Vector<casa::Int> &signs,
[996]1072 const casa::Vector<casa::Bool> &mask,
1073 const std::pair<int,int> &edge) throw(casa::AipsError)
[344]1074{
1075 try {
1076 for (std::list<pair<int,int> >::iterator li=newlines.begin();
1077 li!=newlines.end();++li) {
[996]1078 // update the left hand side
1079 for (int n=li->first-1;n>=edge.first;--n) {
1080 if (!mask[n]) break;
1081 if (signs[n]==signs[li->first] && signs[li->first])
1082 li->first=n;
1083 else break;
1084 }
1085 // update the right hand side
1086 for (int n=li->second;n<edge.second;++n) {
1087 if (!mask[n]) break;
1088 if (signs[n]==signs[li->second-1] && signs[li->second-1])
1089 li->second=n;
1090 else break;
1091 }
[344]1092 }
1093 // need to search for possible mergers.
1094 std::list<std::pair<int, int> > result_buffer;
1095 addNewSearchResult(newlines,result_buffer);
1096 newlines.clear();
1097 newlines.splice(newlines.end(),result_buffer);
1098 }
1099 catch (const AipsError &ae) {
1100 throw;
[881]1101 }
[344]1102 catch (const exception &ex) {
[352]1103 throw AipsError(String("LFLineListOperations::extendLines - STL error: ")+ex.what());
[344]1104 }
1105}
[352]1106
1107//
1108///////////////////////////////////////////////////////////////////////////////
Note: See TracBrowser for help on using the repository browser.