source: trunk/src/SDLineFinder.cc@ 333

Last change on this file since 333 was 331, checked in by vor010, 20 years ago

Line searching algorithm is now in the separate
class LFRunningMean. It allows to run the same code several passes for different
masks, criteria, etc. In the future this interface may be more readily modified
to have multiple algorithms

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.3 KB
Line 
1//#---------------------------------------------------------------------------
2//# SDLineFinder.cc: A class for automated spectral line search
3//#--------------------------------------------------------------------------
4//# Copyright (C) 2004
5//# ATNF
6//#
7//# This program is free software; you can redistribute it and/or modify it
8//# under the terms of the GNU General Public License as published by the Free
9//# Software Foundation; either version 2 of the License, or (at your option)
10//# any later version.
11//#
12//# This program is distributed in the hope that it will be useful, but
13//# WITHOUT ANY WARRANTY; without even the implied warranty of
14//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
15//# Public License for more details.
16//#
17//# You should have received a copy of the GNU General Public License along
18//# with this program; if not, write to the Free Software Foundation, Inc.,
19//# 675 Massachusetts Ave, Cambridge, MA 02139, USA.
20//#
21//# Correspondence concerning this software should be addressed as follows:
22//# Internet email: Malte.Marquarding@csiro.au
23//# Postal address: Malte Marquarding,
24//# Australia Telescope National Facility,
25//# P.O. Box 76,
26//# Epping, NSW, 2121,
27//# AUSTRALIA
28//#
29//# $Id:
30//#---------------------------------------------------------------------------
31
32
33// ASAP
34#include "SDLineFinder.h"
35
36// STL
37#include <iostream>
38
39using namespace asap;
40using namespace casa;
41using namespace std;
42using namespace boost::python;
43
44namespace asap {
45
46// An auxiliary class implementing one pass of the line search algorithm,
47// which uses a running mean. We define this class here because it is
48// used in SDLineFinder only. The incapsulation of this code into a separate
49// class will provide a possibility to add new algorithms with minor changes
50class LFRunningMean {
51 // The input data to work with. Use reference symantics to avoid
52 // an unnecessary copying
53 const casa::Vector<casa::Float> &spectrum; // a buffer for the spectrum
54 const casa::Vector<casa::Bool> &mask; // associated mask
55 const std::pair<int,int> &edge; // start and stop+1 channels
56 // to work with
57
58 // statistics for running mean filtering
59 casa::Float sum; // sum of fluxes
60 casa::Float sumsq; // sum of squares of fluxes
61 int box_chan_cntr; // actual number of channels in the box
62 int max_box_nchan; // maximum allowed number of channels in the box
63 // (calculated from boxsize and actual spectrum size)
64
65 // temporary line edge channels and flag, which is True if the line
66 // was detected in the previous channels.
67 std::pair<int,int> cur_line;
68 casa::Bool is_detected_before;
69 int min_nchan; // A minimum number of consequtive
70 // channels, which should satisfy
71 // the detection criterion, to be
72 // a detection
73 casa::Float threshold; // detection threshold - the
74 // minimal signal to noise ratio
75public:
76 // set up the object with the references to actual data
77 // as well as the detection criterion (min_nchan and threshold, see above)
78 // and the number of channels in the running box
79 LFRunningMean(const casa::Vector<casa::Float> &in_spectrum,
80 const casa::Vector<casa::Bool> &in_mask,
81 const std::pair<int,int> &in_edge,
82 int in_max_box_nchan,
83 int in_min_nchan = 3,
84 casa::Float in_threshold = 5);
85
86 // replace the detection criterion
87 void setCriterion(int in_min_nchan, casa::Float in_threshold) throw();
88
89 // find spectral lines and add them into list
90 void findLines(std::list<pair<int,int> > &lines) throw(casa::AipsError);
91
92protected:
93 // supplementary function to control running mean calculations.
94 // It adds a specified channel to the running mean box and
95 // removes (ch-maxboxnchan+1)'th channel from there
96 // Channels, for which the mask is false or index is beyond the
97 // allowed range, are ignored
98 void advanceRunningBox(int ch) throw(casa::AipsError);
99
100
101 // test a channel against current running mean & rms
102 // if channel specified is masked out or beyond the allowed indexes,
103 // false is returned
104 casa::Bool testChannel(int ch) const
105 throw(std::exception, casa::AipsError);
106
107 // process a channel: update curline and is_detected before and
108 // add a new line to the list, if necessary using processCurLine()
109 void processChannel(std::list<pair<int,int> > &lines,
110 int ch) throw(casa::AipsError);
111
112 // process the interval of channels stored in curline
113 // if it satisfies the criterion, add this interval as a new line
114 void processCurLine(std::list<pair<int,int> > &lines) throw(casa::AipsError);
115
116};
117} // namespace asap
118
119///////////////////////////////////////////////////////////////////////////////
120//
121// LFRunningMean - a running mean algorithm for line detection
122//
123//
124
125// set up the object with the references to actual data
126// as well as the detection criterion (min_nchan and threshold, see above)
127// and the number of channels in the running box
128LFRunningMean::LFRunningMean(const casa::Vector<casa::Float> &in_spectrum,
129 const casa::Vector<casa::Bool> &in_mask,
130 const std::pair<int,int> &in_edge,
131 int in_max_box_nchan,
132 int in_min_nchan, casa::Float in_threshold) :
133 spectrum(in_spectrum), mask(in_mask), edge(in_edge),
134 max_box_nchan(in_max_box_nchan),
135 min_nchan(in_min_nchan),threshold(in_threshold) {}
136
137// replace the detection criterion
138void LFRunningMean::setCriterion(int in_min_nchan, casa::Float in_threshold)
139 throw()
140{
141 min_nchan=in_min_nchan;
142 threshold=in_threshold;
143}
144
145
146// supplementary function to control running mean calculations.
147// It adds a specified channel to the running mean box and
148// removes (ch-max_box_nchan+1)'th channel from there
149// Channels, for which the mask is false or index is beyond the
150// allowed range, are ignored
151void LFRunningMean::advanceRunningBox(int ch) throw(AipsError)
152{
153 if (ch>=edge.first && ch<edge.second)
154 if (mask[ch]) { // ch is a valid channel
155 ++box_chan_cntr;
156 sum+=spectrum[ch];
157 sumsq+=square(spectrum[ch]);
158 }
159 int ch2remove=ch-max_box_nchan;
160 if (ch2remove>=edge.first && ch2remove<edge.second)
161 if (mask[ch2remove]) { // ch2remove is a valid channel
162 --box_chan_cntr;
163 sum-=spectrum[ch2remove];
164 sumsq-=square(spectrum[ch2remove]);
165 }
166}
167
168// test a channel against current running mean & rms
169// if channel specified is masked out or beyond the allowed indexes,
170// false is returned
171Bool LFRunningMean::testChannel(int ch) const throw(exception, AipsError)
172{
173 if (ch<edge.first || ch>=edge.second) return False;
174 if (!mask[ch]) return False;
175 DebugAssert(box_chan_cntr, AipsError);
176 Float mean=sum/Float(box_chan_cntr);
177 Float variance=sqrt(sumsq/Float(box_chan_cntr)-square(mean));
178 /*
179 if (ch>3900 && ch<4100)
180 cout<<"Tested "<<ch<<" mean="<<mean<<" variance="<<variance<<" sp-mean="<<spectrum[ch]-mean<<endl;
181 */
182 return fabs(spectrum[ch]-mean)>=threshold*variance;
183}
184
185// process a channel: update cur_line and is_detected before and
186// add a new line to the list, if necessary
187void LFRunningMean::processChannel(std::list<pair<int,int> > &lines,
188 int ch) throw(casa::AipsError)
189{
190 try {
191 if (testChannel(ch)) {
192 if (is_detected_before)
193 cur_line.second=ch+1;
194 else {
195 is_detected_before=True;
196 cur_line.first=ch;
197 cur_line.second=ch+1;
198 }
199 } else processCurLine(lines);
200 }
201 catch (const AipsError &ae) {
202 throw;
203 }
204 catch (const exception &ex) {
205 throw AipsError(String("SDLineFinder::processChannel - STL error: ")+ex.what());
206 }
207}
208
209// process the interval of channels stored in cur_line
210// if it satisfies the criterion, add this interval as a new line
211void LFRunningMean::processCurLine(std::list<pair<int,int> > &lines)
212 throw(casa::AipsError)
213{
214 try {
215 if (is_detected_before) {
216 if (cur_line.second-cur_line.first>min_nchan) {
217 // it was a detection. We need to change the list
218 Bool add_new_line=False;
219 if (lines.size()) {
220 for (int i=lines.back().second;i<cur_line.first;++i)
221 if (mask[i]) { // one valid channel in between
222 // means that we deal with a separate line
223 add_new_line=True;
224 break;
225 }
226 } else add_new_line=True;
227 if (add_new_line)
228 lines.push_back(cur_line);
229 else lines.back().second=cur_line.second;
230 }
231 is_detected_before=False;
232 }
233 }
234 catch (const AipsError &ae) {
235 throw;
236 }
237 catch (const exception &ex) {
238 throw AipsError(String("SDLineFinder::processCurLine - STL error: ")+ex.what());
239 }
240}
241
242// find spectral lines and add them into list
243void LFRunningMean::findLines(std::list<pair<int,int> > &lines)
244 throw(casa::AipsError)
245{
246 const int minboxnchan=4;
247
248 // fill statistics for initial box
249 box_chan_cntr=0; // no channels are currently in the box
250 sum=0; // initialize statistics
251 sumsq=0;
252 int initial_box_ch=edge.first;
253 for (;initial_box_ch<edge.second && box_chan_cntr<max_box_nchan;
254 ++initial_box_ch)
255 advanceRunningBox(initial_box_ch);
256
257 if (initial_box_ch==edge.second)
258 throw AipsError("LFRunningMean::findLines - too much channels are masked");
259
260 // actual search algorithm
261 is_detected_before=False;
262
263 if (box_chan_cntr>=minboxnchan)
264 // there is a minimum amount of data. We can search in the
265 // half of the initial box
266 for (int n=edge.first;n<initial_box_ch-max_box_nchan/2;++n)
267 processChannel(lines,n);
268
269 // now the box can be moved. n+max_box_nchan/2 is a new index which haven't
270 // yet been included in the running mean.
271 for (int n=initial_box_ch-max_box_nchan/2;n<edge.second;++n) {
272 advanceRunningBox(n+max_box_nchan/2); // update running mean & variance
273 if (box_chan_cntr>=minboxnchan) // have enough data to process
274 processChannel(lines,n);
275 else processCurLine(lines); // just finish what was accumulated before
276 }
277}
278
279//
280///////////////////////////////////////////////////////////////////////////////
281
282
283// SDLineFinder - a class for automated spectral line search
284
285SDLineFinder::SDLineFinder() throw() : edge(0,0)
286{
287 // detection threshold - the minimal signal to noise ratio
288 threshold=3.; // 3 sigma is a default
289 box_size=1./16.; // default box size for running mean calculations is
290 // 1/16 of the whole spectrum
291 // A minimum number of consequtive channels, which should satisfy
292 // the detection criterion, to be a detection
293 min_nchan=3; // default is 3 channels
294}
295
296SDLineFinder::~SDLineFinder() throw(AipsError) {}
297
298// set scan to work with (in_scan parameter), associated mask (in_mask
299// parameter) and the edge channel rejection (in_edge parameter)
300// if in_edge has zero length, all channels chosen by mask will be used
301// if in_edge has one element only, it represents the number of
302// channels to drop from both sides of the spectrum
303// in_edge is introduced for convinience, although all functionality
304// can be achieved using a spectrum mask only
305void SDLineFinder::setScan(const SDMemTableWrapper &in_scan,
306 const std::vector<bool> &in_mask,
307 const boost::python::tuple &in_edge) throw(AipsError)
308{
309 try {
310 scan=in_scan.getCP();
311 AlwaysAssert(!scan.null(),AipsError);
312 if (scan->nRow()!=1)
313 throw AipsError("SDLineFinder::setScan - in_scan contains more than 1 row."
314 "Choose one first.");
315 mask=in_mask;
316 if (mask.nelements()!=scan->nChan())
317 throw AipsError("SDLineFinder::setScan - in_scan and in_mask have different"
318 "number of spectral channels.");
319
320 // number of elements in the in_edge tuple
321 int n=extract<int>(in_edge.attr("__len__")());
322 if (n>2 || n<0)
323 throw AipsError("SDLineFinder::setScan - the length of the in_edge parameter"
324 "should not exceed 2");
325 if (!n) {
326 // all spectrum, no rejection
327 edge.first=0;
328 edge.second=scan->nChan();
329 } else {
330 edge.first=extract<int>(in_edge.attr("__getitem__")(0));
331 if (edge.first<0)
332 throw AipsError("SDLineFinder::setScan - the in_edge parameter has a negative"
333 "number of channels to drop");
334 if (edge.first>=scan->nChan())
335 throw AipsError("SDLineFinder::setScan - all channels are rejected by the in_edge parameter");
336 if (n==2) {
337 edge.second=extract<int>(in_edge.attr("__getitem__")(1));
338 if (edge.second<0)
339 throw AipsError("SDLineFinder::setScan - the in_edge parameter has a negative"
340 "number of channels to drop");
341 edge.second=scan->nChan()-edge.second;
342 } else edge.second=scan->nChan()-edge.first;
343 if (edge.second<0 || (edge.second+edge.first)>scan->nChan())
344 throw AipsError("SDLineFinder::setScan - all channels are rejected by the in_edge parameter");
345 }
346 }
347 catch(const AipsError &ae) {
348 // setScan is unsuccessfull, reset scan/mask/edge
349 scan=CountedConstPtr<SDMemTable>(); // null pointer
350 mask.resize(0);
351 edge=pair<int,int>(0,0);
352 throw;
353 }
354}
355
356// search for spectral lines. Number of lines found is returned
357int SDLineFinder::findLines() throw(casa::AipsError)
358{
359 const int minboxnchan=4;
360 if (scan.null())
361 throw AipsError("SDLineFinder::findLines - a scan should be set first,"
362 " use set_scan");
363 DebugAssert(mask.nelements()==scan->nChan(), AipsError);
364 int max_box_nchan=int(scan->nChan()*box_size); // number of channels in running
365 // box
366 if (max_box_nchan<2)
367 throw AipsError("SDLineFinder::findLines - box_size is too small");
368
369 scan->getSpectrum(spectrum);
370
371 lines.resize(0); // search from the scratch
372 Vector<Bool> temp_mask(mask);
373 size_t cursz;
374 do {
375 cursz=lines.size();
376 // line find algorithm
377 LFRunningMean lfalg(spectrum,temp_mask,edge,max_box_nchan,min_nchan,threshold);
378 lfalg.findLines(lines);
379 temp_mask=getMask();
380 } while (cursz!=lines.size());
381 return int(lines.size());
382}
383
384
385// get the mask to mask out all lines that have been found (default)
386// if invert=true, only channels belong to lines will be unmasked
387// Note: all channels originally masked by the input mask (in_mask
388// in setScan) or dropped out by the edge parameter (in_edge
389// in setScan) are still excluded regardless on the invert option
390std::vector<bool> SDLineFinder::getMask(bool invert)
391 const throw(casa::AipsError)
392{
393 try {
394 if (scan.null())
395 throw AipsError("SDLineFinder::getMask - a scan should be set first,"
396 " use set_scan followed by find_lines");
397 DebugAssert(mask.nelements()==scan->nChan(), AipsError);
398 /*
399 if (!lines.size())
400 throw AipsError("SDLineFinder::getMask - one have to search for "
401 "lines first, use find_lines");
402 */
403 std::vector<bool> res_mask(mask.nelements());
404 // iterator through lines
405 std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
406 for (int ch=0;ch<res_mask.size();++ch)
407 if (ch<edge.first || ch>=edge.second) res_mask[ch]=false;
408 else if (!mask[ch]) res_mask[ch]=false;
409 else {
410 res_mask[ch]=!invert; // no line by default
411 if (cli==lines.end()) continue;
412 if (ch>=cli->first && ch<cli->second)
413 res_mask[ch]=invert; // this is a line
414 if (ch>=cli->second)
415 ++cli; // next line in the list
416 }
417
418 return res_mask;
419 }
420 catch (const AipsError &ae) {
421 throw;
422 }
423 catch (const exception &ex) {
424 throw AipsError(String("SDLineFinder::getMask - STL error: ")+ex.what());
425 }
426}
427
428// get range for all lines found. If defunits is true (default), the
429// same units as used in the scan will be returned (e.g. velocity
430// instead of channels). If defunits is false, channels will be returned
431std::vector<int> SDLineFinder::getLineRanges(bool defunits)
432 const throw(casa::AipsError)
433{
434 try {
435 if (scan.null())
436 throw AipsError("SDLineFinder::getLineRanges - a scan should be set first,"
437 " use set_scan followed by find_lines");
438 DebugAssert(mask.nelements()==scan->nChan(), AipsError);
439
440 if (!lines.size())
441 throw AipsError("SDLineFinder::getLineRanges - one have to search for "
442 "lines first, use find_lines");
443
444 // temporary
445 if (defunits)
446 throw AipsError("SDLineFinder::getLineRanges - sorry, defunits=true have not "
447 "yet been implemented");
448 //
449 std::vector<int> res(2*lines.size());
450 // iterator through lines & result
451 std::list<std::pair<int,int> >::const_iterator cli=lines.begin();
452 std::vector<int>::iterator ri=res.begin();
453 for (;cli!=lines.end() && ri!=res.end();++cli,++ri) {
454 *ri=cli->first;
455 if (++ri!=res.end())
456 *ri=cli->second-1;
457 }
458 return res;
459 }
460 catch (const AipsError &ae) {
461 throw;
462 }
463 catch (const exception &ex) {
464 throw AipsError(String("SDLineFinder::getLineRanges - STL error: ")+ex.what());
465 }
466}
467
468// concatenate two lists preserving the order. If two lines appear to
469// be adjacent, they are joined into the new one
470void SDLineFinder::addNewSearchResult(const std::list<pair<int, int> > &newlines)
471 throw(AipsError)
472{
473 try {
474 for (std::list<pair<int,int> >::const_iterator cli=newlines.begin();
475 cli!=newlines.end();++cli) {
476 // search for a right place for the new line
477 //TODO
478 }
479 }
480 catch (const AipsError &ae) {
481 throw;
482 }
483 catch (const exception &ex) {
484 throw AipsError(String("SDLineFinder::addNewSearchResult - STL error: ")+ex.what());
485 }
486
487}
Note: See TracBrowser for help on using the repository browser.