Changeset 1757 for branches/alma/src/STLineFinder.cpp
- Timestamp:
- 06/09/10 19:03:06 (14 years ago)
- Location:
- branches/alma
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/alma
-
Property
svn:ignore
set to
.sconf_temp
.sconsign.dblite
-
Property
svn:mergeinfo
set to
/branches/asap-3.x merged eligible
-
Property
svn:ignore
set to
-
branches/alma/src/STLineFinder.cpp
r1603 r1757 34 34 #include "STLineFinder.h" 35 35 #include "STFitter.h" 36 #include "IndexedCompare.h" 36 37 37 38 // STL … … 110 111 111 112 protected: 112 // supplementary function to control running mean calculations.113 // It adds a specified channel to the running meanbox and113 // supplementary function to control running mean/median calculations. 114 // It adds a specified channel to the running box and 114 115 // removes (ch-maxboxnchan+1)'th channel from there 115 116 // Channels, for which the mask is false or index is beyond the … … 152 153 // last point of the detected line 153 154 // 155 bool itsUseMedian; // true if median statistics is used 156 // to determine the noise level, otherwise 157 // it is the mean of the lowest 80% of deviations 158 // (default) 159 int itsNoiseSampleSize; // sample size used to estimate the noise statistics 160 // Negative value means the whole spectrum is used (default) 154 161 public: 155 162 … … 157 164 LFAboveThreshold(std::list<pair<int,int> > &in_lines, 158 165 int in_min_nchan = 3, 159 casa::Float in_threshold = 5) throw(); 166 casa::Float in_threshold = 5, 167 bool use_median = false, 168 int noise_sample_size = -1) throw(); 160 169 virtual ~LFAboveThreshold() throw(); 161 170 … … 200 209 /////////////////////////////////////////////////////////////////////////////// 201 210 211 /////////////////////////////////////////////////////////////////////////////// 212 // 213 // LFNoiseEstimator a helper class designed to estimate off-line variance 214 // using statistics depending on the distribution of 215 // values (e.g. like a median) 216 // 217 // Two statistics are supported: median and an average of 218 // 80% of smallest values. 219 // 220 221 struct LFNoiseEstimator { 222 // construct an object 223 // size - maximum sample size. After a size number of elements is processed 224 // any new samples would cause the algorithm to drop the oldest samples in the 225 // buffer. 226 explicit LFNoiseEstimator(size_t size); 227 228 // add a new sample 229 // in - the new value 230 void add(float in); 231 232 // median of the distribution 233 float median() const; 234 235 // mean of lowest 80% of the samples 236 float meanLowest80Percent() const; 237 238 // return true if the buffer is full (i.e. statistics are representative) 239 inline bool filledToCapacity() const { return itsBufferFull;} 240 241 protected: 242 // update cache of sorted indices 243 // (it is assumed that itsSampleNumber points to the newly 244 // replaced element) 245 void updateSortedCache() const; 246 247 // build sorted cache from the scratch 248 void buildSortedCache() const; 249 250 // number of samples accumulated so far 251 // (can be less than the buffer size) 252 size_t numberOfSamples() const; 253 254 // this helper method builds the cache if 255 // necessary using one of the methods 256 void fillCacheIfNecessary() const; 257 258 private: 259 // buffer with samples (unsorted) 260 std::vector<float> itsVariances; 261 // current sample number (<=itsVariances.size()) 262 size_t itsSampleNumber; 263 // true, if the buffer all values in the sample buffer are used 264 bool itsBufferFull; 265 // cached indices into vector of samples 266 mutable std::vector<size_t> itsSortedIndices; 267 // true if any of the statistics have been obtained at least 268 // once. This flag allows to implement a more efficient way of 269 // calculating statistics, if they are needed at once and not 270 // after each addition of a new element 271 mutable bool itsStatisticsAccessed; 272 }; 273 274 // 275 /////////////////////////////////////////////////////////////////////////////// 276 277 202 278 } // namespace asap 279 280 /////////////////////////////////////////////////////////////////////////////// 281 // 282 // LFNoiseEstimator a helper class designed to estimate off-line variance 283 // using statistics depending on the distribution of 284 // values (e.g. like a median) 285 // 286 // Two statistics are supported: median and an average of 287 // 80% of smallest values. 288 // 289 290 // construct an object 291 // size - maximum sample size. After a size number of elements is processed 292 // any new samples would cause the algorithm to drop the oldest samples in the 293 // buffer. 294 LFNoiseEstimator::LFNoiseEstimator(size_t size) : itsVariances(size), 295 itsSampleNumber(0), itsBufferFull(false), itsSortedIndices(size), 296 itsStatisticsAccessed(false) 297 { 298 AlwaysAssert(size>0,AipsError); 299 } 300 301 302 // add a new sample 303 // in - the new value 304 void LFNoiseEstimator::add(float in) 305 { 306 if (isnan(in)) { 307 // normally it shouldn't happen 308 return; 309 } 310 itsVariances[itsSampleNumber] = in; 311 312 if (itsStatisticsAccessed) { 313 // only do element by element addition if on-the-fly 314 // statistics are needed 315 updateSortedCache(); 316 } 317 318 // advance itsSampleNumber now 319 ++itsSampleNumber; 320 if (itsSampleNumber == itsVariances.size()) { 321 itsSampleNumber = 0; 322 itsBufferFull = true; 323 } 324 AlwaysAssert(itsSampleNumber<itsVariances.size(),AipsError); 325 } 326 327 // number of samples accumulated so far 328 // (can be less than the buffer size) 329 size_t LFNoiseEstimator::numberOfSamples() const 330 { 331 // the number of samples accumulated so far may be less than the 332 // buffer size 333 const size_t nSamples = itsBufferFull ? itsVariances.size(): itsSampleNumber; 334 AlwaysAssert( (nSamples > 0) && (nSamples <= itsVariances.size()), AipsError); 335 return nSamples; 336 } 337 338 // this helper method builds the cache if 339 // necessary using one of the methods 340 void LFNoiseEstimator::fillCacheIfNecessary() const 341 { 342 if (!itsStatisticsAccessed) { 343 if ((itsSampleNumber!=0) || itsBufferFull) { 344 // build the whole cache efficiently 345 buildSortedCache(); 346 } else { 347 updateSortedCache(); 348 } 349 itsStatisticsAccessed = true; 350 } // otherwise, it is updated in 'add' using on-the-fly method 351 } 352 353 // median of the distribution 354 float LFNoiseEstimator::median() const 355 { 356 fillCacheIfNecessary(); 357 // the number of samples accumulated so far may be less than the 358 // buffer size 359 const size_t nSamples = numberOfSamples(); 360 const size_t medSample = nSamples / 2; 361 AlwaysAssert(medSample < itsSortedIndices.size(), AipsError); 362 return itsVariances[itsSortedIndices[medSample]]; 363 } 364 365 // mean of lowest 80% of the samples 366 float LFNoiseEstimator::meanLowest80Percent() const 367 { 368 fillCacheIfNecessary(); 369 // the number of samples accumulated so far may be less than the 370 // buffer size 371 const size_t nSamples = numberOfSamples(); 372 float result = 0; 373 size_t numpt=size_t(0.8*nSamples); 374 if (!numpt) { 375 numpt=nSamples; // no much else left, 376 // although it is very inaccurate 377 } 378 AlwaysAssert( (numpt > 0) && (numpt<itsSortedIndices.size()), AipsError); 379 for (size_t ch=0; ch<numpt; ++ch) { 380 result += itsVariances[itsSortedIndices[ch]]; 381 } 382 result /= float(numpt); 383 return result; 384 } 385 386 // update cache of sorted indices 387 // (it is assumed that itsSampleNumber points to the newly 388 // replaced element) 389 void LFNoiseEstimator::updateSortedCache() const 390 { 391 // the number of samples accumulated so far may be less than the 392 // buffer size 393 const size_t nSamples = numberOfSamples(); 394 395 if (itsBufferFull) { 396 // first find the index of the element which is being replaced 397 size_t index = nSamples; 398 for (size_t i=0; i<nSamples; ++i) { 399 AlwaysAssert(i < itsSortedIndices.size(), AipsError); 400 if (itsSortedIndices[i] == itsSampleNumber) { 401 index = i; 402 break; 403 } 404 } 405 AlwaysAssert( index < nSamples, AipsError); 406 407 const vector<size_t>::iterator indStart = itsSortedIndices.begin(); 408 // merge this element with preceeding block first 409 if (index != 0) { 410 // merge indices on the basis of variances 411 inplace_merge(indStart,indStart+index,indStart+index+1, 412 indexedCompare<size_t>(itsVariances.begin())); 413 } 414 // merge with the following block 415 if (index + 1 != nSamples) { 416 // merge indices on the basis of variances 417 inplace_merge(indStart,indStart+index+1,indStart+nSamples, 418 indexedCompare<size_t>(itsVariances.begin())); 419 } 420 } else { 421 // itsSampleNumber is the index of the new element 422 AlwaysAssert(itsSampleNumber < itsSortedIndices.size(), AipsError); 423 itsSortedIndices[itsSampleNumber] = itsSampleNumber; 424 if (itsSampleNumber >= 1) { 425 // we have to place this new sample in 426 const vector<size_t>::iterator indStart = itsSortedIndices.begin(); 427 // merge indices on the basis of variances 428 inplace_merge(indStart,indStart+itsSampleNumber,indStart+itsSampleNumber+1, 429 indexedCompare<size_t>(itsVariances.begin())); 430 } 431 } 432 } 433 434 // build sorted cache from the scratch 435 void LFNoiseEstimator::buildSortedCache() const 436 { 437 // the number of samples accumulated so far may be less than the 438 // buffer size 439 const size_t nSamples = numberOfSamples(); 440 AlwaysAssert(nSamples <= itsSortedIndices.size(), AipsError); 441 for (size_t i=0; i<nSamples; ++i) { 442 itsSortedIndices[i]=i; 443 } 444 445 // sort indices, but check the array of variances 446 const vector<size_t>::iterator indStart = itsSortedIndices.begin(); 447 stable_sort(indStart,indStart+nSamples, indexedCompare<size_t>(itsVariances.begin())); 448 } 449 450 // 451 /////////////////////////////////////////////////////////////////////////////// 203 452 204 453 /////////////////////////////////////////////////////////////////////////////// … … 275 524 } 276 525 277 // supplementary function to control running mean calculations.278 // It adds a specified channel to the running meanbox and526 // supplementary function to control running mean/median calculations. 527 // It adds a specified channel to the running box and 279 528 // removes (ch-max_box_nchan+1)'th channel from there 280 529 // Channels, for which the mask is false or index is beyond the … … 339 588 (meanch2-square(meanch)); 340 589 linmean=coeff*(Float(cur_channel)-meanch)+mean; 341 linvariance=sqrt(sumf2/Float(box_chan_cntr)-square(mean)- 342 square(coeff)*(meanch2-square(meanch))); 590 linvariance=sumf2/Float(box_chan_cntr)-square(mean)- 591 square(coeff)*(meanch2-square(meanch)); 592 if (linvariance<0.) { 593 // this shouldn't happen normally, but could be due to round-off error 594 linvariance = 0; 595 } else { 596 linvariance = sqrt(linvariance); 597 } 343 598 } 344 599 need2recalculate=False; … … 351 606 /////////////////////////////////////////////////////////////////////////////// 352 607 // 353 // LFAboveThreshold - a running mean algorithm for line detection608 // LFAboveThreshold - a running mean/median algorithm for line detection 354 609 // 355 610 // … … 359 614 LFAboveThreshold::LFAboveThreshold(std::list<pair<int,int> > &in_lines, 360 615 int in_min_nchan, 361 casa::Float in_threshold) throw() : 616 casa::Float in_threshold, 617 bool use_median, 618 int noise_sample_size) throw() : 362 619 min_nchan(in_min_nchan), threshold(in_threshold), 363 lines(in_lines), running_box(NULL) {} 620 lines(in_lines), running_box(NULL), itsUseMedian(use_median), 621 itsNoiseSampleSize(noise_sample_size) {} 364 622 365 623 LFAboveThreshold::~LFAboveThreshold() throw() … … 474 732 running_box=new RunningBox(spectrum,mask,edge,max_box_nchan); 475 733 476 477 734 // determine the off-line variance first 478 735 // an assumption made: lines occupy a small part of the spectrum 479 736 480 std::vector<float> variances(edge.second-edge.first); 481 DebugAssert(variances.size(),AipsError); 482 483 for (;running_box->haveMore();running_box->next()) 484 variances[running_box->getChannel()-edge.first]= 485 running_box->getLinVariance(); 486 487 // in the future we probably should do a proper Chi^2 estimation 488 // now a simple 80% of smaller values will be used. 489 // it may degrade the performance of the algorithm for weak lines 490 // due to a bias of the Chi^2 distribution. 491 stable_sort(variances.begin(),variances.end()); 492 493 Float offline_variance=0; 494 uInt offline_cnt=uInt(0.8*variances.size()); 495 if (!offline_cnt) offline_cnt=variances.size(); // no much else left, 496 // although it is very inaccurate 497 for (uInt n=0;n<offline_cnt;++n) 498 offline_variance+=variances[n]; 499 offline_variance/=Float(offline_cnt); 737 const size_t noiseSampleSize = itsNoiseSampleSize<0 ? size_t(edge.second-edge.first) : 738 std::min(size_t(itsNoiseSampleSize), size_t(edge.second-edge.first)); 739 DebugAssert(noiseSampleSize,AipsError); 740 const bool globalNoise = (size_t(edge.second - edge.first) == noiseSampleSize); 741 LFNoiseEstimator ne(noiseSampleSize); 742 743 for (;running_box->haveMore();running_box->next()) { 744 ne.add(running_box->getLinVariance()); 745 if (ne.filledToCapacity()) { 746 break; 747 } 748 } 749 750 Float offline_variance = -1; // just a flag that it is unset 751 752 if (globalNoise) { 753 offline_variance = itsUseMedian ? ne.median() : ne.meanLowest80Percent(); 754 } 500 755 501 756 // actual search algorithm … … 510 765 running_box->next()) { 511 766 const int ch=running_box->getChannel(); 512 if (running_box->getNumberOfBoxPoints()>=minboxnchan) 767 if (!globalNoise) { 768 // add a next point for a local noise estimate 769 ne.add(running_box->getLinVariance()); 770 } 771 if (running_box->getNumberOfBoxPoints()>=minboxnchan) { 772 if (!globalNoise) { 773 offline_variance = itsUseMedian ? ne.median() : ne.meanLowest80Percent(); 774 } 775 AlwaysAssert(offline_variance>0.,AipsError); 513 776 processChannel(mask[ch] && (fabs(running_box->aboveMean()) >= 514 777 threshold*offline_variance), mask); 515 else processCurLine(mask); // just finish what was accumulated before778 } else processCurLine(mask); // just finish what was accumulated before 516 779 517 780 signs[ch]=getAboveMeanSign(); 518 // os<<ch<<" "<<spectrum[ch]<<" "<<fabs(running_box->aboveMean())<<" "<< 519 // threshold*offline_variance<<endl; 520 521 const Float buf=running_box->aboveMean(); 522 if (buf>0) signs[ch]=1; 523 else if (buf<0) signs[ch]=-1; 524 else if (buf==0) signs[ch]=0; 525 //os<<ch<<" "<<spectrum[ch]<<" "<<running_box->getLinMean()<<" "<< 526 // threshold*offline_variance<<endl; 781 //os<<ch<<" "<<spectrum[ch]<<" "<<fabs(running_box->aboveMean())<<" "<< 782 //threshold*offline_variance<<endl; 527 783 } 528 784 if (lines.size()) … … 649 905 // Setting a very large value doesn't usually provide 650 906 // valid detections. 651 // in_box_size the box size for running mean calculation. Default is907 // in_box_size the box size for running mean/median calculation. Default is 652 908 // 1./5. of the whole spectrum size 909 // in_noise_box the box size for off-line noise estimation (if working with 910 // local noise. Negative value means use global noise estimate 911 // Default is -1 (i.e. estimate using the whole spectrum) 912 // in_median true if median statistics is used as opposed to average of 913 // the lowest 80% of deviations (default) 653 914 void STLineFinder::setOptions(const casa::Float &in_threshold, 654 915 const casa::Int &in_min_nchan, 655 916 const casa::Int &in_avg_limit, 656 const casa::Float &in_box_size) throw() 917 const casa::Float &in_box_size, 918 const casa::Float &in_noise_box, 919 const casa::Bool &in_median) throw() 657 920 { 658 921 threshold=in_threshold; … … 660 923 avg_limit=in_avg_limit; 661 924 box_size=in_box_size; 925 itsNoiseBox = in_noise_box; 926 itsUseMedian = in_median; 662 927 } 663 928 … … 683 948 const casa::uInt &whichRow) throw(casa::AipsError) 684 949 { 685 //const int minboxnchan=4;686 950 if (scan.null()) 687 951 throw AipsError("STLineFinder::findLines - a scan should be set first," … … 700 964 throw AipsError("STLineFinder::findLines - in_scan and in_mask have different" 701 965 "number of spectral channels."); 966 967 // taking flagged channels into account 968 vector<bool> flaggedChannels = scan->getMask(whichRow); 969 if (flaggedChannels.size()) { 970 // there is a mask set for this row 971 if (flaggedChannels.size() != mask.nelements()) { 972 throw AipsError("STLineFinder::findLines - internal inconsistency: number of mask elements do not match the number of channels"); 973 } 974 for (size_t ch = 0; ch<mask.nelements(); ++ch) { 975 mask[ch] &= flaggedChannels[ch]; 976 } 977 } 978 702 979 // number of elements in in_edge 703 980 if (in_edge.size()>2) … … 732 1009 throw AipsError("STLineFinder::findLines - box_size is too small"); 733 1010 1011 // number of elements in the sample for noise estimate 1012 const int noise_box = itsNoiseBox<0 ? -1 : int(nchan * itsNoiseBox); 1013 1014 if ((noise_box!= -1) and (noise_box<2)) 1015 throw AipsError("STLineFinder::findLines - noise_box is supposed to be at least 2 elements"); 1016 734 1017 spectrum.resize(); 735 1018 spectrum = Vector<Float>(scan->getSpectrum(whichRow)); … … 755 1038 try { 756 1039 // line find algorithm 757 LFAboveThreshold lfalg(new_lines,avg_factor*min_nchan, threshold );1040 LFAboveThreshold lfalg(new_lines,avg_factor*min_nchan, threshold, itsUseMedian,noise_box); 758 1041 lfalg.findLines(spectrum,temp_mask,edge,max_box_nchan); 759 1042 signs.resize(lfalg.getSigns().nelements());
Note: See TracChangeset
for help on using the changeset viewer.