source: branches/alma/python/simplelinefinder.py @ 1757

Last change on this file since 1757 was 1757, checked in by Kana Sugimoto, 14 years ago

New Development: Yes

JIRA Issue: Yes (CAS-2211)

Ready for Test: Yes

Interface Changes: Yes

What Interface Changed: ASAP 3.0.0 interface changes

Test Programs:

Put in Release Notes: Yes

Module(s): all the CASA sd tools and tasks are affected.

Description: Merged ATNF-ASAP 3.0.0 developments to CASA (alma) branch.

Note you also need to update casa/code/atnf.


File size: 13.7 KB
Line 
1import math
2from asap import asaplog
3
4class StatCalculator:
5   def __init__(self):
6       self.s=0.
7       self.s2=0.
8       self.cnt=0
9   
10   def mean(self):
11       if self.cnt<=0:
12          raise RuntimeError, "At least one data point has to be defined"
13       return self.s/float(self.cnt)
14   
15   def variance(self):
16       if self.cnt<=1:
17          raise RuntimeError, "At least two data points has to be defined"
18       return math.sqrt((self.s2/self.cnt)-(self.s/self.cnt)**2+1e-12)
19   
20   def rms(self):
21       """
22          return rms of the accumulated sample
23       """
24       if self.cnt<=0:
25          raise RuntimeError, "At least one data point has to be defined"
26       return math.sqrt(self.s2/self.cnt)
27 
28   def add(self, pt):
29       self.s = self.s + pt
30       self.s2 = self.s2 + pt*pt
31       self.cnt = self.cnt + 1
32
33
34class simplelinefinder:
35   '''
36       A simplified class to search for spectral features. The algorithm assumes that the bandpass
37       is taken out perfectly and no spectral channels are flagged (except some edge channels).
38       It works with a list or tuple rather than a scantable and returns the channel pairs.
39       There is an optional feature to attempt to split the detected lines into components, although
40       it should be used with caution. This class is largely intended to be used with scripts.
41       
42       The fully featured version of the algorithm working with scantables is called linefinder.
43   '''
44   
45   def __init__(self):
46      '''
47         Initialize the class.
48      '''
49      self._median = None
50      self._rms = None
51   
52   def writeLog(self, str):
53      """
54         Write user defined string into log file
55      """
56      asaplog.push(str)
57
58   def invertChannelSelection(self, nchan, chans, edge = (0,0)):
59      """
60         This method converts a tuple with channel ranges to a tuple which covers all channels
61         not selected by the original tuple (optionally edge channels can be discarded)
62 
63         nchan - number of channels in the spectrum.
64         chans - tuple (with even number of elements) containing start and stop channel for all selected ranges
65         edge - one number or two element tuple (separate values for two ends) defining how many channels to reject
66
67         return:  a tuple with inverted channel selection
68         Note, at this stage channel ranges are assumed to be sorted and without overlap
69      """
70      if nchan<=1:
71         raise RuntimeError, "Number of channels is supposed to be at least 2, you have %i"% nchan     
72      if len(chans)%2!=0:
73         raise RuntimeError, "chans is supposed to be a tuple with even number of elements"
74      tempedge = edge
75      if not isinstance(tempedge,tuple):
76         tempedge = (edge,edge)
77      if len(tempedge)!=2:
78         raise RuntimeError, "edge parameter is supposed to be a two-element tuple or a single number"
79      if tempedge[0]<0 or tempedge[1]<0:
80         raise RuntimeError, "number of edge rejected cannels is supposed to be positive"
81      startchan = tempedge[0]
82      stopchan = nchan - tempedge[1]
83      if stopchan-startchan<0:
84         raise RuntimeError, "Edge rejection rejected all channels"
85      ranges = []
86      curstart = startchan
87      for i in range(0,len(chans),2):
88          if chans[i+1]<curstart:
89             continue
90          elif chans[i]<=curstart:
91             curstart = chans[i+1]+1
92          else:
93             ranges.append(curstart)
94             ranges.append(chans[i]-1)
95             curstart = chans[i+1]+1
96      if curstart<stopchan:
97         ranges.append(curstart)
98         ranges.append(stopchan-1)
99      return tuple(ranges)
100
101   def channelRange(self, spc, vel_range):
102      """
103         A helper method which works on a tuple with abcissa/flux vectors (i.e. as returned by uvSpectrum). It
104         allows to convert supplied velocity range into the channel range.
105
106         spc - tuple with the spectrum (first is the vector of abcissa values, second is the spectrum itself)
107         vel_range - a 2-element tuple with start and stop velocity of the range
108       
109         return: a 2-element tuple with channels
110         Note, if supplied range is completely outside the spectrum, an empty tuple will be returned
111      """
112      if len(spc) != 2:
113         raise RuntimeError, "spc supplied to channelRange is supposed to have 2 elements"
114      if len(spc[0]) != len(spc[1]):
115         raise RuntimeError, "spc supplied to channelRange is supposed to have 2 vectors of equal length"
116      if len(vel_range) != 2:
117         raise RuntimeError, "vel_range supplied to channelRange is supposed to have 2 elements"
118      if vel_range[0]>=vel_range[1]:
119         raise RuntimeError, "start velocity is supposed to be less than end velocity, vel_range: %s" % vel_range
120      if len(spc[0])<=2:
121         raise RuntimeError, "Spectrum should contain more than 2 points, you have %i" % len(spc[0])
122      chans = list(vel_range)
123      for j in range(len(chans)):
124          chans[j] = -1
125      for i in range(len(spc[0])):
126          if i!=0:
127             prev_vel = spc[0][i-1]
128          else:
129             prev_vel = spc[0][i+1]
130          delta = max(prev_vel, spc[0][i]) - min(prev_vel, spc[0][i])
131          for j in range(len(vel_range)):
132              if abs(vel_range[j]-spc[0][i])<delta:
133                  chans[j] = i
134      if chans[0] == chans[1]:
135         return ()
136      if chans[1] == -1:
137         chans[1] = len(spc[0])-1
138      if chans[0] == -1:
139         chans[0] = 0
140      if chans[0]<=chans[1]:
141         return tuple(chans)
142      return (chans[1],chans[0])
143
144   def _absvalComp(self,x,y):
145      """
146         A helper method to compare x and y by absolute value (to do sorting)
147         
148         x - first value
149         y - second value
150         
151         return -1,0 or 1 depending on the result of comparison
152      """
153      if abs(x)<abs(y):
154         return -1
155      elif abs(x)>abs(y):
156         return 1
157      else:
158         return 0
159         
160   def rms(self):
161      """
162         Return rms scatter of the spectral points (with respect to the median) calculated
163         during last find_lines call. Note, this method throws an exception if
164         find_lines has never been called.
165      """
166      if self._rms==None:
167         raise RuntimeError, "call find_lines before using the rms method"
168      return self._rms
169   
170   def median(self):
171      """
172         Return the median of the last spectrum passed to find_lines.
173         Note, this method throws an exception if find_lines has never been called.
174      """
175      if self._median==None:
176         raise RuntimeError, "call find_lines before using the median method"
177      return self._median
178   
179   def _mergeIntervals(self, lines, spc):
180      """
181         A helper method to merge intervals.
182         
183         lines - list of tuples with first and last channels of all intervals
184         spc - spectrum (to be able to test whether adjacent intervals have the
185               same sign.
186      """
187      toberemoved = []
188      for i in range(1,len(lines)):
189          if lines[i-1][1]+1>=lines[i][0]:
190             if (spc[lines[i-1][1]]>self._median) == (spc[lines[i][0]]>self._median):
191                 lines[i] = (lines[i-1][0],lines[i][1])
192                 toberemoved.append(i-1)
193      toberemoved.sort()
194      for i in range(len(toberemoved)-1,-1,-1):
195          if toberemoved[i]>=len(lines):
196             raise RuntimeError, "this shouldn't have happened!"
197          lines.pop(toberemoved[i])
198         
199   def _splitIntervals(self,lines,spc,threshold=3,minchan=3):
200      """
201         A helper method used in the spectral line detection routine. It splits given intervals
202         into a number of "spectral lines". Each line is characterised by a single extremum.
203         Noise is dealt with by taking into account only those extrema, where a difference with
204         respect to surrounding spectral points exceeds threshold times rms (stored inside this
205         class, so the main body of the line detection should be executed first) and there are
206         at least minchan such points.     
207      """
208      if minchan<1:
209                 raise RuntimeError, "minchan in _splitIntervals is not supposed to be less than 1, you have %s" % minchan
210      newlines = []
211      for line in lines:
212          if line[1]-line[0]+1 <= minchan:
213             newlines.append(line)
214          wasIncreasing = None
215          derivSignReversals = []
216          for ch in range(line[0]+1,line[1]+1):
217              diff=spc[ch]-spc[ch-1]
218              isIncreasing = (diff>0)
219              if wasIncreasing != None:
220                 if isIncreasing != wasIncreasing:
221                    derivSignReversals.append((ch,isIncreasing))
222              wasIncreasing = isIncreasing
223          if len(derivSignReversals)==0:
224             newlines.append(line)
225          elif len(derivSignReversals)%2 != 1:
226             self.writeLog("SplitIntervals warning. Derivative is expected to have odd number of reversals within the interval: \"%s\" " % derivSignReversals);
227             newlines.append(line)
228          elif derivSignReversals[0][1]!=derivSignReversals[-1][1]:
229             self.writeLog("SplitIntervals warning. Derivative is expected to have the same sign at the start and at the end of each interval: \"%s\"" % derivSignReversals)
230             newlines.append(line)
231          else:
232             startchan = line[0]
233             for i in range(len(derivSignReversals)):
234                if i%2 == 1:
235                   newlines.append((startchan,derivSignReversals[i][0]-1))
236                   startchan = derivSignReversals[i][0]
237             newlines.append((startchan,line[1]))
238      return newlines
239                 
240   def find_lines(self,spc,threshold=3,edge=0,minchan=3, tailsearch = True, splitFeatures = False):
241      """
242         A simple spectral line detection routine, which assumes that bandpass has been
243         taken out perfectly and no channels are flagged within the spectrum. A detection
244         is reported if consequtive minchan number of channels is consistently above or
245         below the median value. The threshold is given in terms of the rms calculated
246         using 80% of the lowest data points by the absolute value (with respect to median)
247         
248         spc - a list or tuple with the spectrum, no default
249         threshold - detection threshold, default is 3 sigma, see above for the definition
250         edge - if non-zero, this number of spectral channels will be rejected at the edge.
251                Default is not to do any rejection.
252         minchan -  minimum number of consequitive channels exceeding threshold to claim the
253                    detection, default is 3.
254         tailsearch - if True (default), the algorithm attempts to widen each line until
255                    its flux crosses the median. It merges lines if necessary. Set this
256                    option off if you need to split the lines according to some criterion                   
257         splitFeatures - if True, the algorithm attempts to split each detected spectral feature into
258                    a number of spectral lines (just one local extremum). The default action is
259                    not to do it (may give an adverse results if the noise is high)
260         
261         This method returns a list of tuples each containing start and stop 0-based channel
262         number of every detected line. Empty list if nothing has been detected.
263     
264         Note. The median and rms about this median is stored inside this class and can
265         be obtained with rms and median methods.
266      """
267      if edge<0:
268         raise RuntimeError, "edge parameter of find_lines should be non-negative, you have %s" % edge
269      if 2*edge>=len(spc):
270         raise RuntimeError, "edge is too high (%i), you rejected all channels (%i)" % (edge, len(spc))
271      if threshold<=0:
272         raise RuntimeError, "threshold parameter of find_lines should be positive, you have %s" % threshold     
273      if minchan<=0:
274         raise RuntimeError, "minchan parameter of find_lines should be positive, you have %s" % minchan
275     
276      # temporary storage to get statistics, apply edge rejection here
277      tmpspc = spc[edge:len(spc)-edge+1]
278      if len(tmpspc)<2:
279         raise RuntimeError, "Too many channels are rejected. Decrease edge parameter or provide a longer spectrum."       
280      tmpspc.sort()
281      self._median=tmpspc[len(tmpspc)/2]
282      # work with offsets from the median and sort by absolute values
283      for i in range(len(tmpspc)):
284          tmpspc[i]-=self._median
285      tmpspc.sort(cmp=self._absvalComp)
286      sc = StatCalculator()
287      for i in tmpspc[:-int(0.2*len(tmpspc))]:
288          sc.add(i)         
289      self._rms=sc.rms()
290     
291      self.writeLog("Spectral line detection with edge=%i, threshold=%f, minchan=%i and tailsearch=%s" % (edge,threshold, minchan, tailsearch))
292      self.writeLog("statistics: median=%f, rms=%f" % (self._median, self._rms))     
293     
294      #actual line detection
295      lines=[]
296      wasAbove = None
297      nchan = 0
298      startchan=None
299      for i in range(edge,len(spc)-edge):
300          if abs(spc[i]-self._median)>threshold*self._rms:
301             isAbove=(spc[i] > self._median)
302             if nchan!=0:
303                if wasAbove == isAbove:
304                   nchan+=1
305                else:
306                   if nchan>=minchan:
307                      lines.append((startchan,i-1))
308                   nchan=1
309                   wasAbove = isAbove
310                   startchan = i
311             else:
312                nchan=1
313                wasAbove = isAbove
314                startchan = i
315          else:
316             if nchan>=minchan:
317                lines.append((startchan,i-1))
318             nchan = 0
319      if nchan>=minchan:
320         lines.append((startchan,len(spc)-edge-1))
321     
322      if tailsearch:
323         for i in range(len(lines)):
324             wasAbove = None
325             curRange = list(lines[i])
326             for x in range(curRange[0],edge,-1):
327                 isAbove=(spc[x] > self._median)             
328                 if wasAbove == None:
329                    wasAbove = isAbove
330                 if isAbove!=wasAbove:
331                    curRange[0]=x+1
332                    break
333             for x in range(curRange[1],len(spc)-edge):
334                 isAbove=(spc[x] > self._median)             
335                 if isAbove!=wasAbove:
336                    curRange[1]=x-1
337                    break
338             lines[i]=tuple(curRange)
339         self._mergeIntervals(lines,spc)                           
340      if splitFeatures:
341         return self._splitIntervals(lines,spc,threshold,minchan)
342      return lines         
Note: See TracBrowser for help on using the repository browser.