source: trunk/python/simplelinefinder.py

Last change on this file was 1826, checked in by Malte Marquarding, 14 years ago

Tidy up of imports (now imported from asap.). Also fixed some whitespace/tab issues

File size: 13.5 KB
RevLine 
[1645]1import math
[1826]2from asap.logging import asaplog
[1645]3
4class StatCalculator:
5   def __init__(self):
6       self.s=0.
7       self.s2=0.
8       self.cnt=0
[1826]9
[1645]10   def mean(self):
11       if self.cnt<=0:
12          raise RuntimeError, "At least one data point has to be defined"
13       return self.s/float(self.cnt)
[1826]14
[1645]15   def variance(self):
16       if self.cnt<=1:
17          raise RuntimeError, "At least two data points has to be defined"
18       return math.sqrt((self.s2/self.cnt)-(self.s/self.cnt)**2+1e-12)
[1826]19
[1645]20   def rms(self):
21       """
22          return rms of the accumulated sample
23       """
24       if self.cnt<=0:
25          raise RuntimeError, "At least one data point has to be defined"
26       return math.sqrt(self.s2/self.cnt)
[1826]27
[1645]28   def add(self, pt):
29       self.s = self.s + pt
30       self.s2 = self.s2 + pt*pt
31       self.cnt = self.cnt + 1
32
33
34class simplelinefinder:
35   '''
36       A simplified class to search for spectral features. The algorithm assumes that the bandpass
37       is taken out perfectly and no spectral channels are flagged (except some edge channels).
[1826]38       It works with a list or tuple rather than a scantable and returns the channel pairs.
[1645]39       There is an optional feature to attempt to split the detected lines into components, although
[1826]40       it should be used with caution. This class is largely intended to be used with scripts.
41
[1645]42       The fully featured version of the algorithm working with scantables is called linefinder.
43   '''
[1826]44
[1645]45   def __init__(self):
46      '''
[1826]47         Initialize the class.
[1645]48      '''
49      self._median = None
50      self._rms = None
[1826]51
[1645]52   def writeLog(self, str):
53      """
54         Write user defined string into log file
[1826]55      """
[1645]56      asaplog.push(str)
57
58   def invertChannelSelection(self, nchan, chans, edge = (0,0)):
59      """
[1826]60         This method converts a tuple with channel ranges to a tuple which covers all channels
[1645]61         not selected by the original tuple (optionally edge channels can be discarded)
[1826]62
63         nchan - number of channels in the spectrum.
[1645]64         chans - tuple (with even number of elements) containing start and stop channel for all selected ranges
65         edge - one number or two element tuple (separate values for two ends) defining how many channels to reject
66
67         return:  a tuple with inverted channel selection
68         Note, at this stage channel ranges are assumed to be sorted and without overlap
69      """
70      if nchan<=1:
[1826]71         raise RuntimeError, "Number of channels is supposed to be at least 2, you have %i"% nchan
[1645]72      if len(chans)%2!=0:
73         raise RuntimeError, "chans is supposed to be a tuple with even number of elements"
74      tempedge = edge
75      if not isinstance(tempedge,tuple):
76         tempedge = (edge,edge)
77      if len(tempedge)!=2:
78         raise RuntimeError, "edge parameter is supposed to be a two-element tuple or a single number"
79      if tempedge[0]<0 or tempedge[1]<0:
80         raise RuntimeError, "number of edge rejected cannels is supposed to be positive"
81      startchan = tempedge[0]
82      stopchan = nchan - tempedge[1]
83      if stopchan-startchan<0:
84         raise RuntimeError, "Edge rejection rejected all channels"
85      ranges = []
86      curstart = startchan
87      for i in range(0,len(chans),2):
88          if chans[i+1]<curstart:
89             continue
90          elif chans[i]<=curstart:
91             curstart = chans[i+1]+1
92          else:
93             ranges.append(curstart)
94             ranges.append(chans[i]-1)
95             curstart = chans[i+1]+1
96      if curstart<stopchan:
97         ranges.append(curstart)
98         ranges.append(stopchan-1)
99      return tuple(ranges)
100
101   def channelRange(self, spc, vel_range):
102      """
103         A helper method which works on a tuple with abcissa/flux vectors (i.e. as returned by uvSpectrum). It
104         allows to convert supplied velocity range into the channel range.
105
106         spc - tuple with the spectrum (first is the vector of abcissa values, second is the spectrum itself)
107         vel_range - a 2-element tuple with start and stop velocity of the range
[1826]108
[1645]109         return: a 2-element tuple with channels
110         Note, if supplied range is completely outside the spectrum, an empty tuple will be returned
111      """
112      if len(spc) != 2:
113         raise RuntimeError, "spc supplied to channelRange is supposed to have 2 elements"
114      if len(spc[0]) != len(spc[1]):
115         raise RuntimeError, "spc supplied to channelRange is supposed to have 2 vectors of equal length"
116      if len(vel_range) != 2:
117         raise RuntimeError, "vel_range supplied to channelRange is supposed to have 2 elements"
118      if vel_range[0]>=vel_range[1]:
119         raise RuntimeError, "start velocity is supposed to be less than end velocity, vel_range: %s" % vel_range
120      if len(spc[0])<=2:
121         raise RuntimeError, "Spectrum should contain more than 2 points, you have %i" % len(spc[0])
122      chans = list(vel_range)
123      for j in range(len(chans)):
124          chans[j] = -1
125      for i in range(len(spc[0])):
126          if i!=0:
127             prev_vel = spc[0][i-1]
128          else:
129             prev_vel = spc[0][i+1]
130          delta = max(prev_vel, spc[0][i]) - min(prev_vel, spc[0][i])
131          for j in range(len(vel_range)):
132              if abs(vel_range[j]-spc[0][i])<delta:
133                  chans[j] = i
134      if chans[0] == chans[1]:
135         return ()
136      if chans[1] == -1:
137         chans[1] = len(spc[0])-1
138      if chans[0] == -1:
139         chans[0] = 0
140      if chans[0]<=chans[1]:
141         return tuple(chans)
142      return (chans[1],chans[0])
143
144   def _absvalComp(self,x,y):
145      """
146         A helper method to compare x and y by absolute value (to do sorting)
[1826]147
[1645]148         x - first value
149         y - second value
[1826]150
[1645]151         return -1,0 or 1 depending on the result of comparison
152      """
153      if abs(x)<abs(y):
154         return -1
155      elif abs(x)>abs(y):
156         return 1
157      else:
158         return 0
[1826]159
[1645]160   def rms(self):
161      """
162         Return rms scatter of the spectral points (with respect to the median) calculated
163         during last find_lines call. Note, this method throws an exception if
164         find_lines has never been called.
165      """
166      if self._rms==None:
167         raise RuntimeError, "call find_lines before using the rms method"
168      return self._rms
[1826]169
[1645]170   def median(self):
171      """
172         Return the median of the last spectrum passed to find_lines.
173         Note, this method throws an exception if find_lines has never been called.
174      """
175      if self._median==None:
176         raise RuntimeError, "call find_lines before using the median method"
177      return self._median
[1826]178
[1645]179   def _mergeIntervals(self, lines, spc):
180      """
181         A helper method to merge intervals.
[1826]182
[1645]183         lines - list of tuples with first and last channels of all intervals
184         spc - spectrum (to be able to test whether adjacent intervals have the
185               same sign.
186      """
187      toberemoved = []
188      for i in range(1,len(lines)):
189          if lines[i-1][1]+1>=lines[i][0]:
190             if (spc[lines[i-1][1]]>self._median) == (spc[lines[i][0]]>self._median):
191                 lines[i] = (lines[i-1][0],lines[i][1])
192                 toberemoved.append(i-1)
193      toberemoved.sort()
194      for i in range(len(toberemoved)-1,-1,-1):
195          if toberemoved[i]>=len(lines):
196             raise RuntimeError, "this shouldn't have happened!"
197          lines.pop(toberemoved[i])
[1826]198
[1645]199   def _splitIntervals(self,lines,spc,threshold=3,minchan=3):
200      """
201         A helper method used in the spectral line detection routine. It splits given intervals
202         into a number of "spectral lines". Each line is characterised by a single extremum.
203         Noise is dealt with by taking into account only those extrema, where a difference with
204         respect to surrounding spectral points exceeds threshold times rms (stored inside this
[1826]205         class, so the main body of the line detection should be executed first) and there are
206         at least minchan such points.
[1645]207      """
208      if minchan<1:
209                 raise RuntimeError, "minchan in _splitIntervals is not supposed to be less than 1, you have %s" % minchan
210      newlines = []
211      for line in lines:
212          if line[1]-line[0]+1 <= minchan:
213             newlines.append(line)
214          wasIncreasing = None
215          derivSignReversals = []
216          for ch in range(line[0]+1,line[1]+1):
217              diff=spc[ch]-spc[ch-1]
218              isIncreasing = (diff>0)
219              if wasIncreasing != None:
220                 if isIncreasing != wasIncreasing:
[1826]221                    derivSignReversals.append((ch,isIncreasing))
[1645]222              wasIncreasing = isIncreasing
223          if len(derivSignReversals)==0:
224             newlines.append(line)
225          elif len(derivSignReversals)%2 != 1:
226             self.writeLog("SplitIntervals warning. Derivative is expected to have odd number of reversals within the interval: \"%s\" " % derivSignReversals);
227             newlines.append(line)
228          elif derivSignReversals[0][1]!=derivSignReversals[-1][1]:
229             self.writeLog("SplitIntervals warning. Derivative is expected to have the same sign at the start and at the end of each interval: \"%s\"" % derivSignReversals)
230             newlines.append(line)
231          else:
232             startchan = line[0]
233             for i in range(len(derivSignReversals)):
234                if i%2 == 1:
235                   newlines.append((startchan,derivSignReversals[i][0]-1))
236                   startchan = derivSignReversals[i][0]
237             newlines.append((startchan,line[1]))
238      return newlines
[1826]239
[1645]240   def find_lines(self,spc,threshold=3,edge=0,minchan=3, tailsearch = True, splitFeatures = False):
241      """
242         A simple spectral line detection routine, which assumes that bandpass has been
243         taken out perfectly and no channels are flagged within the spectrum. A detection
244         is reported if consequtive minchan number of channels is consistently above or
[1826]245         below the median value. The threshold is given in terms of the rms calculated
246         using 80% of the lowest data points by the absolute value (with respect to median)
247
[1645]248         spc - a list or tuple with the spectrum, no default
249         threshold - detection threshold, default is 3 sigma, see above for the definition
250         edge - if non-zero, this number of spectral channels will be rejected at the edge.
251                Default is not to do any rejection.
252         minchan -  minimum number of consequitive channels exceeding threshold to claim the
253                    detection, default is 3.
254         tailsearch - if True (default), the algorithm attempts to widen each line until
255                    its flux crosses the median. It merges lines if necessary. Set this
[1826]256                    option off if you need to split the lines according to some criterion
[1645]257         splitFeatures - if True, the algorithm attempts to split each detected spectral feature into
258                    a number of spectral lines (just one local extremum). The default action is
259                    not to do it (may give an adverse results if the noise is high)
[1826]260
[1645]261         This method returns a list of tuples each containing start and stop 0-based channel
262         number of every detected line. Empty list if nothing has been detected.
[1826]263
[1645]264         Note. The median and rms about this median is stored inside this class and can
265         be obtained with rms and median methods.
266      """
267      if edge<0:
268         raise RuntimeError, "edge parameter of find_lines should be non-negative, you have %s" % edge
269      if 2*edge>=len(spc):
270         raise RuntimeError, "edge is too high (%i), you rejected all channels (%i)" % (edge, len(spc))
271      if threshold<=0:
[1826]272         raise RuntimeError, "threshold parameter of find_lines should be positive, you have %s" % threshold
[1645]273      if minchan<=0:
274         raise RuntimeError, "minchan parameter of find_lines should be positive, you have %s" % minchan
[1826]275
276      # temporary storage to get statistics, apply edge rejection here
[1645]277      tmpspc = spc[edge:len(spc)-edge+1]
278      if len(tmpspc)<2:
[1826]279         raise RuntimeError, "Too many channels are rejected. Decrease edge parameter or provide a longer spectrum."
[1645]280      tmpspc.sort()
281      self._median=tmpspc[len(tmpspc)/2]
282      # work with offsets from the median and sort by absolute values
283      for i in range(len(tmpspc)):
284          tmpspc[i]-=self._median
285      tmpspc.sort(cmp=self._absvalComp)
286      sc = StatCalculator()
287      for i in tmpspc[:-int(0.2*len(tmpspc))]:
[1826]288          sc.add(i)
[1645]289      self._rms=sc.rms()
[1826]290
291      self.writeLog("Spectral line detection with edge=%i, threshold=%f, minchan=%i and tailsearch=%s" % (edge,threshold, minchan, tailsearch))
292      self.writeLog("statistics: median=%f, rms=%f" % (self._median, self._rms))
293
[1645]294      #actual line detection
295      lines=[]
296      wasAbove = None
297      nchan = 0
298      startchan=None
299      for i in range(edge,len(spc)-edge):
300          if abs(spc[i]-self._median)>threshold*self._rms:
301             isAbove=(spc[i] > self._median)
302             if nchan!=0:
303                if wasAbove == isAbove:
304                   nchan+=1
305                else:
306                   if nchan>=minchan:
307                      lines.append((startchan,i-1))
308                   nchan=1
309                   wasAbove = isAbove
310                   startchan = i
311             else:
312                nchan=1
313                wasAbove = isAbove
314                startchan = i
315          else:
316             if nchan>=minchan:
317                lines.append((startchan,i-1))
318             nchan = 0
319      if nchan>=minchan:
320         lines.append((startchan,len(spc)-edge-1))
[1826]321
[1645]322      if tailsearch:
323         for i in range(len(lines)):
324             wasAbove = None
325             curRange = list(lines[i])
326             for x in range(curRange[0],edge,-1):
[1826]327                 isAbove=(spc[x] > self._median)
[1645]328                 if wasAbove == None:
329                    wasAbove = isAbove
330                 if isAbove!=wasAbove:
331                    curRange[0]=x+1
332                    break
333             for x in range(curRange[1],len(spc)-edge):
[1826]334                 isAbove=(spc[x] > self._median)
[1645]335                 if isAbove!=wasAbove:
336                    curRange[1]=x-1
337                    break
338             lines[i]=tuple(curRange)
[1826]339         self._mergeIntervals(lines,spc)
[1645]340      if splitFeatures:
341         return self._splitIntervals(lines,spc,threshold,minchan)
[1826]342      return lines
Note: See TracBrowser for help on using the repository browser.