source: trunk/python/simplelinefinder.py@ 2338

Last change on this file since 2338 was 1826, checked in by Malte Marquarding, 14 years ago

Tidy up of imports (now imported from asap.). Also fixed some whitespace/tab issues

File size: 13.5 KB
RevLine 
[1645]1import math
[1826]2from asap.logging import asaplog
[1645]3
4class StatCalculator:
5 def __init__(self):
6 self.s=0.
7 self.s2=0.
8 self.cnt=0
[1826]9
[1645]10 def mean(self):
11 if self.cnt<=0:
12 raise RuntimeError, "At least one data point has to be defined"
13 return self.s/float(self.cnt)
[1826]14
[1645]15 def variance(self):
16 if self.cnt<=1:
17 raise RuntimeError, "At least two data points has to be defined"
18 return math.sqrt((self.s2/self.cnt)-(self.s/self.cnt)**2+1e-12)
[1826]19
[1645]20 def rms(self):
21 """
22 return rms of the accumulated sample
23 """
24 if self.cnt<=0:
25 raise RuntimeError, "At least one data point has to be defined"
26 return math.sqrt(self.s2/self.cnt)
[1826]27
[1645]28 def add(self, pt):
29 self.s = self.s + pt
30 self.s2 = self.s2 + pt*pt
31 self.cnt = self.cnt + 1
32
33
34class simplelinefinder:
35 '''
36 A simplified class to search for spectral features. The algorithm assumes that the bandpass
37 is taken out perfectly and no spectral channels are flagged (except some edge channels).
[1826]38 It works with a list or tuple rather than a scantable and returns the channel pairs.
[1645]39 There is an optional feature to attempt to split the detected lines into components, although
[1826]40 it should be used with caution. This class is largely intended to be used with scripts.
41
[1645]42 The fully featured version of the algorithm working with scantables is called linefinder.
43 '''
[1826]44
[1645]45 def __init__(self):
46 '''
[1826]47 Initialize the class.
[1645]48 '''
49 self._median = None
50 self._rms = None
[1826]51
[1645]52 def writeLog(self, str):
53 """
54 Write user defined string into log file
[1826]55 """
[1645]56 asaplog.push(str)
57
58 def invertChannelSelection(self, nchan, chans, edge = (0,0)):
59 """
[1826]60 This method converts a tuple with channel ranges to a tuple which covers all channels
[1645]61 not selected by the original tuple (optionally edge channels can be discarded)
[1826]62
63 nchan - number of channels in the spectrum.
[1645]64 chans - tuple (with even number of elements) containing start and stop channel for all selected ranges
65 edge - one number or two element tuple (separate values for two ends) defining how many channels to reject
66
67 return: a tuple with inverted channel selection
68 Note, at this stage channel ranges are assumed to be sorted and without overlap
69 """
70 if nchan<=1:
[1826]71 raise RuntimeError, "Number of channels is supposed to be at least 2, you have %i"% nchan
[1645]72 if len(chans)%2!=0:
73 raise RuntimeError, "chans is supposed to be a tuple with even number of elements"
74 tempedge = edge
75 if not isinstance(tempedge,tuple):
76 tempedge = (edge,edge)
77 if len(tempedge)!=2:
78 raise RuntimeError, "edge parameter is supposed to be a two-element tuple or a single number"
79 if tempedge[0]<0 or tempedge[1]<0:
80 raise RuntimeError, "number of edge rejected cannels is supposed to be positive"
81 startchan = tempedge[0]
82 stopchan = nchan - tempedge[1]
83 if stopchan-startchan<0:
84 raise RuntimeError, "Edge rejection rejected all channels"
85 ranges = []
86 curstart = startchan
87 for i in range(0,len(chans),2):
88 if chans[i+1]<curstart:
89 continue
90 elif chans[i]<=curstart:
91 curstart = chans[i+1]+1
92 else:
93 ranges.append(curstart)
94 ranges.append(chans[i]-1)
95 curstart = chans[i+1]+1
96 if curstart<stopchan:
97 ranges.append(curstart)
98 ranges.append(stopchan-1)
99 return tuple(ranges)
100
101 def channelRange(self, spc, vel_range):
102 """
103 A helper method which works on a tuple with abcissa/flux vectors (i.e. as returned by uvSpectrum). It
104 allows to convert supplied velocity range into the channel range.
105
106 spc - tuple with the spectrum (first is the vector of abcissa values, second is the spectrum itself)
107 vel_range - a 2-element tuple with start and stop velocity of the range
[1826]108
[1645]109 return: a 2-element tuple with channels
110 Note, if supplied range is completely outside the spectrum, an empty tuple will be returned
111 """
112 if len(spc) != 2:
113 raise RuntimeError, "spc supplied to channelRange is supposed to have 2 elements"
114 if len(spc[0]) != len(spc[1]):
115 raise RuntimeError, "spc supplied to channelRange is supposed to have 2 vectors of equal length"
116 if len(vel_range) != 2:
117 raise RuntimeError, "vel_range supplied to channelRange is supposed to have 2 elements"
118 if vel_range[0]>=vel_range[1]:
119 raise RuntimeError, "start velocity is supposed to be less than end velocity, vel_range: %s" % vel_range
120 if len(spc[0])<=2:
121 raise RuntimeError, "Spectrum should contain more than 2 points, you have %i" % len(spc[0])
122 chans = list(vel_range)
123 for j in range(len(chans)):
124 chans[j] = -1
125 for i in range(len(spc[0])):
126 if i!=0:
127 prev_vel = spc[0][i-1]
128 else:
129 prev_vel = spc[0][i+1]
130 delta = max(prev_vel, spc[0][i]) - min(prev_vel, spc[0][i])
131 for j in range(len(vel_range)):
132 if abs(vel_range[j]-spc[0][i])<delta:
133 chans[j] = i
134 if chans[0] == chans[1]:
135 return ()
136 if chans[1] == -1:
137 chans[1] = len(spc[0])-1
138 if chans[0] == -1:
139 chans[0] = 0
140 if chans[0]<=chans[1]:
141 return tuple(chans)
142 return (chans[1],chans[0])
143
144 def _absvalComp(self,x,y):
145 """
146 A helper method to compare x and y by absolute value (to do sorting)
[1826]147
[1645]148 x - first value
149 y - second value
[1826]150
[1645]151 return -1,0 or 1 depending on the result of comparison
152 """
153 if abs(x)<abs(y):
154 return -1
155 elif abs(x)>abs(y):
156 return 1
157 else:
158 return 0
[1826]159
[1645]160 def rms(self):
161 """
162 Return rms scatter of the spectral points (with respect to the median) calculated
163 during last find_lines call. Note, this method throws an exception if
164 find_lines has never been called.
165 """
166 if self._rms==None:
167 raise RuntimeError, "call find_lines before using the rms method"
168 return self._rms
[1826]169
[1645]170 def median(self):
171 """
172 Return the median of the last spectrum passed to find_lines.
173 Note, this method throws an exception if find_lines has never been called.
174 """
175 if self._median==None:
176 raise RuntimeError, "call find_lines before using the median method"
177 return self._median
[1826]178
[1645]179 def _mergeIntervals(self, lines, spc):
180 """
181 A helper method to merge intervals.
[1826]182
[1645]183 lines - list of tuples with first and last channels of all intervals
184 spc - spectrum (to be able to test whether adjacent intervals have the
185 same sign.
186 """
187 toberemoved = []
188 for i in range(1,len(lines)):
189 if lines[i-1][1]+1>=lines[i][0]:
190 if (spc[lines[i-1][1]]>self._median) == (spc[lines[i][0]]>self._median):
191 lines[i] = (lines[i-1][0],lines[i][1])
192 toberemoved.append(i-1)
193 toberemoved.sort()
194 for i in range(len(toberemoved)-1,-1,-1):
195 if toberemoved[i]>=len(lines):
196 raise RuntimeError, "this shouldn't have happened!"
197 lines.pop(toberemoved[i])
[1826]198
[1645]199 def _splitIntervals(self,lines,spc,threshold=3,minchan=3):
200 """
201 A helper method used in the spectral line detection routine. It splits given intervals
202 into a number of "spectral lines". Each line is characterised by a single extremum.
203 Noise is dealt with by taking into account only those extrema, where a difference with
204 respect to surrounding spectral points exceeds threshold times rms (stored inside this
[1826]205 class, so the main body of the line detection should be executed first) and there are
206 at least minchan such points.
[1645]207 """
208 if minchan<1:
209 raise RuntimeError, "minchan in _splitIntervals is not supposed to be less than 1, you have %s" % minchan
210 newlines = []
211 for line in lines:
212 if line[1]-line[0]+1 <= minchan:
213 newlines.append(line)
214 wasIncreasing = None
215 derivSignReversals = []
216 for ch in range(line[0]+1,line[1]+1):
217 diff=spc[ch]-spc[ch-1]
218 isIncreasing = (diff>0)
219 if wasIncreasing != None:
220 if isIncreasing != wasIncreasing:
[1826]221 derivSignReversals.append((ch,isIncreasing))
[1645]222 wasIncreasing = isIncreasing
223 if len(derivSignReversals)==0:
224 newlines.append(line)
225 elif len(derivSignReversals)%2 != 1:
226 self.writeLog("SplitIntervals warning. Derivative is expected to have odd number of reversals within the interval: \"%s\" " % derivSignReversals);
227 newlines.append(line)
228 elif derivSignReversals[0][1]!=derivSignReversals[-1][1]:
229 self.writeLog("SplitIntervals warning. Derivative is expected to have the same sign at the start and at the end of each interval: \"%s\"" % derivSignReversals)
230 newlines.append(line)
231 else:
232 startchan = line[0]
233 for i in range(len(derivSignReversals)):
234 if i%2 == 1:
235 newlines.append((startchan,derivSignReversals[i][0]-1))
236 startchan = derivSignReversals[i][0]
237 newlines.append((startchan,line[1]))
238 return newlines
[1826]239
[1645]240 def find_lines(self,spc,threshold=3,edge=0,minchan=3, tailsearch = True, splitFeatures = False):
241 """
242 A simple spectral line detection routine, which assumes that bandpass has been
243 taken out perfectly and no channels are flagged within the spectrum. A detection
244 is reported if consequtive minchan number of channels is consistently above or
[1826]245 below the median value. The threshold is given in terms of the rms calculated
246 using 80% of the lowest data points by the absolute value (with respect to median)
247
[1645]248 spc - a list or tuple with the spectrum, no default
249 threshold - detection threshold, default is 3 sigma, see above for the definition
250 edge - if non-zero, this number of spectral channels will be rejected at the edge.
251 Default is not to do any rejection.
252 minchan - minimum number of consequitive channels exceeding threshold to claim the
253 detection, default is 3.
254 tailsearch - if True (default), the algorithm attempts to widen each line until
255 its flux crosses the median. It merges lines if necessary. Set this
[1826]256 option off if you need to split the lines according to some criterion
[1645]257 splitFeatures - if True, the algorithm attempts to split each detected spectral feature into
258 a number of spectral lines (just one local extremum). The default action is
259 not to do it (may give an adverse results if the noise is high)
[1826]260
[1645]261 This method returns a list of tuples each containing start and stop 0-based channel
262 number of every detected line. Empty list if nothing has been detected.
[1826]263
[1645]264 Note. The median and rms about this median is stored inside this class and can
265 be obtained with rms and median methods.
266 """
267 if edge<0:
268 raise RuntimeError, "edge parameter of find_lines should be non-negative, you have %s" % edge
269 if 2*edge>=len(spc):
270 raise RuntimeError, "edge is too high (%i), you rejected all channels (%i)" % (edge, len(spc))
271 if threshold<=0:
[1826]272 raise RuntimeError, "threshold parameter of find_lines should be positive, you have %s" % threshold
[1645]273 if minchan<=0:
274 raise RuntimeError, "minchan parameter of find_lines should be positive, you have %s" % minchan
[1826]275
276 # temporary storage to get statistics, apply edge rejection here
[1645]277 tmpspc = spc[edge:len(spc)-edge+1]
278 if len(tmpspc)<2:
[1826]279 raise RuntimeError, "Too many channels are rejected. Decrease edge parameter or provide a longer spectrum."
[1645]280 tmpspc.sort()
281 self._median=tmpspc[len(tmpspc)/2]
282 # work with offsets from the median and sort by absolute values
283 for i in range(len(tmpspc)):
284 tmpspc[i]-=self._median
285 tmpspc.sort(cmp=self._absvalComp)
286 sc = StatCalculator()
287 for i in tmpspc[:-int(0.2*len(tmpspc))]:
[1826]288 sc.add(i)
[1645]289 self._rms=sc.rms()
[1826]290
291 self.writeLog("Spectral line detection with edge=%i, threshold=%f, minchan=%i and tailsearch=%s" % (edge,threshold, minchan, tailsearch))
292 self.writeLog("statistics: median=%f, rms=%f" % (self._median, self._rms))
293
[1645]294 #actual line detection
295 lines=[]
296 wasAbove = None
297 nchan = 0
298 startchan=None
299 for i in range(edge,len(spc)-edge):
300 if abs(spc[i]-self._median)>threshold*self._rms:
301 isAbove=(spc[i] > self._median)
302 if nchan!=0:
303 if wasAbove == isAbove:
304 nchan+=1
305 else:
306 if nchan>=minchan:
307 lines.append((startchan,i-1))
308 nchan=1
309 wasAbove = isAbove
310 startchan = i
311 else:
312 nchan=1
313 wasAbove = isAbove
314 startchan = i
315 else:
316 if nchan>=minchan:
317 lines.append((startchan,i-1))
318 nchan = 0
319 if nchan>=minchan:
320 lines.append((startchan,len(spc)-edge-1))
[1826]321
[1645]322 if tailsearch:
323 for i in range(len(lines)):
324 wasAbove = None
325 curRange = list(lines[i])
326 for x in range(curRange[0],edge,-1):
[1826]327 isAbove=(spc[x] > self._median)
[1645]328 if wasAbove == None:
329 wasAbove = isAbove
330 if isAbove!=wasAbove:
331 curRange[0]=x+1
332 break
333 for x in range(curRange[1],len(spc)-edge):
[1826]334 isAbove=(spc[x] > self._median)
[1645]335 if isAbove!=wasAbove:
336 curRange[1]=x-1
337 break
338 lines[i]=tuple(curRange)
[1826]339 self._mergeIntervals(lines,spc)
[1645]340 if splitFeatures:
341 return self._splitIntervals(lines,spc,threshold,minchan)
[1826]342 return lines
Note: See TracBrowser for help on using the repository browser.