source: branches/hpc34/python/simplelinefinder.py@ 3103

Last change on this file since 3103 was 1826, checked in by Malte Marquarding, 14 years ago

Tidy up of imports (now imported from asap.). Also fixed some whitespace/tab issues

File size: 13.5 KB
Line 
1import math
2from asap.logging import asaplog
3
4class StatCalculator:
5 def __init__(self):
6 self.s=0.
7 self.s2=0.
8 self.cnt=0
9
10 def mean(self):
11 if self.cnt<=0:
12 raise RuntimeError, "At least one data point has to be defined"
13 return self.s/float(self.cnt)
14
15 def variance(self):
16 if self.cnt<=1:
17 raise RuntimeError, "At least two data points has to be defined"
18 return math.sqrt((self.s2/self.cnt)-(self.s/self.cnt)**2+1e-12)
19
20 def rms(self):
21 """
22 return rms of the accumulated sample
23 """
24 if self.cnt<=0:
25 raise RuntimeError, "At least one data point has to be defined"
26 return math.sqrt(self.s2/self.cnt)
27
28 def add(self, pt):
29 self.s = self.s + pt
30 self.s2 = self.s2 + pt*pt
31 self.cnt = self.cnt + 1
32
33
34class simplelinefinder:
35 '''
36 A simplified class to search for spectral features. The algorithm assumes that the bandpass
37 is taken out perfectly and no spectral channels are flagged (except some edge channels).
38 It works with a list or tuple rather than a scantable and returns the channel pairs.
39 There is an optional feature to attempt to split the detected lines into components, although
40 it should be used with caution. This class is largely intended to be used with scripts.
41
42 The fully featured version of the algorithm working with scantables is called linefinder.
43 '''
44
45 def __init__(self):
46 '''
47 Initialize the class.
48 '''
49 self._median = None
50 self._rms = None
51
52 def writeLog(self, str):
53 """
54 Write user defined string into log file
55 """
56 asaplog.push(str)
57
58 def invertChannelSelection(self, nchan, chans, edge = (0,0)):
59 """
60 This method converts a tuple with channel ranges to a tuple which covers all channels
61 not selected by the original tuple (optionally edge channels can be discarded)
62
63 nchan - number of channels in the spectrum.
64 chans - tuple (with even number of elements) containing start and stop channel for all selected ranges
65 edge - one number or two element tuple (separate values for two ends) defining how many channels to reject
66
67 return: a tuple with inverted channel selection
68 Note, at this stage channel ranges are assumed to be sorted and without overlap
69 """
70 if nchan<=1:
71 raise RuntimeError, "Number of channels is supposed to be at least 2, you have %i"% nchan
72 if len(chans)%2!=0:
73 raise RuntimeError, "chans is supposed to be a tuple with even number of elements"
74 tempedge = edge
75 if not isinstance(tempedge,tuple):
76 tempedge = (edge,edge)
77 if len(tempedge)!=2:
78 raise RuntimeError, "edge parameter is supposed to be a two-element tuple or a single number"
79 if tempedge[0]<0 or tempedge[1]<0:
80 raise RuntimeError, "number of edge rejected cannels is supposed to be positive"
81 startchan = tempedge[0]
82 stopchan = nchan - tempedge[1]
83 if stopchan-startchan<0:
84 raise RuntimeError, "Edge rejection rejected all channels"
85 ranges = []
86 curstart = startchan
87 for i in range(0,len(chans),2):
88 if chans[i+1]<curstart:
89 continue
90 elif chans[i]<=curstart:
91 curstart = chans[i+1]+1
92 else:
93 ranges.append(curstart)
94 ranges.append(chans[i]-1)
95 curstart = chans[i+1]+1
96 if curstart<stopchan:
97 ranges.append(curstart)
98 ranges.append(stopchan-1)
99 return tuple(ranges)
100
101 def channelRange(self, spc, vel_range):
102 """
103 A helper method which works on a tuple with abcissa/flux vectors (i.e. as returned by uvSpectrum). It
104 allows to convert supplied velocity range into the channel range.
105
106 spc - tuple with the spectrum (first is the vector of abcissa values, second is the spectrum itself)
107 vel_range - a 2-element tuple with start and stop velocity of the range
108
109 return: a 2-element tuple with channels
110 Note, if supplied range is completely outside the spectrum, an empty tuple will be returned
111 """
112 if len(spc) != 2:
113 raise RuntimeError, "spc supplied to channelRange is supposed to have 2 elements"
114 if len(spc[0]) != len(spc[1]):
115 raise RuntimeError, "spc supplied to channelRange is supposed to have 2 vectors of equal length"
116 if len(vel_range) != 2:
117 raise RuntimeError, "vel_range supplied to channelRange is supposed to have 2 elements"
118 if vel_range[0]>=vel_range[1]:
119 raise RuntimeError, "start velocity is supposed to be less than end velocity, vel_range: %s" % vel_range
120 if len(spc[0])<=2:
121 raise RuntimeError, "Spectrum should contain more than 2 points, you have %i" % len(spc[0])
122 chans = list(vel_range)
123 for j in range(len(chans)):
124 chans[j] = -1
125 for i in range(len(spc[0])):
126 if i!=0:
127 prev_vel = spc[0][i-1]
128 else:
129 prev_vel = spc[0][i+1]
130 delta = max(prev_vel, spc[0][i]) - min(prev_vel, spc[0][i])
131 for j in range(len(vel_range)):
132 if abs(vel_range[j]-spc[0][i])<delta:
133 chans[j] = i
134 if chans[0] == chans[1]:
135 return ()
136 if chans[1] == -1:
137 chans[1] = len(spc[0])-1
138 if chans[0] == -1:
139 chans[0] = 0
140 if chans[0]<=chans[1]:
141 return tuple(chans)
142 return (chans[1],chans[0])
143
144 def _absvalComp(self,x,y):
145 """
146 A helper method to compare x and y by absolute value (to do sorting)
147
148 x - first value
149 y - second value
150
151 return -1,0 or 1 depending on the result of comparison
152 """
153 if abs(x)<abs(y):
154 return -1
155 elif abs(x)>abs(y):
156 return 1
157 else:
158 return 0
159
160 def rms(self):
161 """
162 Return rms scatter of the spectral points (with respect to the median) calculated
163 during last find_lines call. Note, this method throws an exception if
164 find_lines has never been called.
165 """
166 if self._rms==None:
167 raise RuntimeError, "call find_lines before using the rms method"
168 return self._rms
169
170 def median(self):
171 """
172 Return the median of the last spectrum passed to find_lines.
173 Note, this method throws an exception if find_lines has never been called.
174 """
175 if self._median==None:
176 raise RuntimeError, "call find_lines before using the median method"
177 return self._median
178
179 def _mergeIntervals(self, lines, spc):
180 """
181 A helper method to merge intervals.
182
183 lines - list of tuples with first and last channels of all intervals
184 spc - spectrum (to be able to test whether adjacent intervals have the
185 same sign.
186 """
187 toberemoved = []
188 for i in range(1,len(lines)):
189 if lines[i-1][1]+1>=lines[i][0]:
190 if (spc[lines[i-1][1]]>self._median) == (spc[lines[i][0]]>self._median):
191 lines[i] = (lines[i-1][0],lines[i][1])
192 toberemoved.append(i-1)
193 toberemoved.sort()
194 for i in range(len(toberemoved)-1,-1,-1):
195 if toberemoved[i]>=len(lines):
196 raise RuntimeError, "this shouldn't have happened!"
197 lines.pop(toberemoved[i])
198
199 def _splitIntervals(self,lines,spc,threshold=3,minchan=3):
200 """
201 A helper method used in the spectral line detection routine. It splits given intervals
202 into a number of "spectral lines". Each line is characterised by a single extremum.
203 Noise is dealt with by taking into account only those extrema, where a difference with
204 respect to surrounding spectral points exceeds threshold times rms (stored inside this
205 class, so the main body of the line detection should be executed first) and there are
206 at least minchan such points.
207 """
208 if minchan<1:
209 raise RuntimeError, "minchan in _splitIntervals is not supposed to be less than 1, you have %s" % minchan
210 newlines = []
211 for line in lines:
212 if line[1]-line[0]+1 <= minchan:
213 newlines.append(line)
214 wasIncreasing = None
215 derivSignReversals = []
216 for ch in range(line[0]+1,line[1]+1):
217 diff=spc[ch]-spc[ch-1]
218 isIncreasing = (diff>0)
219 if wasIncreasing != None:
220 if isIncreasing != wasIncreasing:
221 derivSignReversals.append((ch,isIncreasing))
222 wasIncreasing = isIncreasing
223 if len(derivSignReversals)==0:
224 newlines.append(line)
225 elif len(derivSignReversals)%2 != 1:
226 self.writeLog("SplitIntervals warning. Derivative is expected to have odd number of reversals within the interval: \"%s\" " % derivSignReversals);
227 newlines.append(line)
228 elif derivSignReversals[0][1]!=derivSignReversals[-1][1]:
229 self.writeLog("SplitIntervals warning. Derivative is expected to have the same sign at the start and at the end of each interval: \"%s\"" % derivSignReversals)
230 newlines.append(line)
231 else:
232 startchan = line[0]
233 for i in range(len(derivSignReversals)):
234 if i%2 == 1:
235 newlines.append((startchan,derivSignReversals[i][0]-1))
236 startchan = derivSignReversals[i][0]
237 newlines.append((startchan,line[1]))
238 return newlines
239
240 def find_lines(self,spc,threshold=3,edge=0,minchan=3, tailsearch = True, splitFeatures = False):
241 """
242 A simple spectral line detection routine, which assumes that bandpass has been
243 taken out perfectly and no channels are flagged within the spectrum. A detection
244 is reported if consequtive minchan number of channels is consistently above or
245 below the median value. The threshold is given in terms of the rms calculated
246 using 80% of the lowest data points by the absolute value (with respect to median)
247
248 spc - a list or tuple with the spectrum, no default
249 threshold - detection threshold, default is 3 sigma, see above for the definition
250 edge - if non-zero, this number of spectral channels will be rejected at the edge.
251 Default is not to do any rejection.
252 minchan - minimum number of consequitive channels exceeding threshold to claim the
253 detection, default is 3.
254 tailsearch - if True (default), the algorithm attempts to widen each line until
255 its flux crosses the median. It merges lines if necessary. Set this
256 option off if you need to split the lines according to some criterion
257 splitFeatures - if True, the algorithm attempts to split each detected spectral feature into
258 a number of spectral lines (just one local extremum). The default action is
259 not to do it (may give an adverse results if the noise is high)
260
261 This method returns a list of tuples each containing start and stop 0-based channel
262 number of every detected line. Empty list if nothing has been detected.
263
264 Note. The median and rms about this median is stored inside this class and can
265 be obtained with rms and median methods.
266 """
267 if edge<0:
268 raise RuntimeError, "edge parameter of find_lines should be non-negative, you have %s" % edge
269 if 2*edge>=len(spc):
270 raise RuntimeError, "edge is too high (%i), you rejected all channels (%i)" % (edge, len(spc))
271 if threshold<=0:
272 raise RuntimeError, "threshold parameter of find_lines should be positive, you have %s" % threshold
273 if minchan<=0:
274 raise RuntimeError, "minchan parameter of find_lines should be positive, you have %s" % minchan
275
276 # temporary storage to get statistics, apply edge rejection here
277 tmpspc = spc[edge:len(spc)-edge+1]
278 if len(tmpspc)<2:
279 raise RuntimeError, "Too many channels are rejected. Decrease edge parameter or provide a longer spectrum."
280 tmpspc.sort()
281 self._median=tmpspc[len(tmpspc)/2]
282 # work with offsets from the median and sort by absolute values
283 for i in range(len(tmpspc)):
284 tmpspc[i]-=self._median
285 tmpspc.sort(cmp=self._absvalComp)
286 sc = StatCalculator()
287 for i in tmpspc[:-int(0.2*len(tmpspc))]:
288 sc.add(i)
289 self._rms=sc.rms()
290
291 self.writeLog("Spectral line detection with edge=%i, threshold=%f, minchan=%i and tailsearch=%s" % (edge,threshold, minchan, tailsearch))
292 self.writeLog("statistics: median=%f, rms=%f" % (self._median, self._rms))
293
294 #actual line detection
295 lines=[]
296 wasAbove = None
297 nchan = 0
298 startchan=None
299 for i in range(edge,len(spc)-edge):
300 if abs(spc[i]-self._median)>threshold*self._rms:
301 isAbove=(spc[i] > self._median)
302 if nchan!=0:
303 if wasAbove == isAbove:
304 nchan+=1
305 else:
306 if nchan>=minchan:
307 lines.append((startchan,i-1))
308 nchan=1
309 wasAbove = isAbove
310 startchan = i
311 else:
312 nchan=1
313 wasAbove = isAbove
314 startchan = i
315 else:
316 if nchan>=minchan:
317 lines.append((startchan,i-1))
318 nchan = 0
319 if nchan>=minchan:
320 lines.append((startchan,len(spc)-edge-1))
321
322 if tailsearch:
323 for i in range(len(lines)):
324 wasAbove = None
325 curRange = list(lines[i])
326 for x in range(curRange[0],edge,-1):
327 isAbove=(spc[x] > self._median)
328 if wasAbove == None:
329 wasAbove = isAbove
330 if isAbove!=wasAbove:
331 curRange[0]=x+1
332 break
333 for x in range(curRange[1],len(spc)-edge):
334 isAbove=(spc[x] > self._median)
335 if isAbove!=wasAbove:
336 curRange[1]=x-1
337 break
338 lines[i]=tuple(curRange)
339 self._mergeIntervals(lines,spc)
340 if splitFeatures:
341 return self._splitIntervals(lines,spc,threshold,minchan)
342 return lines
Note: See TracBrowser for help on using the repository browser.