1 | import math
|
---|
2 | from asap.logging import asaplog
|
---|
3 |
|
---|
4 | class StatCalculator:
|
---|
5 | def __init__(self):
|
---|
6 | self.s=0.
|
---|
7 | self.s2=0.
|
---|
8 | self.cnt=0
|
---|
9 |
|
---|
10 | def mean(self):
|
---|
11 | if self.cnt<=0:
|
---|
12 | raise RuntimeError, "At least one data point has to be defined"
|
---|
13 | return self.s/float(self.cnt)
|
---|
14 |
|
---|
15 | def variance(self):
|
---|
16 | if self.cnt<=1:
|
---|
17 | raise RuntimeError, "At least two data points has to be defined"
|
---|
18 | return math.sqrt((self.s2/self.cnt)-(self.s/self.cnt)**2+1e-12)
|
---|
19 |
|
---|
20 | def rms(self):
|
---|
21 | """
|
---|
22 | return rms of the accumulated sample
|
---|
23 | """
|
---|
24 | if self.cnt<=0:
|
---|
25 | raise RuntimeError, "At least one data point has to be defined"
|
---|
26 | return math.sqrt(self.s2/self.cnt)
|
---|
27 |
|
---|
28 | def add(self, pt):
|
---|
29 | self.s = self.s + pt
|
---|
30 | self.s2 = self.s2 + pt*pt
|
---|
31 | self.cnt = self.cnt + 1
|
---|
32 |
|
---|
33 |
|
---|
34 | class simplelinefinder:
|
---|
35 | '''
|
---|
36 | A simplified class to search for spectral features. The algorithm assumes that the bandpass
|
---|
37 | is taken out perfectly and no spectral channels are flagged (except some edge channels).
|
---|
38 | It works with a list or tuple rather than a scantable and returns the channel pairs.
|
---|
39 | There is an optional feature to attempt to split the detected lines into components, although
|
---|
40 | it should be used with caution. This class is largely intended to be used with scripts.
|
---|
41 |
|
---|
42 | The fully featured version of the algorithm working with scantables is called linefinder.
|
---|
43 | '''
|
---|
44 |
|
---|
45 | def __init__(self):
|
---|
46 | '''
|
---|
47 | Initialize the class.
|
---|
48 | '''
|
---|
49 | self._median = None
|
---|
50 | self._rms = None
|
---|
51 |
|
---|
52 | def writeLog(self, str):
|
---|
53 | """
|
---|
54 | Write user defined string into log file
|
---|
55 | """
|
---|
56 | asaplog.push(str)
|
---|
57 |
|
---|
58 | def invertChannelSelection(self, nchan, chans, edge = (0,0)):
|
---|
59 | """
|
---|
60 | This method converts a tuple with channel ranges to a tuple which covers all channels
|
---|
61 | not selected by the original tuple (optionally edge channels can be discarded)
|
---|
62 |
|
---|
63 | nchan - number of channels in the spectrum.
|
---|
64 | chans - tuple (with even number of elements) containing start and stop channel for all selected ranges
|
---|
65 | edge - one number or two element tuple (separate values for two ends) defining how many channels to reject
|
---|
66 |
|
---|
67 | return: a tuple with inverted channel selection
|
---|
68 | Note, at this stage channel ranges are assumed to be sorted and without overlap
|
---|
69 | """
|
---|
70 | if nchan<=1:
|
---|
71 | raise RuntimeError, "Number of channels is supposed to be at least 2, you have %i"% nchan
|
---|
72 | if len(chans)%2!=0:
|
---|
73 | raise RuntimeError, "chans is supposed to be a tuple with even number of elements"
|
---|
74 | tempedge = edge
|
---|
75 | if not isinstance(tempedge,tuple):
|
---|
76 | tempedge = (edge,edge)
|
---|
77 | if len(tempedge)!=2:
|
---|
78 | raise RuntimeError, "edge parameter is supposed to be a two-element tuple or a single number"
|
---|
79 | if tempedge[0]<0 or tempedge[1]<0:
|
---|
80 | raise RuntimeError, "number of edge rejected cannels is supposed to be positive"
|
---|
81 | startchan = tempedge[0]
|
---|
82 | stopchan = nchan - tempedge[1]
|
---|
83 | if stopchan-startchan<0:
|
---|
84 | raise RuntimeError, "Edge rejection rejected all channels"
|
---|
85 | ranges = []
|
---|
86 | curstart = startchan
|
---|
87 | for i in range(0,len(chans),2):
|
---|
88 | if chans[i+1]<curstart:
|
---|
89 | continue
|
---|
90 | elif chans[i]<=curstart:
|
---|
91 | curstart = chans[i+1]+1
|
---|
92 | else:
|
---|
93 | ranges.append(curstart)
|
---|
94 | ranges.append(chans[i]-1)
|
---|
95 | curstart = chans[i+1]+1
|
---|
96 | if curstart<stopchan:
|
---|
97 | ranges.append(curstart)
|
---|
98 | ranges.append(stopchan-1)
|
---|
99 | return tuple(ranges)
|
---|
100 |
|
---|
101 | def channelRange(self, spc, vel_range):
|
---|
102 | """
|
---|
103 | A helper method which works on a tuple with abcissa/flux vectors (i.e. as returned by uvSpectrum). It
|
---|
104 | allows to convert supplied velocity range into the channel range.
|
---|
105 |
|
---|
106 | spc - tuple with the spectrum (first is the vector of abcissa values, second is the spectrum itself)
|
---|
107 | vel_range - a 2-element tuple with start and stop velocity of the range
|
---|
108 |
|
---|
109 | return: a 2-element tuple with channels
|
---|
110 | Note, if supplied range is completely outside the spectrum, an empty tuple will be returned
|
---|
111 | """
|
---|
112 | if len(spc) != 2:
|
---|
113 | raise RuntimeError, "spc supplied to channelRange is supposed to have 2 elements"
|
---|
114 | if len(spc[0]) != len(spc[1]):
|
---|
115 | raise RuntimeError, "spc supplied to channelRange is supposed to have 2 vectors of equal length"
|
---|
116 | if len(vel_range) != 2:
|
---|
117 | raise RuntimeError, "vel_range supplied to channelRange is supposed to have 2 elements"
|
---|
118 | if vel_range[0]>=vel_range[1]:
|
---|
119 | raise RuntimeError, "start velocity is supposed to be less than end velocity, vel_range: %s" % vel_range
|
---|
120 | if len(spc[0])<=2:
|
---|
121 | raise RuntimeError, "Spectrum should contain more than 2 points, you have %i" % len(spc[0])
|
---|
122 | chans = list(vel_range)
|
---|
123 | for j in range(len(chans)):
|
---|
124 | chans[j] = -1
|
---|
125 | for i in range(len(spc[0])):
|
---|
126 | if i!=0:
|
---|
127 | prev_vel = spc[0][i-1]
|
---|
128 | else:
|
---|
129 | prev_vel = spc[0][i+1]
|
---|
130 | delta = max(prev_vel, spc[0][i]) - min(prev_vel, spc[0][i])
|
---|
131 | for j in range(len(vel_range)):
|
---|
132 | if abs(vel_range[j]-spc[0][i])<delta:
|
---|
133 | chans[j] = i
|
---|
134 | if chans[0] == chans[1]:
|
---|
135 | return ()
|
---|
136 | if chans[1] == -1:
|
---|
137 | chans[1] = len(spc[0])-1
|
---|
138 | if chans[0] == -1:
|
---|
139 | chans[0] = 0
|
---|
140 | if chans[0]<=chans[1]:
|
---|
141 | return tuple(chans)
|
---|
142 | return (chans[1],chans[0])
|
---|
143 |
|
---|
144 | def _absvalComp(self,x,y):
|
---|
145 | """
|
---|
146 | A helper method to compare x and y by absolute value (to do sorting)
|
---|
147 |
|
---|
148 | x - first value
|
---|
149 | y - second value
|
---|
150 |
|
---|
151 | return -1,0 or 1 depending on the result of comparison
|
---|
152 | """
|
---|
153 | if abs(x)<abs(y):
|
---|
154 | return -1
|
---|
155 | elif abs(x)>abs(y):
|
---|
156 | return 1
|
---|
157 | else:
|
---|
158 | return 0
|
---|
159 |
|
---|
160 | def rms(self):
|
---|
161 | """
|
---|
162 | Return rms scatter of the spectral points (with respect to the median) calculated
|
---|
163 | during last find_lines call. Note, this method throws an exception if
|
---|
164 | find_lines has never been called.
|
---|
165 | """
|
---|
166 | if self._rms==None:
|
---|
167 | raise RuntimeError, "call find_lines before using the rms method"
|
---|
168 | return self._rms
|
---|
169 |
|
---|
170 | def median(self):
|
---|
171 | """
|
---|
172 | Return the median of the last spectrum passed to find_lines.
|
---|
173 | Note, this method throws an exception if find_lines has never been called.
|
---|
174 | """
|
---|
175 | if self._median==None:
|
---|
176 | raise RuntimeError, "call find_lines before using the median method"
|
---|
177 | return self._median
|
---|
178 |
|
---|
179 | def _mergeIntervals(self, lines, spc):
|
---|
180 | """
|
---|
181 | A helper method to merge intervals.
|
---|
182 |
|
---|
183 | lines - list of tuples with first and last channels of all intervals
|
---|
184 | spc - spectrum (to be able to test whether adjacent intervals have the
|
---|
185 | same sign.
|
---|
186 | """
|
---|
187 | toberemoved = []
|
---|
188 | for i in range(1,len(lines)):
|
---|
189 | if lines[i-1][1]+1>=lines[i][0]:
|
---|
190 | if (spc[lines[i-1][1]]>self._median) == (spc[lines[i][0]]>self._median):
|
---|
191 | lines[i] = (lines[i-1][0],lines[i][1])
|
---|
192 | toberemoved.append(i-1)
|
---|
193 | toberemoved.sort()
|
---|
194 | for i in range(len(toberemoved)-1,-1,-1):
|
---|
195 | if toberemoved[i]>=len(lines):
|
---|
196 | raise RuntimeError, "this shouldn't have happened!"
|
---|
197 | lines.pop(toberemoved[i])
|
---|
198 |
|
---|
199 | def _splitIntervals(self,lines,spc,threshold=3,minchan=3):
|
---|
200 | """
|
---|
201 | A helper method used in the spectral line detection routine. It splits given intervals
|
---|
202 | into a number of "spectral lines". Each line is characterised by a single extremum.
|
---|
203 | Noise is dealt with by taking into account only those extrema, where a difference with
|
---|
204 | respect to surrounding spectral points exceeds threshold times rms (stored inside this
|
---|
205 | class, so the main body of the line detection should be executed first) and there are
|
---|
206 | at least minchan such points.
|
---|
207 | """
|
---|
208 | if minchan<1:
|
---|
209 | raise RuntimeError, "minchan in _splitIntervals is not supposed to be less than 1, you have %s" % minchan
|
---|
210 | newlines = []
|
---|
211 | for line in lines:
|
---|
212 | if line[1]-line[0]+1 <= minchan:
|
---|
213 | newlines.append(line)
|
---|
214 | wasIncreasing = None
|
---|
215 | derivSignReversals = []
|
---|
216 | for ch in range(line[0]+1,line[1]+1):
|
---|
217 | diff=spc[ch]-spc[ch-1]
|
---|
218 | isIncreasing = (diff>0)
|
---|
219 | if wasIncreasing != None:
|
---|
220 | if isIncreasing != wasIncreasing:
|
---|
221 | derivSignReversals.append((ch,isIncreasing))
|
---|
222 | wasIncreasing = isIncreasing
|
---|
223 | if len(derivSignReversals)==0:
|
---|
224 | newlines.append(line)
|
---|
225 | elif len(derivSignReversals)%2 != 1:
|
---|
226 | self.writeLog("SplitIntervals warning. Derivative is expected to have odd number of reversals within the interval: \"%s\" " % derivSignReversals);
|
---|
227 | newlines.append(line)
|
---|
228 | elif derivSignReversals[0][1]!=derivSignReversals[-1][1]:
|
---|
229 | self.writeLog("SplitIntervals warning. Derivative is expected to have the same sign at the start and at the end of each interval: \"%s\"" % derivSignReversals)
|
---|
230 | newlines.append(line)
|
---|
231 | else:
|
---|
232 | startchan = line[0]
|
---|
233 | for i in range(len(derivSignReversals)):
|
---|
234 | if i%2 == 1:
|
---|
235 | newlines.append((startchan,derivSignReversals[i][0]-1))
|
---|
236 | startchan = derivSignReversals[i][0]
|
---|
237 | newlines.append((startchan,line[1]))
|
---|
238 | return newlines
|
---|
239 |
|
---|
240 | def find_lines(self,spc,threshold=3,edge=0,minchan=3, tailsearch = True, splitFeatures = False):
|
---|
241 | """
|
---|
242 | A simple spectral line detection routine, which assumes that bandpass has been
|
---|
243 | taken out perfectly and no channels are flagged within the spectrum. A detection
|
---|
244 | is reported if consequtive minchan number of channels is consistently above or
|
---|
245 | below the median value. The threshold is given in terms of the rms calculated
|
---|
246 | using 80% of the lowest data points by the absolute value (with respect to median)
|
---|
247 |
|
---|
248 | spc - a list or tuple with the spectrum, no default
|
---|
249 | threshold - detection threshold, default is 3 sigma, see above for the definition
|
---|
250 | edge - if non-zero, this number of spectral channels will be rejected at the edge.
|
---|
251 | Default is not to do any rejection.
|
---|
252 | minchan - minimum number of consequitive channels exceeding threshold to claim the
|
---|
253 | detection, default is 3.
|
---|
254 | tailsearch - if True (default), the algorithm attempts to widen each line until
|
---|
255 | its flux crosses the median. It merges lines if necessary. Set this
|
---|
256 | option off if you need to split the lines according to some criterion
|
---|
257 | splitFeatures - if True, the algorithm attempts to split each detected spectral feature into
|
---|
258 | a number of spectral lines (just one local extremum). The default action is
|
---|
259 | not to do it (may give an adverse results if the noise is high)
|
---|
260 |
|
---|
261 | This method returns a list of tuples each containing start and stop 0-based channel
|
---|
262 | number of every detected line. Empty list if nothing has been detected.
|
---|
263 |
|
---|
264 | Note. The median and rms about this median is stored inside this class and can
|
---|
265 | be obtained with rms and median methods.
|
---|
266 | """
|
---|
267 | if edge<0:
|
---|
268 | raise RuntimeError, "edge parameter of find_lines should be non-negative, you have %s" % edge
|
---|
269 | if 2*edge>=len(spc):
|
---|
270 | raise RuntimeError, "edge is too high (%i), you rejected all channels (%i)" % (edge, len(spc))
|
---|
271 | if threshold<=0:
|
---|
272 | raise RuntimeError, "threshold parameter of find_lines should be positive, you have %s" % threshold
|
---|
273 | if minchan<=0:
|
---|
274 | raise RuntimeError, "minchan parameter of find_lines should be positive, you have %s" % minchan
|
---|
275 |
|
---|
276 | # temporary storage to get statistics, apply edge rejection here
|
---|
277 | tmpspc = spc[edge:len(spc)-edge+1]
|
---|
278 | if len(tmpspc)<2:
|
---|
279 | raise RuntimeError, "Too many channels are rejected. Decrease edge parameter or provide a longer spectrum."
|
---|
280 | tmpspc.sort()
|
---|
281 | self._median=tmpspc[len(tmpspc)/2]
|
---|
282 | # work with offsets from the median and sort by absolute values
|
---|
283 | for i in range(len(tmpspc)):
|
---|
284 | tmpspc[i]-=self._median
|
---|
285 | tmpspc.sort(cmp=self._absvalComp)
|
---|
286 | sc = StatCalculator()
|
---|
287 | for i in tmpspc[:-int(0.2*len(tmpspc))]:
|
---|
288 | sc.add(i)
|
---|
289 | self._rms=sc.rms()
|
---|
290 |
|
---|
291 | self.writeLog("Spectral line detection with edge=%i, threshold=%f, minchan=%i and tailsearch=%s" % (edge,threshold, minchan, tailsearch))
|
---|
292 | self.writeLog("statistics: median=%f, rms=%f" % (self._median, self._rms))
|
---|
293 |
|
---|
294 | #actual line detection
|
---|
295 | lines=[]
|
---|
296 | wasAbove = None
|
---|
297 | nchan = 0
|
---|
298 | startchan=None
|
---|
299 | for i in range(edge,len(spc)-edge):
|
---|
300 | if abs(spc[i]-self._median)>threshold*self._rms:
|
---|
301 | isAbove=(spc[i] > self._median)
|
---|
302 | if nchan!=0:
|
---|
303 | if wasAbove == isAbove:
|
---|
304 | nchan+=1
|
---|
305 | else:
|
---|
306 | if nchan>=minchan:
|
---|
307 | lines.append((startchan,i-1))
|
---|
308 | nchan=1
|
---|
309 | wasAbove = isAbove
|
---|
310 | startchan = i
|
---|
311 | else:
|
---|
312 | nchan=1
|
---|
313 | wasAbove = isAbove
|
---|
314 | startchan = i
|
---|
315 | else:
|
---|
316 | if nchan>=minchan:
|
---|
317 | lines.append((startchan,i-1))
|
---|
318 | nchan = 0
|
---|
319 | if nchan>=minchan:
|
---|
320 | lines.append((startchan,len(spc)-edge-1))
|
---|
321 |
|
---|
322 | if tailsearch:
|
---|
323 | for i in range(len(lines)):
|
---|
324 | wasAbove = None
|
---|
325 | curRange = list(lines[i])
|
---|
326 | for x in range(curRange[0],edge,-1):
|
---|
327 | isAbove=(spc[x] > self._median)
|
---|
328 | if wasAbove == None:
|
---|
329 | wasAbove = isAbove
|
---|
330 | if isAbove!=wasAbove:
|
---|
331 | curRange[0]=x+1
|
---|
332 | break
|
---|
333 | for x in range(curRange[1],len(spc)-edge):
|
---|
334 | isAbove=(spc[x] > self._median)
|
---|
335 | if isAbove!=wasAbove:
|
---|
336 | curRange[1]=x-1
|
---|
337 | break
|
---|
338 | lines[i]=tuple(curRange)
|
---|
339 | self._mergeIntervals(lines,spc)
|
---|
340 | if splitFeatures:
|
---|
341 | return self._splitIntervals(lines,spc,threshold,minchan)
|
---|
342 | return lines
|
---|