| 1 | import math | 
|---|
| 2 | from asap.logging import asaplog | 
|---|
| 3 |  | 
|---|
| 4 | class StatCalculator: | 
|---|
| 5 | def __init__(self): | 
|---|
| 6 | self.s=0. | 
|---|
| 7 | self.s2=0. | 
|---|
| 8 | self.cnt=0 | 
|---|
| 9 |  | 
|---|
| 10 | def mean(self): | 
|---|
| 11 | if self.cnt<=0: | 
|---|
| 12 | raise RuntimeError, "At least one data point has to be defined" | 
|---|
| 13 | return self.s/float(self.cnt) | 
|---|
| 14 |  | 
|---|
| 15 | def variance(self): | 
|---|
| 16 | if self.cnt<=1: | 
|---|
| 17 | raise RuntimeError, "At least two data points has to be defined" | 
|---|
| 18 | return math.sqrt((self.s2/self.cnt)-(self.s/self.cnt)**2+1e-12) | 
|---|
| 19 |  | 
|---|
| 20 | def rms(self): | 
|---|
| 21 | """ | 
|---|
| 22 | return rms of the accumulated sample | 
|---|
| 23 | """ | 
|---|
| 24 | if self.cnt<=0: | 
|---|
| 25 | raise RuntimeError, "At least one data point has to be defined" | 
|---|
| 26 | return math.sqrt(self.s2/self.cnt) | 
|---|
| 27 |  | 
|---|
| 28 | def add(self, pt): | 
|---|
| 29 | self.s = self.s + pt | 
|---|
| 30 | self.s2 = self.s2 + pt*pt | 
|---|
| 31 | self.cnt = self.cnt + 1 | 
|---|
| 32 |  | 
|---|
| 33 |  | 
|---|
| 34 | class simplelinefinder: | 
|---|
| 35 | ''' | 
|---|
| 36 | A simplified class to search for spectral features. The algorithm assumes that the bandpass | 
|---|
| 37 | is taken out perfectly and no spectral channels are flagged (except some edge channels). | 
|---|
| 38 | It works with a list or tuple rather than a scantable and returns the channel pairs. | 
|---|
| 39 | There is an optional feature to attempt to split the detected lines into components, although | 
|---|
| 40 | it should be used with caution. This class is largely intended to be used with scripts. | 
|---|
| 41 |  | 
|---|
| 42 | The fully featured version of the algorithm working with scantables is called linefinder. | 
|---|
| 43 | ''' | 
|---|
| 44 |  | 
|---|
| 45 | def __init__(self): | 
|---|
| 46 | ''' | 
|---|
| 47 | Initialize the class. | 
|---|
| 48 | ''' | 
|---|
| 49 | self._median = None | 
|---|
| 50 | self._rms = None | 
|---|
| 51 |  | 
|---|
| 52 | def writeLog(self, str): | 
|---|
| 53 | """ | 
|---|
| 54 | Write user defined string into log file | 
|---|
| 55 | """ | 
|---|
| 56 | asaplog.push(str) | 
|---|
| 57 |  | 
|---|
| 58 | def invertChannelSelection(self, nchan, chans, edge = (0,0)): | 
|---|
| 59 | """ | 
|---|
| 60 | This method converts a tuple with channel ranges to a tuple which covers all channels | 
|---|
| 61 | not selected by the original tuple (optionally edge channels can be discarded) | 
|---|
| 62 |  | 
|---|
| 63 | nchan - number of channels in the spectrum. | 
|---|
| 64 | chans - tuple (with even number of elements) containing start and stop channel for all selected ranges | 
|---|
| 65 | edge - one number or two element tuple (separate values for two ends) defining how many channels to reject | 
|---|
| 66 |  | 
|---|
| 67 | return:  a tuple with inverted channel selection | 
|---|
| 68 | Note, at this stage channel ranges are assumed to be sorted and without overlap | 
|---|
| 69 | """ | 
|---|
| 70 | if nchan<=1: | 
|---|
| 71 | raise RuntimeError, "Number of channels is supposed to be at least 2, you have %i"% nchan | 
|---|
| 72 | if len(chans)%2!=0: | 
|---|
| 73 | raise RuntimeError, "chans is supposed to be a tuple with even number of elements" | 
|---|
| 74 | tempedge = edge | 
|---|
| 75 | if not isinstance(tempedge,tuple): | 
|---|
| 76 | tempedge = (edge,edge) | 
|---|
| 77 | if len(tempedge)!=2: | 
|---|
| 78 | raise RuntimeError, "edge parameter is supposed to be a two-element tuple or a single number" | 
|---|
| 79 | if tempedge[0]<0 or tempedge[1]<0: | 
|---|
| 80 | raise RuntimeError, "number of edge rejected cannels is supposed to be positive" | 
|---|
| 81 | startchan = tempedge[0] | 
|---|
| 82 | stopchan = nchan - tempedge[1] | 
|---|
| 83 | if stopchan-startchan<0: | 
|---|
| 84 | raise RuntimeError, "Edge rejection rejected all channels" | 
|---|
| 85 | ranges = [] | 
|---|
| 86 | curstart = startchan | 
|---|
| 87 | for i in range(0,len(chans),2): | 
|---|
| 88 | if chans[i+1]<curstart: | 
|---|
| 89 | continue | 
|---|
| 90 | elif chans[i]<=curstart: | 
|---|
| 91 | curstart = chans[i+1]+1 | 
|---|
| 92 | else: | 
|---|
| 93 | ranges.append(curstart) | 
|---|
| 94 | ranges.append(chans[i]-1) | 
|---|
| 95 | curstart = chans[i+1]+1 | 
|---|
| 96 | if curstart<stopchan: | 
|---|
| 97 | ranges.append(curstart) | 
|---|
| 98 | ranges.append(stopchan-1) | 
|---|
| 99 | return tuple(ranges) | 
|---|
| 100 |  | 
|---|
| 101 | def channelRange(self, spc, vel_range): | 
|---|
| 102 | """ | 
|---|
| 103 | A helper method which works on a tuple with abcissa/flux vectors (i.e. as returned by uvSpectrum). It | 
|---|
| 104 | allows to convert supplied velocity range into the channel range. | 
|---|
| 105 |  | 
|---|
| 106 | spc - tuple with the spectrum (first is the vector of abcissa values, second is the spectrum itself) | 
|---|
| 107 | vel_range - a 2-element tuple with start and stop velocity of the range | 
|---|
| 108 |  | 
|---|
| 109 | return: a 2-element tuple with channels | 
|---|
| 110 | Note, if supplied range is completely outside the spectrum, an empty tuple will be returned | 
|---|
| 111 | """ | 
|---|
| 112 | if len(spc) != 2: | 
|---|
| 113 | raise RuntimeError, "spc supplied to channelRange is supposed to have 2 elements" | 
|---|
| 114 | if len(spc[0]) != len(spc[1]): | 
|---|
| 115 | raise RuntimeError, "spc supplied to channelRange is supposed to have 2 vectors of equal length" | 
|---|
| 116 | if len(vel_range) != 2: | 
|---|
| 117 | raise RuntimeError, "vel_range supplied to channelRange is supposed to have 2 elements" | 
|---|
| 118 | if vel_range[0]>=vel_range[1]: | 
|---|
| 119 | raise RuntimeError, "start velocity is supposed to be less than end velocity, vel_range: %s" % vel_range | 
|---|
| 120 | if len(spc[0])<=2: | 
|---|
| 121 | raise RuntimeError, "Spectrum should contain more than 2 points, you have %i" % len(spc[0]) | 
|---|
| 122 | chans = list(vel_range) | 
|---|
| 123 | for j in range(len(chans)): | 
|---|
| 124 | chans[j] = -1 | 
|---|
| 125 | for i in range(len(spc[0])): | 
|---|
| 126 | if i!=0: | 
|---|
| 127 | prev_vel = spc[0][i-1] | 
|---|
| 128 | else: | 
|---|
| 129 | prev_vel = spc[0][i+1] | 
|---|
| 130 | delta = max(prev_vel, spc[0][i]) - min(prev_vel, spc[0][i]) | 
|---|
| 131 | for j in range(len(vel_range)): | 
|---|
| 132 | if abs(vel_range[j]-spc[0][i])<delta: | 
|---|
| 133 | chans[j] = i | 
|---|
| 134 | if chans[0] == chans[1]: | 
|---|
| 135 | return () | 
|---|
| 136 | if chans[1] == -1: | 
|---|
| 137 | chans[1] = len(spc[0])-1 | 
|---|
| 138 | if chans[0] == -1: | 
|---|
| 139 | chans[0] = 0 | 
|---|
| 140 | if chans[0]<=chans[1]: | 
|---|
| 141 | return tuple(chans) | 
|---|
| 142 | return (chans[1],chans[0]) | 
|---|
| 143 |  | 
|---|
| 144 | def _absvalComp(self,x,y): | 
|---|
| 145 | """ | 
|---|
| 146 | A helper method to compare x and y by absolute value (to do sorting) | 
|---|
| 147 |  | 
|---|
| 148 | x - first value | 
|---|
| 149 | y - second value | 
|---|
| 150 |  | 
|---|
| 151 | return -1,0 or 1 depending on the result of comparison | 
|---|
| 152 | """ | 
|---|
| 153 | if abs(x)<abs(y): | 
|---|
| 154 | return -1 | 
|---|
| 155 | elif abs(x)>abs(y): | 
|---|
| 156 | return 1 | 
|---|
| 157 | else: | 
|---|
| 158 | return 0 | 
|---|
| 159 |  | 
|---|
| 160 | def rms(self): | 
|---|
| 161 | """ | 
|---|
| 162 | Return rms scatter of the spectral points (with respect to the median) calculated | 
|---|
| 163 | during last find_lines call. Note, this method throws an exception if | 
|---|
| 164 | find_lines has never been called. | 
|---|
| 165 | """ | 
|---|
| 166 | if self._rms==None: | 
|---|
| 167 | raise RuntimeError, "call find_lines before using the rms method" | 
|---|
| 168 | return self._rms | 
|---|
| 169 |  | 
|---|
| 170 | def median(self): | 
|---|
| 171 | """ | 
|---|
| 172 | Return the median of the last spectrum passed to find_lines. | 
|---|
| 173 | Note, this method throws an exception if find_lines has never been called. | 
|---|
| 174 | """ | 
|---|
| 175 | if self._median==None: | 
|---|
| 176 | raise RuntimeError, "call find_lines before using the median method" | 
|---|
| 177 | return self._median | 
|---|
| 178 |  | 
|---|
| 179 | def _mergeIntervals(self, lines, spc): | 
|---|
| 180 | """ | 
|---|
| 181 | A helper method to merge intervals. | 
|---|
| 182 |  | 
|---|
| 183 | lines - list of tuples with first and last channels of all intervals | 
|---|
| 184 | spc - spectrum (to be able to test whether adjacent intervals have the | 
|---|
| 185 | same sign. | 
|---|
| 186 | """ | 
|---|
| 187 | toberemoved = [] | 
|---|
| 188 | for i in range(1,len(lines)): | 
|---|
| 189 | if lines[i-1][1]+1>=lines[i][0]: | 
|---|
| 190 | if (spc[lines[i-1][1]]>self._median) == (spc[lines[i][0]]>self._median): | 
|---|
| 191 | lines[i] = (lines[i-1][0],lines[i][1]) | 
|---|
| 192 | toberemoved.append(i-1) | 
|---|
| 193 | toberemoved.sort() | 
|---|
| 194 | for i in range(len(toberemoved)-1,-1,-1): | 
|---|
| 195 | if toberemoved[i]>=len(lines): | 
|---|
| 196 | raise RuntimeError, "this shouldn't have happened!" | 
|---|
| 197 | lines.pop(toberemoved[i]) | 
|---|
| 198 |  | 
|---|
| 199 | def _splitIntervals(self,lines,spc,threshold=3,minchan=3): | 
|---|
| 200 | """ | 
|---|
| 201 | A helper method used in the spectral line detection routine. It splits given intervals | 
|---|
| 202 | into a number of "spectral lines". Each line is characterised by a single extremum. | 
|---|
| 203 | Noise is dealt with by taking into account only those extrema, where a difference with | 
|---|
| 204 | respect to surrounding spectral points exceeds threshold times rms (stored inside this | 
|---|
| 205 | class, so the main body of the line detection should be executed first) and there are | 
|---|
| 206 | at least minchan such points. | 
|---|
| 207 | """ | 
|---|
| 208 | if minchan<1: | 
|---|
| 209 | raise RuntimeError, "minchan in _splitIntervals is not supposed to be less than 1, you have %s" % minchan | 
|---|
| 210 | newlines = [] | 
|---|
| 211 | for line in lines: | 
|---|
| 212 | if line[1]-line[0]+1 <= minchan: | 
|---|
| 213 | newlines.append(line) | 
|---|
| 214 | wasIncreasing = None | 
|---|
| 215 | derivSignReversals = [] | 
|---|
| 216 | for ch in range(line[0]+1,line[1]+1): | 
|---|
| 217 | diff=spc[ch]-spc[ch-1] | 
|---|
| 218 | isIncreasing = (diff>0) | 
|---|
| 219 | if wasIncreasing != None: | 
|---|
| 220 | if isIncreasing != wasIncreasing: | 
|---|
| 221 | derivSignReversals.append((ch,isIncreasing)) | 
|---|
| 222 | wasIncreasing = isIncreasing | 
|---|
| 223 | if len(derivSignReversals)==0: | 
|---|
| 224 | newlines.append(line) | 
|---|
| 225 | elif len(derivSignReversals)%2 != 1: | 
|---|
| 226 | self.writeLog("SplitIntervals warning. Derivative is expected to have odd number of reversals within the interval: \"%s\" " % derivSignReversals); | 
|---|
| 227 | newlines.append(line) | 
|---|
| 228 | elif derivSignReversals[0][1]!=derivSignReversals[-1][1]: | 
|---|
| 229 | self.writeLog("SplitIntervals warning. Derivative is expected to have the same sign at the start and at the end of each interval: \"%s\"" % derivSignReversals) | 
|---|
| 230 | newlines.append(line) | 
|---|
| 231 | else: | 
|---|
| 232 | startchan = line[0] | 
|---|
| 233 | for i in range(len(derivSignReversals)): | 
|---|
| 234 | if i%2 == 1: | 
|---|
| 235 | newlines.append((startchan,derivSignReversals[i][0]-1)) | 
|---|
| 236 | startchan = derivSignReversals[i][0] | 
|---|
| 237 | newlines.append((startchan,line[1])) | 
|---|
| 238 | return newlines | 
|---|
| 239 |  | 
|---|
| 240 | def find_lines(self,spc,threshold=3,edge=0,minchan=3, tailsearch = True, splitFeatures = False): | 
|---|
| 241 | """ | 
|---|
| 242 | A simple spectral line detection routine, which assumes that bandpass has been | 
|---|
| 243 | taken out perfectly and no channels are flagged within the spectrum. A detection | 
|---|
| 244 | is reported if consequtive minchan number of channels is consistently above or | 
|---|
| 245 | below the median value. The threshold is given in terms of the rms calculated | 
|---|
| 246 | using 80% of the lowest data points by the absolute value (with respect to median) | 
|---|
| 247 |  | 
|---|
| 248 | spc - a list or tuple with the spectrum, no default | 
|---|
| 249 | threshold - detection threshold, default is 3 sigma, see above for the definition | 
|---|
| 250 | edge - if non-zero, this number of spectral channels will be rejected at the edge. | 
|---|
| 251 | Default is not to do any rejection. | 
|---|
| 252 | minchan -  minimum number of consequitive channels exceeding threshold to claim the | 
|---|
| 253 | detection, default is 3. | 
|---|
| 254 | tailsearch - if True (default), the algorithm attempts to widen each line until | 
|---|
| 255 | its flux crosses the median. It merges lines if necessary. Set this | 
|---|
| 256 | option off if you need to split the lines according to some criterion | 
|---|
| 257 | splitFeatures - if True, the algorithm attempts to split each detected spectral feature into | 
|---|
| 258 | a number of spectral lines (just one local extremum). The default action is | 
|---|
| 259 | not to do it (may give an adverse results if the noise is high) | 
|---|
| 260 |  | 
|---|
| 261 | This method returns a list of tuples each containing start and stop 0-based channel | 
|---|
| 262 | number of every detected line. Empty list if nothing has been detected. | 
|---|
| 263 |  | 
|---|
| 264 | Note. The median and rms about this median is stored inside this class and can | 
|---|
| 265 | be obtained with rms and median methods. | 
|---|
| 266 | """ | 
|---|
| 267 | if edge<0: | 
|---|
| 268 | raise RuntimeError, "edge parameter of find_lines should be non-negative, you have %s" % edge | 
|---|
| 269 | if 2*edge>=len(spc): | 
|---|
| 270 | raise RuntimeError, "edge is too high (%i), you rejected all channels (%i)" % (edge, len(spc)) | 
|---|
| 271 | if threshold<=0: | 
|---|
| 272 | raise RuntimeError, "threshold parameter of find_lines should be positive, you have %s" % threshold | 
|---|
| 273 | if minchan<=0: | 
|---|
| 274 | raise RuntimeError, "minchan parameter of find_lines should be positive, you have %s" % minchan | 
|---|
| 275 |  | 
|---|
| 276 | # temporary storage to get statistics, apply edge rejection here | 
|---|
| 277 | tmpspc = spc[edge:len(spc)-edge+1] | 
|---|
| 278 | if len(tmpspc)<2: | 
|---|
| 279 | raise RuntimeError, "Too many channels are rejected. Decrease edge parameter or provide a longer spectrum." | 
|---|
| 280 | tmpspc.sort() | 
|---|
| 281 | self._median=tmpspc[len(tmpspc)/2] | 
|---|
| 282 | # work with offsets from the median and sort by absolute values | 
|---|
| 283 | for i in range(len(tmpspc)): | 
|---|
| 284 | tmpspc[i]-=self._median | 
|---|
| 285 | tmpspc.sort(cmp=self._absvalComp) | 
|---|
| 286 | sc = StatCalculator() | 
|---|
| 287 | for i in tmpspc[:-int(0.2*len(tmpspc))]: | 
|---|
| 288 | sc.add(i) | 
|---|
| 289 | self._rms=sc.rms() | 
|---|
| 290 |  | 
|---|
| 291 | self.writeLog("Spectral line detection with edge=%i, threshold=%f, minchan=%i and tailsearch=%s" % (edge,threshold, minchan, tailsearch)) | 
|---|
| 292 | self.writeLog("statistics: median=%f, rms=%f" % (self._median, self._rms)) | 
|---|
| 293 |  | 
|---|
| 294 | #actual line detection | 
|---|
| 295 | lines=[] | 
|---|
| 296 | wasAbove = None | 
|---|
| 297 | nchan = 0 | 
|---|
| 298 | startchan=None | 
|---|
| 299 | for i in range(edge,len(spc)-edge): | 
|---|
| 300 | if abs(spc[i]-self._median)>threshold*self._rms: | 
|---|
| 301 | isAbove=(spc[i] > self._median) | 
|---|
| 302 | if nchan!=0: | 
|---|
| 303 | if wasAbove == isAbove: | 
|---|
| 304 | nchan+=1 | 
|---|
| 305 | else: | 
|---|
| 306 | if nchan>=minchan: | 
|---|
| 307 | lines.append((startchan,i-1)) | 
|---|
| 308 | nchan=1 | 
|---|
| 309 | wasAbove = isAbove | 
|---|
| 310 | startchan = i | 
|---|
| 311 | else: | 
|---|
| 312 | nchan=1 | 
|---|
| 313 | wasAbove = isAbove | 
|---|
| 314 | startchan = i | 
|---|
| 315 | else: | 
|---|
| 316 | if nchan>=minchan: | 
|---|
| 317 | lines.append((startchan,i-1)) | 
|---|
| 318 | nchan = 0 | 
|---|
| 319 | if nchan>=minchan: | 
|---|
| 320 | lines.append((startchan,len(spc)-edge-1)) | 
|---|
| 321 |  | 
|---|
| 322 | if tailsearch: | 
|---|
| 323 | for i in range(len(lines)): | 
|---|
| 324 | wasAbove = None | 
|---|
| 325 | curRange = list(lines[i]) | 
|---|
| 326 | for x in range(curRange[0],edge,-1): | 
|---|
| 327 | isAbove=(spc[x] > self._median) | 
|---|
| 328 | if wasAbove == None: | 
|---|
| 329 | wasAbove = isAbove | 
|---|
| 330 | if isAbove!=wasAbove: | 
|---|
| 331 | curRange[0]=x+1 | 
|---|
| 332 | break | 
|---|
| 333 | for x in range(curRange[1],len(spc)-edge): | 
|---|
| 334 | isAbove=(spc[x] > self._median) | 
|---|
| 335 | if isAbove!=wasAbove: | 
|---|
| 336 | curRange[1]=x-1 | 
|---|
| 337 | break | 
|---|
| 338 | lines[i]=tuple(curRange) | 
|---|
| 339 | self._mergeIntervals(lines,spc) | 
|---|
| 340 | if splitFeatures: | 
|---|
| 341 | return self._splitIntervals(lines,spc,threshold,minchan) | 
|---|
| 342 | return lines | 
|---|