source: trunk/python/selector.py@ 3045

Last change on this file since 3045 was 2881, checked in by Takeshi Nakazato, 11 years ago

New Development: No

JIRA Issue: Yes CAS-5858

Ready for Test: Yes

Interface Changes: No

What Interface Changed: Please list interface changes

Test Programs: List test programs

Put in Release Notes: No

Module(s): Module Names change impacts.

Description: Describe your changes here...

Bug fix on handling existing TaQL query string.
If original query is related to FIELDNAME or SRCNAME,
it is overwritten. Otherwise, the new query is appended
to the original one.


File size: 14.8 KB
Line 
1import re
2import math
3import string
4from asap._asap import selector as _selector, srctype
5from asap.utils import unique, _to_list
6
class selector(_selector):
    """
    A selection object to be applied to scantables to restrict the
    scantables to specific rows.
    """
    # Keyword names accepted by the constructor; each one is forwarded
    # to the method called set_<name>.
    fields = ["pols", "ifs", "beams", "scans", "cycles", "name", "query",
              "types", "rows"]

    def __init__(self, *args, **kw):
        """
        Create a selector.
        Parameters:
            args:   if exactly one positional argument is given it has to
                    be a selector, which is then copied. Any other number
                    of positional arguments yields an empty selector.
            kw:     initial selections, keyed by the names listed in
                    'fields' (e.g. scans=[0,1]). Keywords not listed in
                    'fields' are ignored.
        Raises:
            TypeError if the single positional argument is not a selector.
        """
        if len(args) == 1:
            if isinstance(args[0], (self.__class__, _selector)):
                _selector.__init__(self, args[0])
            else:
                raise TypeError("Argument can only be a selector object")
        else:
            _selector.__init__(self)
        for key, value in kw.items():
            if key in self.fields:
                getattr(self, "set_%s" % key)(value)

    def reset(self):
        """
        Unset all selections.
        """
        self._reset()

    def is_empty(self):
        """
        Has anything been set?
        """
        return self._empty()

    def set_polarisations(self, pols=[]):
        """
        Set the polarisations to be selected in the scantable.
        Parameters:
            pols:     a list of integers of 0-3, or strings, e.g ["I","Q"].
                      Default [] is no selection
        Example:
            sel = selector()
            # These are equivalent if data is 'linear'
            sel.set_polarisations(["XX","Re(XY)"])
            sel.set_polarisations([0,2])
            # reset the polarisation selection
            sel.set_polarisations()

        """
        # try strings first; an empty result falls through to integers
        vec = _to_list(pols, str) or _to_list(pols, int)
        if not isinstance(vec, list):
            raise TypeError('Unknown pol type. Please use [0,1...] or ["XX","YY"...]')
        # vec is an empty and/or valid vector
        if len(vec) and isinstance(vec[-1], str):
            self._setpolstrings(vec)
            return
        self._setpols(vec)

    # for the americans
    set_polarizations = set_polarisations
    # for the lazy
    set_pols = set_polarisations

    def set_ifs(self, ifs=[]):
        """
        Set a sequence of IF numbers (0-based).
        Parameters:
            ifs:    a list of integers. Default [] is to unset the selection.
        """
        vec = _to_list(ifs, int)
        if not isinstance(vec, list):
            raise TypeError('Unknown IFno type. Use lists of integers.')
        self._setifs(vec)

    def set_scans(self, scans=[]):
        """
        Set a sequence of Scan numbers (0-based).
        Parameters:
            scans:    a list of integers. Default [] is to unset the selection.
        """
        vec = _to_list(scans, int)
        if not isinstance(vec, list):
            raise TypeError('Unknown Scan number type. Use lists of integers.')
        self._setscans(vec)

    def set_beams(self, beams=[]):
        """
        Set a sequence of Beam numbers (0-based).
        Parameters:
            beams:    a list of integers. Default [] is to unset the selection.
        """
        vec = _to_list(beams, int)
        if not isinstance(vec, list):
            raise TypeError('Unknown Beam number type. Use lists of integers.')
        self._setbeams(vec)

    def set_cycles(self, cycles=[]):
        """
        Set a sequence of Cycle numbers (0-based).
        Parameters:
            cycles:    a list of integers. Default [] is to unset the selection.
        """
        vec = _to_list(cycles, int)
        if not isinstance(vec, list):
            raise TypeError('Unknown Cycle number type. Use lists of integers.')
        self._setcycles(vec)

    def set_name(self, name):
        """
        Set a selection based on a name. This can be a unix pattern , e.g. "*_R"
        Parameters:
            name:    a string containing a source name or pattern
        Examples:
            # select all reference scans which start with "Orion"
            selection.set_name("Orion*_R")
        """
        if not isinstance(name, str):
            raise TypeError('name must be a string')
        self._setname(name)

    def set_tsys(self, tsysmin=0.0, tsysmax=None):
        """
        Select by Tsys range.
        Parameters:
            tsysmin:     the lower threshold. Default 0.0
            tsysmax:     the upper threshold. Default None (no upper limit).
        Examples:
            # select all spectra with Tsys <= 500.0
            selection.set_tsys(tsysmax=500.0)

        """
        taql = "SELECT FROM $1 WHERE TSYS[0] >= %f" % (tsysmin)
        # Test against None rather than for 'float' so that an integer
        # threshold such as tsysmax=500 is not silently dropped.
        if tsysmax is not None:
            taql = taql + " AND TSYS[0] <= %f" % (tsysmax)
        self._settaql(taql)

    def set_query(self, query):
        """
        Select by Column query. Power users only!
        Symbolic source types in 'SRCTYPE == <name>' / 'SRCTYPE != <name>'
        terms are replaced by their integer value before the query is set;
        names that cannot be resolved are left untouched.
        Example:
            # select all off scans with integration times over 60 seconds.
            selection.set_query("SRCTYPE == PSOFF AND INTERVAL > 60.0")
        """
        rx = re.compile("((SRCTYPE *[!=][=] *)([a-zA-Z.]+))", re.I)
        for r in rx.findall(query):
            # r = (whole term, 'SRCTYPE <op> ' part, type name)
            stype = r[-1].lower()
            if stype.find('srctype.') == -1:
                stype = ".".join(["srctype", stype])
            try:
                # NOTE(review): eval() on user supplied text. The regex
                # limits the text to letters and dots, but a plain
                # attribute lookup on srctype would be safer.
                sval = "%s%d" % (r[1], eval(stype))
            except Exception:
                # unknown or non-integer name: keep the term as typed
                continue
            query = query.replace(r[0], sval)
        taql = "SELECT FROM $1 WHERE " + query
        self._settaql(taql)

    def set_order(self, order):
        """
        Set the order the scantable should be sorted by.
        Parameters:
            order:    The list of column names to sort by in order
        """
        self._setorder(order)

    def set_rows(self, rows=[]):
        """
        Set a sequence of row numbers (0-based). Power users Only!
        NOTICE row numbers can be changed easily by sorting,
        prior selection, etc.
        Parameters:
            rows:    a list of integers. Default [] is to unset the selection.
        """
        vec = _to_list(rows, int)
        if not isinstance(vec, list):
            raise TypeError('Unknown row number type. Use lists of integers.')
        self._setrows(vec)

    def set_types(self, types=[]):
        """
        Set a sequence of source types.
        Parameters:
            types:    a list of integers. Default [] is to unset the selection.
        """
        vec = _to_list(types, int)
        if not isinstance(vec, list):
            raise TypeError('Unknown source type. Use lists of integers.')
        self._settypes(vec)

    def set_msselection_field(self, selection):
        """
        Set a field selection in msselection syntax. The msselection
        supports the following syntax:

        pattern match:
            - UNIX style pattern match for source name using '*'
              (compatible with set_name)

        field id selection:
            - simple number in string ('0', '1', etc.)
            - range specification using '~' ('0~1', etc.)
            - range specification using '>' or '<' in combination
              with '=' ('>=1', '<3', etc.)

        comma separated multiple selection:
            - selections can be combined by using ',' ('0,>1',
              'mysource*,2~4', etc.)

        An existing query that already refers to SRCNAME or FIELDNAME is
        overwritten; any other existing query is kept and combined with
        the new one using '&&'.
        """
        selection_list = [item.strip() for item in selection.split(',')]
        query_list = list(self.generate_query(selection_list))
        if len(query_list) > 0:
            new_query = ' || '.join(query_list)
            original_query = self.get_query()
            if len(original_query) == 0 \
               or re.match('.*(SRC|FIELD)NAME.*', original_query):
                query = 'SELECT FROM $1 WHERE ' + new_query
            else:
                query = 'SELECT FROM $1 WHERE (' + original_query \
                        + ') && (' + new_query + ')'
            self._settaql(query)

    def generate_query(self, selection_list):
        """
        Generator turning msselection items into TaQL conditions.
        Numeric items ('3', '<3', '>=1', '0~2') become FIELDNAME
        conditions via FieldIdRegexGenerator, any other non-empty item
        becomes a SRCNAME pattern match. Empty items are skipped.
        Parameters:
            selection_list:    a list of already stripped strings
        """
        for s in selection_list:
            if s.isdigit() or re.match('^[<>]=?[0-9]*$', s) or \
                   re.match('^[0-9]+~[0-9]+$', s):
                # field ID selection
                a = FieldIdRegexGenerator(s)
                yield '(%s)' % (a.get_regex())
            elif len(s) > 0:
                # UNIX style pattern match; the text is inserted verbatim
                yield '(SRCNAME == pattern(\'%s\'))' % (s)

    def get_scans(self):
        """Return the selected scan numbers as a list."""
        return list(self._getscans())

    def get_cycles(self):
        """Return the selected cycle numbers as a list."""
        return list(self._getcycles())

    def get_beams(self):
        """Return the selected beam numbers as a list."""
        return list(self._getbeams())

    def get_ifs(self):
        """Return the selected IF numbers as a list."""
        return list(self._getifs())

    def get_pols(self):
        """Return the selected polarisation numbers as a list."""
        return list(self._getpols())

    def get_poltypes(self):
        """Return the selected polarisation types as a list."""
        return list(self._getpoltypes())

    def get_order(self):
        """Return the sort order as a list."""
        return list(self._getorder())

    def get_types(self):
        """Return the selected source types as a list."""
        return list(self._gettypes())

    def get_rows(self):
        """Return the selected row numbers as a list."""
        return list(self._getrows())

    def get_query(self):
        """
        Return the current query without the leading
        'SELECT FROM $1 WHERE '. An unset query gives an empty string.
        """
        prefix = "SELECT FROM $1 WHERE "
        return self._gettaql().replace(prefix, "")

    def get_name(self):
        """
        Not yet implemented: prints "NYI" and returns None.
        """
        # parenthesised single argument prints the same on Python 2 and 3
        print("NYI")
        return

    def __str__(self):
        """
        Return one 'KEY: value' line per selection that is set, or an
        empty string if nothing is set.
        """
        d = {"SCANNO": self.get_scans(),
             "CYCLENO": self.get_cycles(),
             "BEAMNO": self.get_beams(),
             "IFNO": self.get_ifs(),
             "Pol Type": self.get_poltypes(),
             "POLNO": self.get_pols(),
             "QUERY": self.get_query(),
             "SRCTYPE": self.get_types(),
             "ROWS": self.get_rows(),
             "Sort Order": self.get_order()
             }
        lines = ["%s: %s" % (k, v) for k, v in d.items() if v]
        return "\n".join(lines)

    def __add__(self, other):
        """
        Merge two selections.
        If either operand is empty a copy of the other one is returned.
        Otherwise scans, cycles, beams, IFs and polarisations are united
        and the two queries are combined with AND.
        NOTE(review): types, rows, polarisation strings and sort order are
        not carried over into the merged selector -- confirm this is
        intended.
        """
        if self.is_empty():
            return selector(other)
        elif other.is_empty():
            return selector(self)
        union = selector()
        gets = [[self._getscans(), other._getscans(), union._setscans],
                [self._getcycles(), other._getcycles(), union._setcycles],
                [self._getbeams(), other._getbeams(), union._setbeams],
                [self._getifs(), other._getifs(), union._setifs],
                [self._getpols(), other._getpols(), union._setpols]]
        for mine, theirs, setter in gets:
            vec = list(mine + theirs)
            vec.sort()
            setter(unique(vec))
        q = other.get_query()
        qs = self.get_query()
        if len(q) and len(qs):
            # Parenthesise both sides: either query may contain '||'
            # (e.g. from set_msselection_field), which would otherwise
            # be regrouped by operator precedence.
            union.set_query("(" + qs + ") AND (" + q + ")")
        elif len(q):
            union.set_query(q)
        elif len(qs):
            union.set_query(qs)
        return union
319
class FieldIdRegexGenerator(object):
    """
    Translate a field ID selection in msselection syntax into a TaQL
    condition on the FIELDNAME column.

    The conditions compare FIELDNAME with regular expressions of the
    form '.+__<id>$', i.e. the ID is the number that follows '__' at the
    end of the name.

    Supported patterns:
        'X'             a single ID
        '<X', '<=X'     all IDs below (or up to) X
        '>X', '>=X'     all IDs above (or from) X
        'X~Y'           all IDs from X to Y, both included

    Thresholds above 999 are not supported.
    """

    def __init__(self, pattern):
        """
        Parse the pattern.
        Parameters:
            pattern:    the selection string, e.g. '3', '<=10', '2~5'
        Raises:
            RuntimeError if the pattern has invalid syntax.
        """
        # Final condition, if it is known without further expansion.
        self.regex = None
        # Upper limit(s) to expand: an int, or [exclude_upto, include_upto].
        self.id = None
        # string.Template with ${reg} (and ${optreg}) placeholders.
        self.template = None

        if pattern.isdigit():
            self.regex = 'FIELDNAME == regex(\'.+__%s$\')' % (pattern)
        elif pattern.find('<') >= 0:
            s = pattern.strip().lstrip('<').lstrip('=')
            if not s.isdigit():
                raise RuntimeError('Invalid syntax: %s' % (pattern))
            # '<X' means 0..X-1, '<=X' means 0..X
            self.id = int(s) + (-1 if pattern.find('=') < 0 else 0)
            self.template = string.Template('FIELDNAME == regex(\'.+__${reg}$\')')
        elif pattern.find('>') >= 0:
            s = pattern.strip().lstrip('>').lstrip('=')
            if not s.isdigit():
                raise RuntimeError('Invalid syntax: %s' % (pattern))
            # IDs 0..self.id are excluded: '>X' drops 0..X, '>=X' drops 0..X-1
            self.id = int(s) + (-1 if pattern.find('=') >= 0 else 0)
            if self.id < 0:
                # '>=0': nothing to exclude, every numbered field matches
                self.regex = 'FIELDNAME == regex(\'.+__[0-9]+$\')'
            else:
                self.template = string.Template('FIELDNAME == regex(\'.+__[0-9]+$\') && FIELDNAME != regex(\'.+__${reg}$\')')
        elif pattern.find('~') >= 0:
            bounds = [item.strip() for item in pattern.split('~')]
            if len(bounds) == 2 and bounds[0].isdigit() and bounds[1].isdigit():
                id0 = int(bounds[0])
                id1 = int(bounds[1])
                if id0 == 0:
                    # '0~Y' is the same as '<=Y'
                    self.id = id1
                    self.template = string.Template('FIELDNAME == regex(\'.+__${reg}$\')')
                else:
                    # include 0..Y, then exclude 0..X-1
                    self.id = [id0 - 1, id1]
                    self.template = string.Template('FIELDNAME == regex(\'.+__${reg}$\') && FIELDNAME != regex(\'.+__${optreg}$\')')
            else:
                raise RuntimeError('Invalid syntax: %s' % (pattern))
        else:
            raise RuntimeError('Invalid syntax: %s' % (pattern))

    def get_regex(self):
        """
        Return the TaQL condition for the parsed pattern.
        Raises:
            RuntimeError if a limit is negative (e.g. '<0') or above 999.
        """
        if self.regex is not None:
            # 'X' (or '>=0')
            return self.regex
        elif isinstance(self.id, list):
            # 'X~Y'
            # safe_substitute because the templates contain a literal '$'
            return self.template.safe_substitute(reg=self.__compile(self.id[1]),
                                                 optreg=self.__compile(self.id[0]))
        else:
            # '<(=)X' or '>(=)X'
            return self.template.safe_substitute(reg=self.__compile(self.id))

    def __compile(self, idx):
        """
        Return a regular expression matching every integer 0..idx.
        Raises:
            RuntimeError if idx is negative or has more than three digits.
        """
        if idx < 0:
            raise RuntimeError('ID must be >= 0')
        numerics = [int(c) for c in str(idx)]
        num_digits = len(numerics)
        if num_digits == 1:
            if numerics[0] == 0:
                return '0'
            return '[0-%s]' % (numerics[0])
        elif num_digits == 2:
            return '(%s)' % ('|'.join(self.__gen_two_digit_pattern(numerics)))
        elif num_digits == 3:
            return '(%s)' % ('|'.join(self.__gen_three_digit_pattern(numerics)))
        raise RuntimeError('ID > 999 is not supported')

    def __gen_two_digit_pattern(self, numerics):
        """
        Yield alternatives that together match 0..N for the two digit
        number N given as a list of its digits.
        """
        if len(numerics) != 2:
            raise RuntimeError('two digits expected')
        tens, ones = numerics
        # 0..9
        yield '[0-9]'
        # full decades below the leading digit
        if tens == 2:
            yield '1[0-9]'
        elif tens > 2:
            yield '[1-%s][0-9]' % (tens - 1)
        # the last, partial decade
        if ones == 0:
            yield '%s%s' % (tens, ones)
        else:
            yield '%s[0-%s]' % (tens, ones)

    def __gen_three_digit_pattern(self, numerics):
        """
        Yield alternatives that together match 0..N for the three digit
        number N given as a list of its digits.
        """
        if len(numerics) != 3:
            raise RuntimeError('three digits expected')
        hundreds, tens, ones = numerics
        # 0..99
        yield '[0-9]'
        yield '[1-9][0-9]'
        # full hundreds below the leading digit
        if hundreds == 2:
            yield '1[0-9][0-9]'
        elif hundreds > 2:
            yield '[1-%s][0-9][0-9]' % (hundreds - 1)
        if tens == 0:
            if ones == 0:
                yield '%s00' % (hundreds)
            else:
                yield '%s0[0-%s]' % (hundreds, ones)
        else:
            # full decades inside the last hundred
            if tens > 1:
                yield '%s[0-%s][0-9]' % (hundreds, tens - 1)
            elif tens == 1:
                yield '%s0[0-9]' % (hundreds)
            # the last, partial decade; test the ones digit (the leading
            # digit of a three digit number is never 0)
            if ones == 0:
                yield '%s%s%s' % (hundreds, tens, ones)
            else:
                yield '%s%s[0-%s]' % (hundreds, tens, ones)
Note: See TracBrowser for help on using the repository browser.