source: trunk/docs/Guide.tex @ 136

Last change on this file since 136 was 134, checked in by Matthew Whiting, 18 years ago

Added config.h to param.cc's include statements
Added new text to Guide referring to ability to use strings as well as
integers for flag input.

File size: 80.9 KB
Line 
1\documentclass[12pt,a4paper]{article}
2
3%%%%%% LINE SPACING %%%%%%%%%%%%
4\usepackage{setspace}
5\singlespacing
6%\onehalfspacing
7%\doublespacing
8
9%Define a test for doing PDF format -- use different code below
10\newif\ifPDF
11\ifx\pdfoutput\undefined\PDFfalse
12\else\ifnum\pdfoutput > 0\PDFtrue
13     \else\PDFfalse
14     \fi
15\fi
16
17\textwidth=161 mm
18\textheight=245 mm
19\topmargin=-15 mm
20\oddsidemargin=0 mm
21\parindent=6 mm
22
23\usepackage[sort]{natbib}
24\usepackage{lscape}
25\bibpunct[,]{(}{)}{;}{a}{}{,}
26
27\newcommand{\eg}{e.g.\ }
28\newcommand{\ie}{i.e.\ }
29\newcommand{\hi}{H{\sc i}}
30\newcommand{\hipass}{{\sc hipass}}
31\newcommand{\duchamp}{\emph{Duchamp}}
32\newcommand{\atrous}{\textit{{\`a} trous}}
33\newcommand{\Atrous}{\textit{{\`A} trous}}
34\newcommand{\diff}{{\rm d}}
35\newcommand{\entrylabel}[1]{\mbox{\textsf{\bf{#1:}}}\hfil}
36\newenvironment{entry}
37        {\begin{list}{}%
38                {\renewcommand{\makelabel}{\entrylabel}%
39                        \setlength{\labelwidth}{30mm}%
40                        \setlength{\labelsep}{5pt}%
41                        \setlength{\itemsep}{2pt}%
42                        \setlength{\parsep}{2pt}%
43                        \setlength{\leftmargin}{35mm}%
44                }%
45        }%
46{\end{list}}
47
48
49\title{Source Detection with \duchamp\ v1.0\\A User's Guide}
50\author{Matthew Whiting\\
51Australia Telescope National Facility\\CSIRO}
52\date{}
53
54% If we are creating a PDF, use different options for graphicx, hyperref.
55\ifPDF
56  \usepackage[pdftex]{graphicx,color}
57  \usepackage[pdftex]{hyperref}
58  \hypersetup{colorlinks=true,%             
59              citecolor=red,%
60              filecolor=red,%
61              linkcolor=red,%
62              urlcolor=red,%
63              }
64\else
65  \usepackage[dvips]{graphicx}
66  \usepackage[dvips]{hyperref}
67\fi
68
69\pagestyle{headings}
70\begin{document}
71
72\maketitle
73\thispagestyle{empty}
74\begin{figure}[!h]
75\begin{center}
76\includegraphics[width=\textwidth]{cover_image}
77\end{center}
78\end{figure}
79
80\newpage
81\tableofcontents
82
83\newpage
84\section{Introduction and getting going quickly}
85
86This document provides a user's guide to \duchamp, an object-finder
87for use on spectral-line data cubes. The basic execution of
88\duchamp\ is to read in a FITS data cube, find sources in the cube,
89and produce a text file of positions, velocities and fluxes of the
90detections, as well as a postscript file of the spectra of each
91detection.
92
93So, you have a FITS cube, and you want to find the sources in it. What
94do you do? The first step is to make an input file that contains the
95list of parameters. Brief and detailed examples are shown in
96Appendix~\ref{app-input}. This file provides the input file name, the various
97output files, and defines various parameters that control the
98execution.
99
100The standard way to run \duchamp\ is by the command
101\begin{quote}
102\texttt{Duchamp -p [parameter file]}
103\end{quote}
104replacing \texttt{[parameter file]} with the name of the file listing
105the parameters. Alternatively, you can use the syntax
106\begin{quote}
107\texttt{Duchamp -f [FITS file]}
108\end{quote}
109where \texttt{[FITS file]} is the file you wish to search. In the latter
110case, all parameters will take their default values detailed in
111Appendix~\ref{app-param}. In either case, the program will then work
112away and give you the list of detections and their spectra. The
113program execution is summarised below, and detailed in
114\S\ref{sec-flow}. Information on inputs is in \S\ref{sec-param} and
115Appendix~\ref{app-param}, and a description of the output is in
116\S\ref{sec-output}.
117
118\subsection{A summary of the execution steps}
119
120The basic flow of the program is summarised here -- all steps are
121discussed in more detail in the following sections.
122\begin{enumerate}
123\item If the \texttt{-p} option is used, the parameter file given on
124  the command line is read in, and the parameters absorbed.
125\item The FITS image is located and read in to memory.
126\item If requested, a FITS image with a previously reconstructed array
127  is read in.
128\item If requested, blank pixels are trimmed from the edges, and
129  the baseline of each spectrum is removed.
130\item If the reconstruction method is requested, and the reconstructed
131  array has not been read in at Step 3 above, the cube is
132  reconstructed using the \atrous\ wavelet method.
133\item Searching for objects then takes place, using the requested
134  thresholding method.
135\item The list of objects is condensed by merging neighbouring objects
136  and removing those deemed unacceptable.
137\item The baselines and trimmed pixels are replaced prior to output.
138\item The details of the detections are written to screen and to the
139  requested output file.
140\item Maps showing the spatial location of the detections are written.
141\item The integrated spectra of each detection are written to a
142  postscript file.
143\item If requested, the reconstructed array can be written to a new
144  FITS file.
145\end{enumerate}
146
147\subsection{Guide to terminology}
148
149First, a brief note on the use of terminology in this guide. \duchamp\
150is designed to work on FITS ``cubes''. These are FITS\footnote{FITS is
151the Flexible Image Transport System -- see \citet{hanisch01} or
152websites such as
153\href{http://fits.cv.nrao.edu/FITS.html}{http://fits.cv.nrao.edu/FITS.html}
154for details.} image arrays with three dimensions -- they are assumed
155to have the following form: the first two dimensions (referred to as
156$x$ and $y$) are spatial directions (that is, relating to the position
157on the sky), while the third dimension, $z$, is the spectral
158direction, which can correspond to frequency, wavelength, or
159velocity. The three-dimensional analogues of pixels are ``voxels'', or
160volume cells -- a voxel is defined by a unique $(x,y,z)$ location and
161has a unique flux or intensity value associated with it.
162
163Each spatial pixel (a given $(x,y)$ coordinate) can be said to be a
164single spectrum, while a slice through the cube perpendicular to the
165spectral direction at a given $z$-value is a single channel (the 2-D
166image is a channel map).
167
168Detection involves locating a contiguous group of voxels with fluxes
169above a certain threshold. \duchamp\ makes no assumptions as to the
170size or shape of the detected features, other than having
171user-selected minimum size criteria.
172
173Features that are detected are assumed to be positive. The user can
174choose to search for negative features by setting an input parameter
175-- this inverts the cube prior to the search (see
176\S\ref{sec-detection} for details).
177
178Note that it is possible to run \duchamp\ on a two-dimensional image
179(\ie one with no frequency or velocity information), or indeed a
180one-dimensional array, and many of the features of the program will
181work fine. The focus, however, is on object detection in three
182dimensions.
183
184\subsection{Why \duchamp?}
185
186Well, it's important for a program to have a name, and the initial
187working title of \emph{cubefind} was somewhat uninspiring. I wanted to
188avoid the classic astronomical approach of designing a cute acronym,
189and since it is designed to work on cubes, I looked at naming it after
190a cubist. \emph{Picasso}, sadly, was already taken \citep{minchin99},
191so I settled on naming it after Marcel Duchamp, another cubist, but
192also one of the first artists to work with ``found objects''.
193
194\section{User Inputs}
195\label{sec-param}
196
197Input to the program is provided by means of a parameter
198file. Parameters are listed in the file, followed by the value that
199should be assigned to them. The syntax used is \texttt{paramName
200value}. Parameter names are not case-sensitive, and lines in the input
201file that start with \texttt{\#} are ignored. If a parameter is listed
202more than once, the last value is used, but otherwise the order in
203which the parameters are listed in the input file is
204arbitrary. Example input files can be seen in
205Appendix~\ref{app-input}.
206
207If a parameter is not listed, the default value is assumed. The
208defaults are chosen to provide a good result (using the reconstruction
209method), so the user doesn't need to specify many new parameters in
210the input file. Note that the image file \textbf{must} be specified! The
211parameters that can be set are listed in Appendix~\ref{app-param},
212with their default values in parentheses.
213
214The parameters with names starting with \texttt{flag} are stored as
215\texttt{bool} variables, and so are either \texttt{true = 1} or
216\texttt{false = 0}. They can be entered in the file either in text or
217integer format -- \duchamp\ will read them correctly in either case.
218
219\section{What \duchamp\ is doing}
220\label{sec-flow}
221
222The execution flow of \duchamp\ is detailed here, indicating the
223main algorithmic steps that are used. The program is written in C/C++
224and makes use of the {\sc cfitsio}, {\sc wcslib} and {\sc pgplot}
225libraries.
226
227\subsection{Image input}
228\label{sec-input}
229
230The cube is read in using basic {\sc cfitsio} commands, and stored as
231an array in a special C++ class. This class keeps track of
232the list of detected objects, as well as any reconstructed arrays that
233are made (see \S\ref{sec-recon}). The World Coordinate System (WCS)
234information for the cube is also obtained from the FITS header by {\sc
235wcslib} functions \citep{greisen02, calabretta02}, and this
236information, in the form of a \texttt{wcsprm} structure, is also stored
237in the same class.
238
239A sub-section of an image can be requested via the \texttt{subsection}
240parameter in the parameter file -- this can be a good idea if the cube
241has very noisy edges, which may produce many spurious detections. The
242generalised form of the subsection that is used by {\sc cfitsio} is
243\texttt{[x1:x2:dx,y1:y2:dy,z1:z2:dz]}, such that the x-coordinates run
244from \texttt{x1} to \texttt{x2} (inclusive), with steps of
245\texttt{dx}. The step value can be omitted (so a subsection of the
246form \texttt{[2:50,2:50,10:1000]} is still valid). \duchamp\ does not
247make use of any step value present in the subsection string, and any
248that are present are removed before the file is opened.
249
250If one wants the full range of a coordinate then replace the range
251with an asterisk, \eg \texttt{[2:50,2:50,*]}. If one wants to use a
252subsection, one must set \texttt{flagSubsection = 1}. A complete
253description of the section syntax can be found at the {\sc fitsio} web
254site
255\footnote{
256\href{http://heasarc.gsfc.nasa.gov/docs/software/fitsio/c/c\_user/node90.html}%
257{http://heasarc.gsfc.nasa.gov/docs/software/fitsio/c/c\_user/node90.html}}.
258
259\subsection{Image modification}
260\label{sec-modify}
261
262Several modifications to the cube can be made that improve the
263execution and efficiency of \duchamp\ (these are optional -- their
264use is indicated by the relevant flags set in the input parameter
265file).
266
267\subsubsection{Blank pixel removal}
268
269First, the cube is trimmed of any BLANK pixels that pad the image out
270to a rectangular shape. This is optional, its use determined by the
271\texttt{flagBlankPix} parameter. The value for these pixels is read from
272the FITS header (using the BLANK, BSCALE and BZERO keywords), but if
273these are not present then the value can be specified by the user in
274the parameter file using \texttt{blankPixValue}.
275
276This stage is particularly important for the reconstruction step, as
277lots of BLANK pixels on the edges will smooth out features in the
278wavelet calculation stage. The trimming will also reduce the size of
279the cube's array, speeding up the execution. The amount of trimming is
280recorded, and these pixels are added back in once the source-detection
281is completed (so that quoted pixel positions are applicable to the
282original cube).
283
284Rows and columns are trimmed one at a time until the first non-BLANK
285pixel is reached, so that the image remains rectangular. In practice,
286this means that there will be BLANK pixels left in the trimmed image
287(if the non-BLANK region is non-rectangular). However, these are
288ignored in all further calculations done on the cube.
289
290\subsubsection{Baseline removal}
291
292Second, the user may request the removal of baselines from the
293spectra, via the parameter \texttt{flagBaseline}. This may be necessary
294if there is a strong baseline ripple present, which can result in
295spurious detections at the high points of the ripple. The baseline is
296calculated from a wavelet reconstruction procedure (see
297\S\ref{sec-recon}) that keeps only the two largest scales. This is
298done separately for each spatial pixel (\ie for each spectrum in the
299cube), and the baselines are stored and added back in before any
300output is done. In this way the quoted fluxes and displayed spectra
301are as one would see from the input cube itself -- even though the
302detection (and reconstruction if applicable) is done on the
303baseline-removed cube.
304
305The presence of very strong signals (for instance, masers at several
306hundred Jy) can affect the determination of the baseline, leading to a
307large dip centred on the signal in the baseline-subtracted
308spectrum. To prevent this, the signal is trimmed prior to the
309reconstruction process at some standard threshold (at $8\sigma$ above
310the mean). The baseline determined should thus be representative of
311the true, signal-free baseline. Note that this trimming is only a
312temporary measure which does not affect the source-detection.
313
314\subsubsection{Ignoring bright Milky Way emission}
315
316Finally, a single set of contiguous channels can be ignored -- these
317may exhibit very strong emission, such as that from the Milky Way as
318seen in extragalactic \hi\ cubes (hence the references to ``Milky
319Way'' in relation to this task -- apologies to Galactic
320astronomers!). Such dominant channels will produce many detections
321that are unnecessary, uninteresting (if one is interested in
322extragalactic \hi) and large (in size and hence in memory usage), and
323so will slow the program down and detract from the interesting
324detections. The use of this feature is controlled by the
325\texttt{flagMW} parameter, and the exact channels concerned are able
326to be set by the user (using \texttt{maxMW} and \texttt{minMW} --
327these give an inclusive range of channels). When employed, these
328channels are temporarily blanked out for the searching, and the
329scaling of the spectral output (see Fig.~\ref{fig-spect}) will not
330take them into account. They will be present in the reconstructed
331array, however, and so will be included in the saved FITS file (see
332\S\ref{sec-reconIO}). When the final spectra are plotted, the range of
333channels covered by these parameters is indicated by a green hashed
334box.
335
336\subsection{Image reconstruction}
337\label{sec-recon}
338
339The user can direct \duchamp\ to reconstruct the data cube using the
340\atrous\ wavelet procedure. A good description of the procedure can be
341found in \citet{starck02:book}. The reconstruction is an effective way
342of removing a lot of the noise in the image, allowing one to search
343reliably to fainter levels, and reducing the number of spurious
344detections. This is an optional step, but one that greatly enhances
345the source-detection process, with the trade-off that it can be
346relatively time- and memory-intensive.
347
348\subsubsection{Algorithm}
349
350The steps in the \atrous\ reconstruction are as follows:
351\begin{enumerate}
352\item Set the reconstructed array to 0 everywhere.
353\item The input array is discretely convolved with a given filter
354  function. This is determined from the parameter file via the
355  \texttt{filterCode} parameter -- see Appendix~\ref{app-param} for
356  details on the filters available.
357\item The wavelet coefficients are calculated by taking the difference
358  between the convolved array and the input array.
359\item If the wavelet coefficients at a given point are above the
360  requested threshold (given by \texttt{snrRecon} as the number of
361  $\sigma$ above the mean and adjusted to the current scale -- see
362  Appendix~\ref{app-scaling}), add these to the reconstructed array.
363\item The separation of the filter coefficients is doubled. (Note that
364  this step provides the name of the procedure\footnote{\atrous\ means
365  ``with holes'' in French.}, as gaps or holes are created in the
366  filter coverage.)
367\item The procedure is repeated from step 2, using the convolved array
368  as the input array.
369\item Continue until the required maximum number of scales is reached.
370\item Add the final smoothed (\ie convolved) array to the
371  reconstructed array. This provides the ``DC offset'', as each of the
372  wavelet coefficient arrays will have zero mean.
373\end{enumerate}
374
375The reconstruction has at least two iterations. The first iteration
376makes a first pass at the wavelet reconstruction (the process outlined
377in the 8 stages above), but the residual array will inevitably have
378some structure still in it, so the wavelet filtering is done on the
379residual, and any significant wavelet terms are added to the final
380reconstruction. This step is repeated until the change in the $\sigma$
381of the background is less than some fiducial amount.
382
383It is important to note that the \atrous\ decomposition is an
384example of a ``redundant'' transformation. If no thresholding is
385performed, the sum of all the wavelet coefficient arrays and the final
386smoothed array is identical to the input array. The thresholding thus
387removes only the unwanted structure in the array.
388
389Note that any BLANK pixels that are still in the cube will not be
390altered by the reconstruction -- they will be left as BLANK so that
391the shape of the valid part of the cube is preserved.
392
393\subsubsection{Note on Statistics}
394
395The correct calculation of the reconstructed array needs good
396estimation of the underlying mean and standard deviation of the
397background noise distribution. These statistics are estimated using
398robust methods, to avoid corruption by strong outlying points. The
399mean of the distribution is actually estimated by the median, while
400the median absolute deviation from the median (MADFM) is calculated
401and corrected assuming Gaussianity to estimate the underlying standard
402deviation $\sigma$. The Gaussianity (or Normality) assumption is
403critical, as the MADFM does not give the same value as the usual rms
404or standard deviation value -- for a normal distribution
405$N(\mu,\sigma)$ we find MADFM$=0.6744888\sigma$. The difference
406between the MADFM and $\sigma$ is corrected for, so the user need only
407think in the usual multiples of $\sigma$ when setting
408\texttt{snrRecon}. See Appendix~\ref{app-madfm} for a derivation of
409this value.
410
411When thresholding the different wavelet scales, the value of $\sigma$
412as measured from the wavelet array needs to be scaled to account for the
413increased amount of correlation between neighbouring pixels (due to
414the convolution). See Appendix~\ref{app-scaling} for details on this
415scaling.
416
417\subsubsection{User control of reconstruction parameters}
418
419The most important parameter for the user to select in relation to the
420reconstruction is the threshold for each wavelet array. This is set
421using the \texttt{snrRecon} parameter, and is given as a multiple of the
422rms (estimated by the MADFM) above the mean (which for the wavelet
423arrays should be approximately zero). There are several other
424parameters that can be altered as well that affect the outcome of the
425reconstruction.
426
427By default, the cube is reconstructed in three dimensions, using a
4283-dimensional filter and 3-dimensional convolution. This can be
429altered, however, using the parameter \texttt{reconDim}. If set to 1,
430this means the cube is reconstructed by considering each spectrum
431separately, whereas \texttt{reconDim=2} will mean the cube is
432reconstructed by doing each channel map separately. The merits of
433these choices are discussed in \S\ref{sec-notes}, but it should be
434noted that a 2-dimensional reconstruction can be susceptible to edge
435effects if the spatial shape is not rectangular.
436
437The user can also select the minimum scale to be used in the
438reconstruction -- the first scale exhibits the highest frequency
439variations, and so ignoring this one can sometimes be beneficial in
440removing excess noise. The default, however, is to use all scales
441(\texttt{minscale = 1}).
442
443Finally, the filter that is used for the convolution can be selected
444by using \texttt{filterCode} and the relevant code number -- the
445choices are listed in Appendix~\ref{app-param}. A larger filter will
446give a better reconstruction, but take longer and use more memory when
447executing. When multi-dimensional reconstruction is selected, this
448filter is used to construct a 2- or 3-dimensional equivalent.
449
450\subsection{Reconstruction I/O}
451\label{sec-reconIO}
452
453The reconstruction stage can be relatively time-consuming, particularly
454for large cubes and reconstructions in 3-D. To get around this, \duchamp\
455provides a shortcut to allow users to perform multiple searches (\eg with
456different thresholds) on the same reconstruction without calculating the
457reconstruction each time.
458
459The first step is to choose to save the reconstructed array as a FITS
460file by setting \texttt{flagOutputRecon = true}. The file will be saved
461in the same directory as the input image, so the user needs to have write
462permissions for that directory.
463
464The filename will be derived from the input filename, with extra
465information detailing the reconstruction that has been done. For
466example, suppose \texttt{image.fits} has been reconstructed using a
4673-dimensional reconstruction with filter 2, thresholded at $4\sigma$
468using all scales. The output filename will then be
469\texttt{image.RECON-3-2-4-1.fits} (\ie it uses the four parameters
470relevant for the \atrous\ reconstruction as listed in
471Appendix~\ref{app-param}). The new FITS file will also have these
472parameters as header keywords. If a subsection of the input image has
473been used (see \S\ref{sec-input}), the format of the output filename
474will be \texttt{image.sub.RECON-3-2-4-1.fits}, and the subsection that
475has been used is also stored in the FITS header.
476
477Likewise, the residual image, defined as the difference between the input
478and reconstructed arrays, can also be saved in the same manner by setting
479\texttt{flagOutputResid = true}. Its filename will be the same as above,
480with RESID replacing RECON.
481
482If a reconstructed image has been saved, it can be read in and used
483instead of redoing the reconstruction. To do so, the user should set
484\texttt{flagReconExists = true}. The user can indicate the name of the
485reconstructed FITS file using the \texttt{reconFile} parameter, or, if
486this is not specified, \duchamp\ searches for the file with the name
487as defined above. If the file is not found, the reconstruction is
488performed as normal. Note that to do this, the user needs to set
489\texttt{flagAtrous = true} (obviously, if this is \texttt{false}, the
490reconstruction is not needed).
491
492\subsection{Searching the image}
493\label{sec-detection}
494
495The image is searched for detections in two ways: spectrally (a
4961-dimensional search in the spectrum in each spatial pixel), and
497spatially (a 2-dimensional search in the spatial image in each
498channel). In both cases, the algorithm finds connected pixels that are
499above the user-specified threshold. In the case of the spatial image
500search, the algorithm of \citet{lutz80} is used to raster scan through
501the image and connect groups of pixels on neighbouring rows.
502
503Note that this algorithm cannot be applied directly to a 3-dimensional
504case, as it requires that objects are completely nested in a row: that
505is, if you are scanning along a row, and one object finishes and
506another starts, you know that you will not get back to the first one
507(if at all) until the second is completely finished for that
508row. Three-dimensional data does not have this property, which is why
509we break up the searching into 1- and 2-dimensional cases.
510
511The determination of the threshold is done in one of two ways. The
512first way is a simple sigma-clipping, where a threshold is set at a
513fixed number $n$ of standard deviations above the mean, and pixels
514above this threshold are flagged as detected. The value of $n$ is set
515with the parameter \texttt{snrCut}. As before, the value of the
516standard deviation is estimated by the MADFM, and corrected by the
517ratio derived in Appendix~\ref{app-madfm}.
518
519The second method uses the False Discovery Rate (FDR) technique
520\citep{miller01,hopkins02}, whose basis we briefly detail here. The
521false discovery rate (given by the number of false detections divided
522by the total number of detections) is fixed at a certain value
523$\alpha$ (\eg $\alpha=0.05$ implies 5\% of detections are false
524positives). In practice, an $\alpha$ value is chosen, and the ensemble
525average FDR (\ie $\langle \mathrm{FDR} \rangle$) when the method is used will
526be less than $\alpha$.  One calculates $p$ -- the probability,
527assuming the null hypothesis is true, of obtaining a test statistic as
528extreme as the pixel value (the observed test statistic) -- for each
529pixel, and sorts them in increasing order. One then calculates $d$
530where
531\[
532d = \max_j \left\{ j : P_j < \frac{j\alpha}{c_N N} \right\},
533\]
534and then rejects all hypotheses whose $p$-values are less than or equal
535to $P_d$. (So a $P_i<P_d$ will be rejected even if $P_i \geq
536i\alpha/(c_N N)$.) Note that ``reject hypothesis'' here means ``accept
537the pixel as an object pixel'' (\ie we are rejecting the null
538hypothesis that the pixel belongs to the background).
539
540The $c_N$ values here are normalisation constants that depend on the
541correlated nature of the pixel values. If all the pixels are
542uncorrelated, then $c_N=1$. If $N$ pixels are correlated, then their
543tests will be dependent on each other, and so $c_N = \sum_{i=1}^N
544i^{-1}$. \citet{hopkins02} consider real radio data, where the pixels
545are correlated over the beam. In this case the sum is made over the
546$N$ pixels that make up the beam. The value of $N$ is calculated from
547the FITS header (if the correct keywords -- BMAJ, BMIN -- are not
548present, a default value of 10 pixels is assumed).
549
550The theory behind the FDR method implies a direct connection between the
551choice of $\alpha$ and the fraction of detections that will be false
552positives. However, due to the merging process, this direct connection is
553lost when looking at the final number of detections -- see discussion in
554\S\ref{sec-notes}. The effect is that the number of false detections will
555be less than indicated by the $\alpha$ value used.
556
557If a reconstruction has been made, the residuals (defined as original
558$-$ reconstruction) are used to estimate the noise parameters of the
559cube. Otherwise they are estimated directly from the cube itself. In
560both cases, robust estimators are used as described above.
561
562Detections must have a minimum number of pixels to be counted. This
563minimum number is given by the input parameters \texttt{minPix} (for
5642-dimensional searches) and \texttt{minChannels} (for 1-dimensional
565searches).
566
567The search only looks for positive features. If one is interested
568instead in negative features (such as absorption lines), set the
569parameter \texttt{flagNegative = true}. This will invert the cube (\ie
570multiply all pixels by $-1$) prior to the search, and then re-invert
571the cube (and the fluxes of any detections) after searching is
572complete. All outputs are done in the same manner as normal, so that
573fluxes of detections will be negative.
574
575\subsection{Merging detected objects}
576\label{sec-merger}
577
578The searching step produces a list of detected objects that will have many
579repeated detections of a given object -- for instance, spectral
580detections in adjacent pixels of the same object and/or spatial
581detections in neighbouring channels. These are then combined in an
582algorithm that matches all objects judged to be ``close''. This
583determination is made in one of two ways.
584
585One way is to define two thresholds -- one spatial and one in velocity
586-- and say that two objects should be merged if there is at least one
587pair of pixels that lie within these threshold distances of each
588other. These thresholds are specified by the parameters
589\texttt{threshSpatial} and \texttt{threshVelocity} (in units of pixels
590and channels respectively).
591
592Alternatively, the spatial requirement can be changed to say that
593there must be a pair of pixels that are \emph{adjacent} -- a stricter,
594but perhaps more realistic requirement, particularly when the spatial pixels
595have a large angular size (as is the case for \hi\ surveys). This
596method can be selected by setting the parameter
597\texttt{flagAdjacent} to 1 (\ie \texttt{true}) in the parameter file. The
598velocity thresholding is done in the same way as the first option.
599
600Once the detections have been merged, they may be ``grown''. This is a
601process of increasing the size of the detection by adding adjacent
602pixels that are above some secondary threshold. This threshold is
603lower than the one used for the initial detection, but above the noise
604level, so that faint pixels are only detected when they are close to a
605bright pixel. The value of this threshold is a possible input
606parameter (\texttt{growthCut}), with a default value of $1.5\sigma$. The
607use of the growth algorithm is controlled by the \texttt{flagGrowth}
608parameter -- the default value of which is \texttt{false}. If the
609detections are grown, they are sent through the merging algorithm a
610second time, to pick up any detections that now overlap or have grown
611over each other.
612
613Finally, to be accepted, the detections must span \emph{both} a minimum
614number of channels (to remove any spurious single-channel spikes that
615may be present), and a minimum number of spatial pixels. These
616numbers, as for the original detection step, are set with the
617\texttt{minChannels} and \texttt{minPix} parameters. The channel
618requirement means there must be at least one set of \texttt{minChannels}
619consecutive channels in the source for it to be accepted.
620
621\section{Outputs}
622\label{sec-output}
623
624\subsection{During execution}
625
626\duchamp\ provides the user with feedback whilst it is running, to
627keep the user informed on the progress of the analysis. Most of this
628consists of self-explanatory messages about the particular stage the
629program is up to. The relevant parameters are printed to the screen at
630the start (once the file has been successfully read in), so the user
631is able to make a quick check that the setup is correct (see
632Appendix~\ref{app-input} for an example).
633
634If the cube is being trimmed (\S\ref{sec-modify}), the resulting
635dimensions are printed to indicate how much has been trimmed. If a
636reconstruction is being done, a continually updating message shows
637either the current iteration and scale, compared to the maximum scale
638(when \texttt{reconDim=3}), or a progress bar showing the amount of
639the cube that has been reconstructed (for smaller values of
640\texttt{reconDim}).
641
642During the searching algorithms, the progress through the 1D and 2D
643searches is shown. When the searches have completed,
644the numbers of objects found in both the 1D and 2D searches are
645reported (see \S\ref{sec-detection} for details).
646
647In the merging process (where multiple detections of the same object
648are combined -- see \S\ref{sec-merger}), two stages of output
649occur. The first is when each object in the list is compared with all
650others. The output shows two numbers: the first being how far through
651the list the current object is, and the second being the length of the
652list. As the algorithm proceeds, the first number should increase and
653the second should decrease (as objects are combined). When the numbers
654meet (\ie the whole list has been compared), the second phase begins,
655in which multiply-appearing pixels in each object are removed, as are
656objects not meeting the minimum channels requirement. During this
657phase, the total number of accepted objects is shown, which should
658steadily increase until all have been accepted or rejected. Note that
659these steps can be very quick for small numbers of detections.
660
661Since this continual printing to screen has some overhead of time and
662CPU involved, the user can elect to not print this information by
663setting the parameter \texttt{verbose = 0}. In this case, the user is
664still informed as to the steps being undertaken, but the details of
665the progress are not shown.
666
667\subsection{Results}
668
669\subsubsection{Table of Results}
670
671Finally, we get to the results -- the reason for running \duchamp\ in
672the first place. Once the detection list is finalised, it is sorted by
673the mean velocity of the detections (or, if there is no good WCS
674associated with the cube, by the mean Z-pixel position). The results
675are then printed to the screen and to the output file, given by the
676\texttt{OutFile} parameter. The results list, an example of which can be
677seen in Appendix~\ref{app-output}, contains the following columns
678(note that the title of the columns depending on WCS information will
679depend on the projection of the WCS):
680
681\begin{entry}
682\item[Obj\#] The ID number of the detection (simply the sequential
683  count for the list, which is ordered by increasing velocity).
684\item[Name] The IAU-format name of the detection (derived from the WCS
685  position -- see below for a description of the format).
686\item[X] The average X-pixel position.
687\item[Y] The average Y-pixel position.
688\item[Z] The average Z-pixel position.
689\item[RA/GLON] The Right Ascension or Galactic Longitude of the centre
690of the object.
691\item[DEC/GLAT] The Declination or Galactic Latitude of the centre of
692the object.
693\item[VEL] The mean velocity of the object [units given by the
694  \texttt{spectralUnits} parameter].
695\item[w\_RA/w\_GLON] The width of the object in Right Ascension or
696Galactic Longitude [arcmin].
697\item[w\_DEC/w\_GLAT] The width of the object in Declination or Galactic
698  Latitude [arcmin].
699\item[w\_VEL] The full velocity width of the detection (max channel
700  $-$ min channel, in velocity units [see note below]).
701\item[F\_int] The integrated flux over the object, in the units of
702  flux times velocity, corrected for the beam if necessary.
703\item[F\_peak] The peak flux over the object, in the units of flux.
704\item[X1, X2] The minimum and maximum X-pixel coordinates.
705\item[Y1, Y2] The minimum and maximum Y-pixel coordinates.
706\item[Z1, Z2] The minimum and maximum Z-pixel coordinates.
707\item[Npix] The number of voxels (\ie distinct $(x,y,z)$ coordinates)
708  in the detection.
709\item[Flag] Whether the detection has any warning flags (see below).
710\end{entry}
711The Name is derived from the WCS position. For instance, a source
712centred on the RA,Dec position 12$^h$53$^m$45$^s$,
713$-$36$^\circ$24$'$12$''$ will be called J125345$-$362412 (if the epoch
714is J2000) or B125345$-$362412 (if B1950). An alternative form is used
715for Galactic coordinates: a source centred on the position ($l$,$b$) =
716(323.1245, 5.4567) will be called G323.124$+$05.457. If the WCS is not
717valid (\ie is not present or does not have all the necessary
718information), the Name, RA, DEC, VEL and related columns are not
719printed, but the pixel coordinates are still provided.
720
721The velocity units can be specified by the user, using the parameter
722\texttt{spectralUnits} (enter it as a single string). The default value
723is km/s, which should be suitable for most users. These units are also
724used to give the units of integrated flux.
725
726The last column contains any warning flags about the detection. There
727are currently two options here. An `E' is printed if the detection is
728next to the edge of the image, meaning either the limit of the pixels,
729or the limit of the non-BLANK pixel region. An `N' is printed if the
730total flux, summed over all the (non-BLANK) pixels in the smallest box
731that completely encloses the detection, is negative. Note that this
732sum is likely to include non-detected pixels. It is of use in
733pointing out detections that lie next to strongly negative pixels,
734such as might arise due to interference -- the detected pixels might
735then also be due to the interference, so caution is advised.
736
737\subsubsection{Other results lists}
738
739Two alternative results files can also be requested. One option is a
740VOTable-format XML file, containing just the RA, Dec, Velocity and the
741corresponding widths of the detections, as well as the fluxes. The
742user should set \texttt{flagVOT = 1}, and put the desired filename in the
743parameter \texttt{votFile} -- note that the default is for it not to be
744produced. This file should be compatible with all Virtual Observatory
745tools (such as Aladin\footnote{ Aladin can be found on the web at
746\href{http://aladin.u-strasbg.fr/}{http://aladin.u-strasbg.fr/}}). The
747second option is an annotation file for use with the Karma toolkit of
748visualisation tools (in particular, with \texttt{kvis}). This will draw a
749circle at the position of each detection, and number it according to
750the Obj\# given above. To make use of this option, the user should
751set \texttt{flagKarma = 1}, and put the desired filename in the parameter
752\texttt{karmaFile} -- again, the default is for it not to be produced.
753
754As the program is running, it also (optionally) records the detections
755made in each individual spectrum or channel (see \S\ref{sec-detection}
756for details on this process). This is recorded in the file given by
757the parameter \texttt{LogFile}. This file does not include the columns
758\texttt{Name, RA, DEC, w\_RA, w\_DEC, VEL, w\_VEL}. This file is
759designed primarily for diagnostic purposes: \eg to see if a given set
760of pixels is detected in, say, one channel image, but does not survive
761the merging process. The list of pixels (and their fluxes) in the
762final detection list are also printed to this file, again for
763diagnostic purposes. The file also records the execution time, as well
764as the command-line statement used to run \duchamp. The creation of
765this log file can be prevented by setting \texttt{flagLog =
766false}. (This may be a good idea if you are not interested in its
767contents, as it can be a large file if many pixels are being
768detected.)
769
770\subsubsection{Graphical output -- spectra}
771
772As well as the output data file, a postscript file is created that
773shows the spectrum for each detection, together with a small cutout
774image (the 0th moment) and basic information about the detection (note
775that any flags are printed after the name of the detection, in the
776format \texttt{[E]}). If the cube was reconstructed, the spectrum from
777the reconstruction is shown in red, over the top of the original
778spectrum. The spectral extent of the detected object is indicated by
779two dashed blue lines, and the region covered by the ``Milky Way''
780channels is shown by a green hashed box.
781
782The spectrum that is plotted is governed by the
783\texttt{spectralMethod} parameter. It can be either \texttt{peak},
784where the spectrum is from the spatial pixel containing the
785detection's peak flux; or \texttt{sum}, where the spectrum is summed
786over all spatial pixels, and then corrected for the beam size.
787
788The spectral extent of the detection is indicated with blue lines, and
789a zoom is shown in a separate window. The cutout image can optionally
790include a border around the spatial pixels that are in the detection
791(turned on and off by the parameter \texttt{drawBorders} -- the
792default is \texttt{true}). It also includes a scale bar in the bottom
793left corner to indicate size -- it is 15~arcmin long (note that due to
794projection effects it may be a slightly different physical length from
795object to object). An example detection can be seen below in
796Fig.~\ref{fig-spect}.
797
798\begin{figure}[t]
799\begin{center}
800\includegraphics[width=\textwidth]{example_spectrum}
801\end{center}
802\caption{\footnotesize An example of the spectrum output. Note several
803  of the features discussed in the text: the red lines indicating the
804  reconstructed spectrum; the blue dashed lines indicating the
805  spectral extent of the detection; the green hashed area indicating
806  the Milky Way channels that are ignored by the searching algorithm;
807  the blue border showing its spatial extent on the 0th moment map;
808  and the 15~arcmin-long scale bar.}
809\label{fig-spect}
810\end{figure}
811
812\subsubsection{Graphical output -- maps}
813
814\begin{figure}[!t]
815\begin{center}
816\includegraphics[width=\textwidth]{example_moment_map}
817\end{center}
818\caption{\footnotesize An example of the moment map created by
819  \duchamp. The full extent of the cube is covered, and the 0th moment
820  of each object is shown (integrated individually over all the
821  detected channels).}
822\label{fig-moment}
823\end{figure}
824
825Finally, a couple of images are optionally produced: a 0th moment map
826of the cube, combining just the detected channels in each object,
827showing the integrated flux in grey-scale; and a ``detection image'',
828a grey-scale image where the pixel values are the number of channels
829that spatial pixel is detected in. In both cases, if
830\texttt{drawBorders = true}, a border is drawn around the spatial
831extent of each detection. An example moment map is shown in
832Fig.~\ref{fig-moment}.  The production or otherwise of these images is
833governed by the \texttt{flagMaps} parameter.
834
835The purpose of these images is to provide a visual guide to where the
836detections have been made, and, particularly in the case of the moment
837map, to provide an indication of the strength of the source. In both
838cases, the detections are numbered (in the same sense as the output
839list), and the spatial borders are marked out as for the cutout images
840in the spectra file. Both these images are saved as postscript files
841(given by the parameters \texttt{momentMap} and \texttt{detectionMap}
842respectively), with the latter also displayed in a {\sc pgplot}
843window (regardless of the state of \texttt{flagMaps}).
844
845\section{Notes and hints on the use of \duchamp}
846\label{sec-notes}
847
848In using \duchamp, the user has to make a number of decisions about
849the way the program runs. This section is designed to give the user
850some idea about what to choose.
851
852The main choice is whether or not to use the wavelet
853reconstruction. The main benefits of this are the marked reduction in
854the noise level, leading to regularly-shaped detections, and good
855reliability for faint sources. The main drawback with its use is the
856long execution time: to reconstruct a $170\times160\times1024$
857(\hipass) cube often requires three iterations and takes about 20--25
858minutes to run completely. Note that this is for the three-dimensional
859reconstruction: using \texttt{reconDim=1} makes the reconstruction
860quicker (the full program then takes about 6 minutes), but it is still
861the largest part of the time.
862
863The searching part of the procedure is much quicker: searching an
864un-reconstructed cube leads to execution times of only a couple of
865minutes. Alternatively, using the ability to read in previously-saved
866reconstructed arrays makes running the reconstruction more than once a
867more feasible prospect.
868
869On the positive side, the shape of the detections in a cube that has
870been reconstructed will be much more regular and smooth -- the ragged
871edges that objects in the raw cube possess are smoothed by the removal
872of most of the noise. This enables better determination of the shapes
873and characteristics of objects.
874
875A further point to consider when using the reconstruction is that if
876the two-dimensional reconstruction is chosen (\texttt{reconDim=2}), it
877can be susceptible to edge effects. If the valid area in the cube (\ie
878the part that is not BLANK) has non-rectangular edges, the convolution
879can produce artefacts in the reconstruction that mimic the edges and
880can lead (depending on the selection threshold) to some spurious
881sources. Caution is advised with such data -- the user is advised to
882check carefully the reconstructed cube for the presence of such
883artefacts. Note, however, that the 1- and 3-dimensional
884reconstructions are \emph{not} susceptible in the same way, since the
885spectral direction does not generally exhibit these BLANK edges, and
886so we recommend the use of either of these.
887
888If one chooses the reconstruction method, a further decision is
889required on the signal-to-noise cutoff used in determining acceptable
890wavelet coefficients. A larger value will remove more noise from the
891cube, at the expense of losing fainter sources, while a smaller value
892will include more noise, which may produce spurious detections, but
893will be more sensitive to faint sources. Values of less than about
894$3\sigma$ tend to not reduce the noise a great deal and can lead to
895many spurious sources (although this will depend on the nature of the
896cube).
897
898When it comes to searching, the FDR method produces more reliable results
899than simple sigma-clipping, particularly in the absence of reconstruction.
900However, it does not work in exactly the way one would expect for a
901given value of \texttt{alpha}. For instance, setting fairly liberal values
902of \texttt{alpha} (say, 0.1) will often lead to a much smaller fraction
903of false detections (\ie much less than 10\%). This is the effect of the
904merging algorithms, which combine the sources after the detection stage,
905and reject detections not meeting the minimum pixel or channel requirements.
906It is thus better to aim for larger \texttt{alpha} values than those derived
907from a straight conversion of the desired false detection rate.
908
909Finally, as \duchamp\ is still undergoing development, there are some
910elements that are not fully developed. In particular, it is not as
911clever as I would like at avoiding interference. The ability to place
912requirements on the minimum number of channels and pixels partially
913circumvents this problem, but work is being done to make \duchamp\
914smarter at rejecting signals that are clearly (to a human eye at
915least) interference. See the following section for further
916improvements that are planned.
917
918\section{Future Developments}
919
920This is both a list of planned improvements and a wish-list of
921features that would be nice to include (but are not planned in the
922immediate future). Let me know if there are items not on this list, or
923items on the list you would like prioritised.
924
925\begin{itemize}
926
927\item Better determination of the noise characteristics of
928  spectral-line cubes, including understanding how the noise is
929  generated and developing a model for it. \textbf{Planned.}
930 
931\item Include more source analysis. Examples could be: shape
932  information; measurements of \hi\ mass; more variety of measurements
933  of velocity width and profile. \textbf{Some planned.}
934
935\item Provide some indication of the significance of the detection
936  (\ie some S/N-like value). \textbf{Planned.}
937
938\item Improved ability to reject interference, possibly on the
939  spectral shape of features. \textbf{Planned.}
940
941\item Ability to separate (de-blend) distinct sources that have been
942  merged. \textbf{Planned.}
943
944\item Link to lists of possible counterparts (\eg via NED/SIMBAD/other
945  VO tools?). \textbf{Wish-list.}
946
947\item On-line web service interface, so a user can upload a cube and
948  get back a source-list. \textbf{Wish-list.}
949
950\item Embed \duchamp\ in a GUI, to move away from the text-based
951  interaction. \textbf{Wish-list.}
952\end{itemize}
953
954
955%\bibliographystyle{mn2e}
956%\bibliographystyle{abbrvnat}
957%\bibliography{mnrasmnemonic,sourceDetection}
958\begin{thebibliography}{}
959
960\bibitem[\protect\citeauthoryear{{Calabretta} \& {Greisen}}{{Calabretta} \&
961  {Greisen}}{2002}]{calabretta02}
962{Calabretta} M.,  {Greisen} E.,  2002, A\&A, 395, 1077
963
964\bibitem[\protect\citeauthoryear{{Greisen} \& {Calabretta}}{{Greisen} \&
965  {Calabretta}}{2002}]{greisen02}
966{Greisen} E.,  {Calabretta} M.,  2002, A\&A, 395, 1061
967
968\bibitem[\protect\citeauthoryear{{Hanisch}, {Farris}, {Greisen}, {Pence},
969  {Schlesinger}, {Teuben}, {Thompson} \& {Warnock}}{{Hanisch}
970  et~al.}{2001}]{hanisch01}
971{Hanisch} R.,  {Farris} A.,  {Greisen} E.,  {Pence} W.,  {Schlesinger} B.,
972  {Teuben} P.,  {Thompson} R.,    {Warnock} A.,  2001, A\&A, 376, 359
973
974\bibitem[\protect\citeauthoryear{{Hopkins}, {Miller}, {Connolly}, {Genovese},
975  {Nichol} \& {Wasserman}}{{Hopkins} et~al.}{2002}]{hopkins02}
976{Hopkins} A.,  {Miller} C.,  {Connolly} A.,  {Genovese} C.,  {Nichol} R.,
977  {Wasserman} L.,  2002, AJ, 123, 1086
978
979\bibitem[\protect\citeauthoryear{Lutz}{Lutz}{1980}]{lutz80}
980Lutz R.,  1980, The Computer Journal, 23, 262
981
982\bibitem[\protect\citeauthoryear{{Meyer} et~al.,}{{Meyer}
983  et~al.}{2004}]{meyer04:trunc}
984{Meyer} M.,  et~al., 2004, MNRAS, 350, 1195
985
986\bibitem[\protect\citeauthoryear{{Miller}, {Genovese}, {Nichol}, {Wasserman},
987  {Connolly}, {Reichart}, {Hopkins}, {Schneider} \& {Moore}}{{Miller}
988  et~al.}{2001}]{miller01}
989{Miller} C.,  {Genovese} C.,  {Nichol} R.,  {Wasserman} L.,  {Connolly} A.,
990  {Reichart} D.,  {Hopkins} A.,  {Schneider} J.,    {Moore} A.,  2001, AJ, 122,
991  3492
992
993\bibitem[\protect\citeauthoryear{Minchin}{Minchin}{1999}]{minchin99}
994Minchin R.,  1999, PASA, 16, 12
995
996\bibitem[\protect\citeauthoryear{Starck \& Murtagh}{Starck \&
997  Murtagh}{2002}]{starck02:book}
998Starck J.-L.,  Murtagh F.,  2002, {``Astronomical Image and Data Analysis''}.
999Springer
1000
1001\end{thebibliography}
1002
1003
1004\appendix
1005\newpage
1006\section{Obtaining and Installing \duchamp}
1007
1008The \duchamp\ web page can be found at the following location:\\
1009\href{http://www.atnf.csiro.au/people/Matthew.Whiting/Duchamp}%
1010{http://www.atnf.csiro.au/people/Matthew.Whiting/Duchamp}\\
1011Here you can find a gzipped tar archive of the source code that can be
1012downloaded and extracted, as well as this User's Guide in postscript
1013and hyperlinked PDF formats.
1014
1015\duchamp\ can be built on Unix systems by typing (assuming that the
1016prompt your terminal provides is a \texttt{> } -- don't type this
1017character!):
1018\begin{quote}
1019\texttt{%
1020> ./configure\\
1021> make\\
1022> make clean (optional -- to remove the object files)}
1023\end{quote}
1024
1025Run in this manner, \texttt{configure} should find all the necessary
1026libraries, but if some libraries have been installed in non-standard
1027locations, it may fail. In this case, you can specify additional
1028directories to look in by giving extra command-line arguments. There
1029are separate options for library files (\eg \texttt{libcpgplot.a}) and header
1030files (\eg \texttt{cpgplot.h}).
1031
1032For example, if \textsc{wcslib} had been installed in
1033\texttt{/home/mduchamp/wcslib}, there are two libraries that are
1034likely to be in separate subdirectories: \texttt{C/} and
1035\texttt{pgsbox/}. Each subdirectory needs to be searched for library
1036and header files, so one could build \duchamp\ by typing:
1037\begin{quote}
1038\texttt{%
1039>  ./configure $\backslash$ \\
1040LIBDIRS="/home/mduchamp/wcslib/C /home/mduchamp/wcslib/pgsbox"
1041$\backslash$\\
1042INCDIRS="/home/mduchamp/wcslib/C /home/mduchamp/wcslib/pgsbox"}
1043\end{quote}
1044And then just run \texttt{make} in the usual fashion:
1045\begin{quote}
1046\texttt{> make}
1047\end{quote}
1048
1049This will produce the executable \texttt{Duchamp}. There are two
1050possible ways to run it. The first is:
1051\begin{quote}
1052\texttt{> Duchamp -f [FITS file]}
1053\end{quote}
1054where \texttt{[FITS file]} is the file you wish to search. This method
1055simply uses the default values of all parameters.
1056
1057The second method allows some determination of the parameter values by
1058the user. Type:
1059\begin{quote}
1060\texttt{> Duchamp -p [parameter file]}
1061\end{quote}
1062where \texttt{[parameter file]} is a file with the input parameters,
1063including the name of the cube you want to search. There are two
1064example input files included with the distribution. The smaller one,
1065\texttt{InputExample}, shows the typical parameters one might want to
1066set. The large one, \texttt{InputComplete}, lists all possible
1067parameters that can be entered, and a brief description of them. To
1068get going quickly, just replace the ``your-file-here'' in
1069\texttt{InputExample} with your image name, and type
1070\begin{quote}
1071\texttt{> Duchamp -p InputExample}
1072\end{quote}
1073
1074The following appendices provide details on the individual parameters,
1075and show examples of the output files that \duchamp\ produces.
1076
1077\newpage
1078\section{Available parameters}
1079\label{app-param}
1080
1081The full list of parameters that can be set in the input file is
1082given here. If not listed, they take the default value given in
1083parentheses. Since the order of the parameters in the input file does
1084not matter, they are grouped here in logical sections.
1085
1086\subsection*{Input-output related}
1087\begin{entry}
1088\item[ImageFile (no default assumed)] The filename of the
1089  data cube to be analysed.
1090\item[flagSubsection \texttt{[false]}] A flag to indicate whether one
1091  wants a subsection of the requested image.
1092\item[Subsection \texttt{[ [*,*,*] ]}] The requested subsection, which
1093  should be specified in the format \texttt{[x1:x2,y1:y2,z1:z2]}, where
1094  the limits are inclusive. If the full range of a dimension is
1095  required, use a \texttt{*}, \eg if you want the full spectral range of
1096  a subsection of the image, use \texttt{[30:140,30:140,*]}.
1097\item[flagReconExists \texttt{[false]}] A flag to indicate whether the
1098  reconstructed array has been saved by a previous run of \duchamp. If
1099  set true, the reconstructed array will be read from the file given by
1100  \texttt{reconFile}, rather than calculated directly.
1101\item[reconFile (no default assumed)] The FITS file that contains the
1102  reconstructed array. If \texttt{flagReconExists} is true and this
1103  parameter is not defined, the default file searched will be
1104  determined by the \atrous\ parameters (see \S\ref{sec-recon}).
1105\item[OutFile \texttt{[duchamp-Results.txt]}] The file containing the
1106  final list of detections. This also records the list of input
1107  parameters.
1108\item[SpectraFile \texttt{[duchamp-Spectra.ps]}] The postscript file
1109  containing the resulting integrated spectra and images of the
1110  detections.
1111\item[flagLog \texttt{[true]}] A flag to indicate whether intermediate
1112  detections should be logged.
1113\item[LogFile \texttt{[duchamp-Logfile.txt]}] The file in which intermediate
1114  detections are logged. These are detections that have not been
1115  merged. This is primarily for use in debugging and diagnostic
1116  purposes -- normal use of the program will probably not require
1117  this.
1118\item[flagOutputRecon \texttt{[false]}] A flag to say whether or not to
1119  save the reconstructed cube as a FITS file. The filename will be
1120  derived from the ImageFile -- the reconstruction of \texttt{image.fits}
1121  will be saved as \texttt{image.RECON?.fits}, where \texttt{?} stands for
1122  the value of \texttt{snrRecon} (see below).
1123\item[flagOutputResid \texttt{[false]}] As for \texttt{flagOutputRecon}, but
1124  for the residual array -- the difference between the original cube
1125  and the reconstructed cube. The filename will be \texttt{image.RESID?.fits}.
1126\item[flagVOT \texttt{[false]}] A flag to say whether to create a VOTable
1127  file corresponding to the information in \texttt{OutFile}. This will be
1128  an XML file in the Virtual Observatory VOTable format.
1129\item[votFile \texttt{[duchamp-Results.xml]}] The VOTable file with the
1130  list of final detections. Some input parameters are also recorded.
1131\item[flagKarma \texttt{[false]}] A flag to say whether to create a
1132  Karma annotation file corresponding to the information in
1133  \texttt{OutFile}. This can be used as an overlay for the Karma
1134  programs such as \texttt{kvis}.
1135\item[karmaFile \texttt{[duchamp-Results.ann]}] The Karma annotation
1136  file showing the list of final detections.
1137\item[flagMaps \texttt{[true]}] A flag to say whether to save
1138  postscript files showing the 0th moment map of the whole cube
1139  (parameter \texttt{momentMap}) and the detection image
1140  (\texttt{detectionMap}).
1141\item[momentMap \texttt{[duchamp-MomentMap.ps]}] A postscript file
1142  containing a map of the 0th moment of the detected sources, as well
1143  as pixel and WCS coordinates.
1144\item[detectionMap \texttt{[duchamp-DetectionMap.ps]}] A postscript
1145  file showing each of the detected objects, coloured in greyscale by
1146  the number of channels spanned by each pixel. Also shows pixel and WCS
1147  coordinates.
1148\end{entry}
1149
1150\subsection*{Modifying the cube}
1151\begin{entry}
1152\item[flagBlankPix \texttt{[true]}] A flag to say whether to remove BLANK
1153  pixels from the analysis -- these are pixels set to some particular
1154  value because they fall outside the imaged area.
1155\item[blankPixValue \texttt{[-8.00061]}] The value of the BLANK pixels,
1156  if this information is not contained in the FITS header (the usual
1157  procedure is to obtain this value from the header information -- in
1158  which case the value set by this parameter is ignored).
1159\item[flagMW \texttt{[false]}] A flag to say whether to ignore channels
1160  contaminated by Milky Way (or other) emission -- the searching
1161  algorithms will not look at these channels.
1162\item[maxMW \texttt{[112]}] The maximum channel number containing
1163  ``Milky Way'' emission.
1164\item[minMW \texttt{[75]}] The minimum channel number containing
1165  ``Milky Way'' emission. Note that the range specified by
1166  \texttt{maxMW} and \texttt{minMW} is inclusive.
1167\item[flagBaseline \texttt{[false]}] A flag to say whether to remove the
1168  baseline from each spectrum in the cube for the purposes of
1169  reconstruction and detection.
1170\end{entry}
1171
1172\subsection*{Detection related}
1173
1174\subsubsection*{General detection}
1175\begin{entry}
1176\item[flagNegative \texttt{[false]}] A flag to indicate that the features
1177  being searched for are negative. The cube will be inverted prior to
1178  searching.
1179\item[snrCut \texttt{[3.]}] The cut-off value for thresholding, in terms
1180  of number of $\sigma$ above the mean.
1181\item[flagGrowth \texttt{[false]}] A flag indicating whether or not to
1182  grow the detected objects to a smaller threshold.
1183\item[growthCut \texttt{[2.]}] The smaller threshold used in growing
1184  detections. In units of $\sigma$ above the mean.
1185\end{entry}
1186
1187\subsubsection*{\Atrous\ reconstruction}
1188\begin{entry}
1189\item [flagATrous \texttt{[true]}] A flag indicating whether or not to
1190  reconstruct the cube using the \atrous\ wavelet
1191  reconstruction. See \S\ref{sec-recon} for details.
1192\item[reconDim \texttt{[3]}] The number of dimensions to use in the
1193  reconstruction. 1 means reconstruct each spectrum separately, 2
1194  means each channel map is done separately, and 3 means do the whole
1195  cube in one go.
1196\item[scaleMin \texttt{[1]}] The minimum wavelet scale to be used in the
1197  reconstruction. A value of 1 means ``use all scales''.
1198\item[snrRecon \texttt{[4]}] The thresholding cutoff used in the
1199  reconstruction -- only wavelet coefficients this many $\sigma$ above
1200  the mean (or greater) are included in the reconstruction.
1201\item[filterCode \texttt{[1]}] The code number of the filter to use in
1202  the reconstruction. The options are:
1203  \begin{itemize}
1204  \item \textbf{1:} B$_3$-spline filter: coefficients =
1205    $(\frac{1}{16}, \frac{1}{4}, \frac{3}{8}, \frac{1}{4}, \frac{1}{16})$
1206  \item \textbf{2:} Triangle filter: coefficients =
1207    $(\frac{1}{4}, \frac{1}{2}, \frac{1}{4})$
1208  \item \textbf{3:} Haar wavelet: coefficients =
1209    $(0, \frac{1}{2}, \frac{1}{2})$
1210  \end{itemize}
1211\end{entry}
1212
1213\subsubsection*{FDR method}
1214\begin{entry}
1215\item[flagFDR \texttt{[false]}] A flag indicating whether or not to use
1216  the False Discovery Rate method in thresholding the pixels.
1217\item[alphaFDR \texttt{[0.01]}] The $\alpha$ parameter used in the FDR
1218analysis. The average number of false detections, as a fraction of the
1219total number, will be less than $\alpha$ (see \S\ref{sec-detection}).
1220\end{entry}
1221
1222\subsubsection*{Merging detections}
1223\begin{entry}
1224\item[minPix \texttt{[2]}] The minimum number of spatial pixels for a single
1225  detection to be counted.
1226\item[minChannels \texttt{[3]}] The minimum number of consecutive
1227  channels that must be present in a detection.
1228\item[flagAdjacent \texttt{[true]}] A flag indicating whether to use the
1229  ``adjacent pixel'' criterion to decide whether to merge objects. If
1230  not, the next two parameters are used to determine whether objects
1231  are within the necessary thresholds.
1232\item[threshSpatial \texttt{[3.]}] The maximum allowed minimum spatial
1233  separation (in pixels) between two detections for them to be merged
1234  into one. Only used if \texttt{flagAdjacent = false}.
1235\item[threshVelocity \texttt{[7.]}] The maximum allowed minimum channel
1236  separation between two detections for them to be merged into
1237  one.
1238\end{entry}
1239
1240\subsubsection*{Other parameters}
1241\begin{entry}
1242\item[spectralMethod \texttt{[peak]}] This indicates which method is used
1243  to plot the output spectra: \texttt{peak} means plot the spectrum
1244  containing the detection's peak pixel; \texttt{sum} means sum the
1245  spectra of each detected spatial pixel, and correct for the beam
1246  size. Any other choice defaults to \texttt{peak}.
1247\item[spectralUnits \texttt{[km/s]}] The user can specify the units of
1248  the spectral axis. Assuming the WCS of the FITS file is valid, the
1249  spectral axis is transformed into velocity, and put into these units
1250  for all output and for calculations such as the integrated flux of a
1251  detection.
1252\item[drawBorders \texttt{[true]}] A flag indicating whether borders
1253  are to be drawn around the detected objects in the moment maps
1254  included in the output (see for example Fig.~\ref{fig-spect}).
1255\item[verbose \texttt{[true]}] A flag indicating whether to print the
1256  progress of computationally-intensive algorithms (such as the
1257  searching and merging) to screen.
1258\end{entry}
1259
1260
1261\newpage
1262\section{Example parameter files}
1263\label{app-input}
1264
1265This is what a typical parameter file would look like.
1266
1267\begin{verbatim}
1268imageFile       /DATA/SITAR_1/whi550/cubes/H201_abcde_luther_chop.fits
1269logFile         logfile.txt
1270outFile         results.txt
1271spectraFile     spectra.ps
1272flagSubsection  false
1273flagOutputRecon false
1274flagOutputResid 0
1275flagBlankPix    1
1276flagMW          1
1277minMW           75
1278maxMW           112
1279minPix          3
1280flagGrowth      1
1281growthCut       1.5
1282flagATrous      0
1283scaleMin        1
1284snrRecon        4
1285flagFDR         1
1286alphaFDR        0.1
1287numPixPSF       20
1288snrCut          3
1289threshSpatial   3
1290threshVelocity  7
1291\end{verbatim}
1292
1293Note that, as in this example, the flag parameters can be entered as
1294strings (true/false) or integers (1/0). Also, note that it is not
1295necessary to include all these parameters in the file, only those that
1296need to be changed from the defaults (as listed in
1297Appendix~\ref{app-param}), which in this case would be very few. A
1298minimal parameter file might look like:
1299\begin{verbatim}
1300imageFile       /DATA/SITAR_1/whi550/cubes/H201_abcde_luther_chop.fits
1301flagLog         false
1302snrRecon        3
1303snrCut          2.5
1304minChannels     4
1305\end{verbatim}
1306This will reconstruct the cube with a lower SNR value than the
1307default, select objects at a lower threshold, with a stricter minimum
1308channel requirement, and not keep a log of the intermediate
1309detections.
1310
1311The following page demonstrates how the parameters are presented to the
1312user, both on the screen at execution time, and in the output and log
1313files. On each line, there is a description on the parameter, the relevant
1314parameter name that is used in the input file (if there is one that the
1315user can enter), and the value of the parameter being used.
1316\newpage
1317\begin{landscape}
1318Typical presentation of parameters in output and log files: 
1319\begin{verbatim}
1320---- Parameters ----
1321Image to be analysed.........................[imageFile]  =  input.fits
1322Intermediate Logfile...........................[logFile]  =  duchamp-Logfile.txt         
1323Final Results file.............................[outFile]  =  duchamp-Results.txt         
1324Spectrum file..............................[spectraFile]  =  duchamp-Spectra.ps   
13250th Moment Map...............................[momentMap]  =  duchamp-MomentMap.ps
1326Detection Map.............................[detectionMap]  =  duchamp-DetectionMap.ps
1327Saving reconstructed cube?.............[flagoutputrecon]  =  false
1328Saving residuals from reconstruction?..[flagoutputresid]  =  false
1329------
1330Searching for Negative features?..........[flagNegative]  =  false
1331Fixing Blank Pixels?......................[flagBlankPix]  =  true
1332Blank Pixel Value.......................................  =  -8.00061
1333Removing Milky Way channels?....................[flagMW]  =  true
1334Milky Way Channels.......................[minMW - maxMW]  =  75-112
1335Beam Size (pixels)......................................  =  10.1788
1336Removing baselines before search?.........[flagBaseline]  =  false
1337Minimum # Pixels in a detection.................[minPix]  =  2
1338Minimum # Channels in a detection..........[minChannels]  =  3
1339Growing objects after detection?............[flagGrowth]  =  false
1340Using A Trous reconstruction?...............[flagATrous]  =  true
1341Number of dimensions in reconstruction........[reconDim]  =  3
1342Minimum scale in reconstruction...............[scaleMin]  =  1
1343SNR Threshold within reconstruction...........[snrRecon]  =  4
1344Filter being used for reconstruction........[filterCode]  =  1 (B3 spline function)
1345Using FDR analysis?............................[flagFDR]  =  false
1346SNR Threshold...................................[snrCut]  =  3
1347Using Adjacent-pixel criterion?...........[flagAdjacent]  =  true
1348Max. velocity separation for merging....[threshVelocity]  =  7
1349Method of spectral plotting.............[spectralMethod]  =  peak
1350\end{verbatim}
1351
1352\newpage
1353\section{Example results file}
1354\label{app-output}
1355This is the typical content of an output file, after running \duchamp\
1356with the parameters illustrated on the previous page.
1357
1358{\scriptsize
1359  \begin{verbatim}
1360Results of the Duchamp source finder: Tue May 23 14:51:38 2006
1361---- Parameters ----
1362      (... omitted for clarity -- see previous page for examples...)
1363--------------------
1364Total number of detections = 25
1365--------------------
1366------------------------------------------------------------------------------------------------------------------------------------------------------
1367 Obj#       Name     X     Y     Z           RA          DEC      VEL     w_RA    w_DEC   w_VEL     F_int    F_peak  X1  X2  Y1  Y2  Z1  Z2  Npix Flag
1368                                                               [km/s] [arcmin] [arcmin]  [km/s] [Jy km/s] [Jy/beam]                         [pix]     
1369------------------------------------------------------------------------------------------------------------------------------------------------------
1370    1 J0618-2532  30.2  86.0 113.3  06:18:12.54 -25:32:44.79  208.502    45.17    34.61  26.383    24.394     0.350  25  35  82  90 112 114   137    E
1371    2 J0609-2156  59.5 140.6 114.6  06:09:19.66 -21:56:31.20  225.572    44.39    31.47  65.957    16.128     0.213  55  65 137 144 113 118   153     
1372    3 J0545-2143 141.2 143.2 114.8  05:45:51.71 -21:43:36.20  228.470    19.61    16.66  26.383     2.412     0.090 139 143 142 145 114 116    29     
1373    4 J0617-2633  33.3  70.8 115.6  06:17:25.52 -26:33:33.83  238.736    65.02    30.10  26.383     9.776     0.117  26  41  68  75 115 117   104    E
1374    5 J0601-2500  86.2  94.9 117.9  06:01:39.54 -25:00:32.46  269.419    27.99    24.02  26.383     3.920     0.124  83  89  92  97 117 119    44     
1375    6 J0602-2547  84.0  83.1 118.0  06:02:18.29 -25:47:31.69  270.319    20.01    19.99  26.383     2.999     0.118  82  86  81  85 117 119    34     
1376    7 J0547-2448 133.0  97.2 118.7  05:47:52.53 -24:48:38.16  279.113    19.72    12.54  26.383     1.474     0.074 131 135  96  98 118 120    21     
1377    8 J0606-2719  71.1  60.0 121.3  06:06:10.99 -27:19:48.61  314.090    52.36    39.59  39.574    14.268     0.150  65  77  55  64 120 123   154     
1378    9 J0611-2137  52.4 145.3 162.5  06:11:20.92 -21:37:29.57  857.955    32.39    23.49 118.722    43.178     0.410  49  56 142 147 158 167   265    E
1379   10 J0600-2859  89.7  35.3 202.4  06:00:34.08 -28:59:00.43 1383.160    23.93    24.10 171.487    24.439     0.173  87  92  33  38 196 209   271     
1380   11 J0558-2638  95.4  70.3 223.1  05:58:53.03 -26:38:45.91 1656.140    11.93    12.07  92.339     1.045     0.063  94  96  69  71 220 227    18     
1381   12 J0617-2723  34.7  58.3 227.4  06:17:07.07 -27:23:50.65 1712.868    16.75    23.53 290.209     8.529     0.093  33  36  56  61 215 237   118     
1382   13 J0558-2525  95.8  88.6 231.7  05:58:49.27 -25:25:33.60 1770.134    27.87    24.16 237.444    12.863     0.115  92  98  86  91 221 239   175     
1383   14 J0600-2141  88.8 144.4 232.5  06:00:54.02 -21:41:57.06 1780.188    27.96    24.13 224.252    30.743     0.166  86  92 142 147 222 239   344    E
1384   15 J0615-2634  40.0  70.8 232.6  06:15:25.50 -26:34:20.04 1782.214    12.44    15.69  52.765     2.084     0.068  39  41  69  72 231 235    31     
1385   16 J0604-2606  76.0  78.4 233.0  06:04:41.13 -26:06:21.19 1787.226    24.13    23.87 211.061    23.563     0.155  73  78  76  81 225 241   278     
1386   17 J0601-2340  87.9 114.9 235.8  06:01:08.83 -23:40:19.37 1824.122    31.95    28.09 237.444    82.380     0.297  85  92 112 118 227 245   647     
1387   18 J0615-2235  38.2 130.5 254.5  06:15:32.09 -22:35:37.24 2070.934    12.29    11.70 105.531     1.555     0.070  37  39 129 131 249 257    24     
1388   19 J0617-2305  31.4 122.8 258.1  06:17:33.45 -23:05:28.94 2118.752    12.34    11.65  26.383     1.022     0.062  30  32 122 124 257 259    16     
1389   20 J0612-2149  49.6 142.2 270.3  06:12:11.04 -21:49:29.72 2279.926    16.27    15.73 395.740    15.156     0.101  48  51 141 144 257 287   204     
1390   21 J0616-2133  35.3 146.0 300.6  06:16:15.78 -21:33:09.69 2679.148    20.22     7.47 224.252     3.014     0.127  33  37 145 146 294 311    28    E
1391   22 J0555-2956 107.3  20.9 367.6  05:55:08.02 -29:56:09.08 3562.236    19.71    20.30  39.574     5.891     0.169 105 109  19  23 366 369    58     
1392   23 J0557-2246  99.8 128.2 434.0  05:57:43.77 -22:46:42.95 4438.776    11.88    16.12 105.531     1.703     0.167  99 101 127 130 430 438    17    N
1393   24 J0616-2648  38.1  67.2 546.8  06:16:02.10 -26:48:35.49 5926.464    12.35    11.67  26.383     1.276     0.064  37  39  66  68 546 548    18     
1394   25 J0552-2916 117.0  30.5 727.0  05:52:13.64 -29:16:58.02 8303.952    11.59    20.25 303.400    35.523     0.479 116 118  28  32 716 739   111     
1395  \end{verbatim}
1396}
1397Note that the
1398width of the table can make it hard to read. A good trick for those
1399using UNIX/Linux is to make use of the \texttt{a2ps} command. The
1400following works well, producing a postscript file \texttt{duchamp-Results.ps}:
1401\\\verb|a2ps -1 -r -f8 -o duchamp-Results.ps duchamp-Results.txt|
1402
1403%\end{landscape}
1404
1405\newpage
1406\section{Example VOTable output}
1407\label{app-votable}
1408This is part of the VOTable, in XML format, corresponding to the
1409output file in Appendix~\ref{app-output} (the indentation has been
1410removed to make it fit on the page).
1411
1412%\begin{landscape}
1413{\scriptsize
1414  \begin{verbatim}
1415<?xml version="1.0"?>
1416<VOTABLE version="1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
1417 xsi:noNamespaceSchemaLocation="http://www.ivoa.net/xml/VOTable/VOTable/v1.1">
1418<COOSYS ID="J2000" equinox="J2000." epoch="J2000." system="eq_FK5"/>
1419<RESOURCE name="Duchamp Output">
1420<TABLE name="Detections">
1421<DESCRIPTION>Detected sources and parameters from running the Duchamp source finder.</DESCRIPTION>
1422<PARAM name="FITS file" datatype="char" ucd="meta.file;meta.fits" value="/DATA/SITAR_1/whi550/cubes/H201_abcde_luther_chop.fits"/>
1423<PARAM name="Threshold" datatype="float" ucd="stat.snr" value="2.5">
1424<PARAM name="ATrous note" datatype="char" ucd="meta.note" value="The a trous reconstruction method was used, with the following parameters.">
1425<PARAM name="ATrous Dimension" datatype="int" ucd="meta.code;stat" value="3">
1426<PARAM name="ATrous Cut" datatype="float" ucd="stat.snr" value="4">
1427<PARAM name="ATrous Minimum Scale" datatype="int" ucd="stat.param" value="1">
1428<PARAM name="ATrous Filter" datatype="char" ucd="meta.code;stat" value="B3 spline function">
1429<FIELD name="ID" ID="col1" ucd="meta.id" datatype="int" width="4"/>
1430<FIELD name="Name" ID="col2" ucd="meta.id;meta.main" datatype="char" arraysize="14"/>
1431<FIELD name="RA" ID="col3" ucd="pos.eq.ra;meta.main" ref="J2000" datatype="float" width="10" precision="6" unit="deg"/>
1432<FIELD name="Dec" ID="col4" ucd="pos.eq.dec;meta.main" ref="J2000" datatype="float" width="10" precision="6" unit="deg"/>
1433<FIELD name="w_RA" ID="col3" ucd="phys.angSize;pos.eq.ra" ref="J2000" datatype="float" width="7" precision="2" unit="arcmin"/>
1434<FIELD name="w_Dec" ID="col4" ucd="phys.angSize;pos.eq.dec" ref="J2000" datatype="float" width="7" precision="2" unit="arcmin"/>
1435<FIELD name="Vel" ID="col4" ucd="phys.veloc;src.dopplerVeloc" datatype="float" width="9" precision="3" unit="km/s"/>
1436<FIELD name="w_Vel" ID="col4" ucd="phys.veloc;src.dopplerVeloc;spect.line.width" datatype="float" width="8" precision="3" unit="km/s"/>
1437<FIELD name="Integrated_Flux" ID="col4" ucd="phys.flux;spect.line.intensity" datatype="float" width="10" precision="3" unit="km/s"/>
1438<DATA>
1439<TABLEDATA>
1440<TR>
1441<TD>   1</TD><TD> J0609-2200</TD><TD> 92.410416</TD><TD>-22.013390</TD><TD>  48.50</TD><TD>  39.42</TD><TD>  213.061</TD><TD>  65.957</TD><TD>    17.572</TD>
1442</TR>
1443<TR>
1444<TD>   2</TD><TD> J0608-2605</TD><TD> 92.042633</TD><TD>-26.085157</TD><TD>  44.47</TD><TD>  39.47</TD><TD>  233.119</TD><TD>  39.574</TD><TD>     4.144</TD>
1445</TR>
1446<TR>
1447<TD>   3</TD><TD> J0606-2724</TD><TD> 91.637840</TD><TD>-27.412022</TD><TD>  52.48</TD><TD>  47.57</TD><TD>  302.213</TD><TD>  39.574</TD><TD>    17.066</TD>
1448</TR>
1449(... table truncated for clarity ...)
1450</TABLEDATA>
1451</DATA>
1452</TABLE>
1453</RESOURCE>
1454</VOTABLE>
1455  \end{verbatim}
1456}
1457\end{landscape}
1458
1459\newpage
1460\section{Example Karma Annotation File output}
1461\label{app-karma}
1462
1463This is the format of the Karma Annotation file, showing the locations
1464of the detected objects. This can be loaded by the plotting tools of
1465the Karma package (for instance, \texttt{kvis}) as an overlay on the FITS
1466file.
1467
1468\begin{verbatim}
1469# Duchamp Source Finder results for
1470#  cube /DATA/SITAR_1/whi550/cubes/H201_abcde_luther_chop.fits
1471COLOR RED
1472COORD W
1473CIRCLE 92.3376 -21.9475 0.403992
1474TEXT 92.3376 -21.9475 1
1475CIRCLE 91.9676 -26.0193 0.37034
1476TEXT 91.9676 -26.0193 2
1477CIRCLE 91.5621 -27.3459 0.437109
1478TEXT 91.5621 -27.3459 3
1479CIRCLE 92.8285 -21.6344 0.269914
1480TEXT 92.8285 -21.6344 4
1481CIRCLE 90.1381 -28.9838 0.234179
1482TEXT 90.1381 -28.9838 5
1483CIRCLE 89.72 -26.6513 0.132743
1484TEXT 89.72 -26.6513 6
1485CIRCLE 94.2743 -27.4003 0.195175
1486TEXT 94.2743 -27.4003 7
1487CIRCLE 92.2739 -21.6941 0.134538
1488TEXT 92.2739 -21.6941 8
1489CIRCLE 89.7133 -25.4259 0.232252
1490TEXT 89.7133 -25.4259 9
1491CIRCLE 90.2206 -21.6993 0.266247
1492TEXT 90.2206 -21.6993 10
1493CIRCLE 93.8581 -26.5766 0.163153
1494TEXT 93.8581 -26.5766 11
1495CIRCLE 91.176 -26.1064 0.234356
1496TEXT 91.176 -26.1064 12
1497CIRCLE 90.2844 -23.6716 0.299509
1498TEXT 90.2844 -23.6716 13
1499CIRCLE 93.8774 -22.581 0.130925
1500TEXT 93.8774 -22.581 14
1501CIRCLE 94.3882 -23.0934 0.137108
1502TEXT 94.3882 -23.0934 15
1503CIRCLE 93.0491 -21.8223 0.202928
1504TEXT 93.0491 -21.8223 16
1505CIRCLE 94.0685 -21.5603 0.168456
1506TEXT 94.0685 -21.5603 17
1507CIRCLE 86.0568 -27.6095 0.101113
1508TEXT 86.0568 -27.6095 18
1509CIRCLE 88.7932 -29.9453 0.202624
1510TEXT 88.7932 -29.9453 19
1511\end{verbatim}
1512
1513\newpage
1514\section{Robust statistics for a Normal distribution}
1515\label{app-madfm}
1516
1517The Normal, or Gaussian, distribution for mean $\mu$ and standard
1518deviation $\sigma$ can be written as
1519\[
1520f(x) = \frac{1}{\sqrt{2\pi\sigma^2}}\ e^{-(x-\mu)^2/2\sigma^2}.
1521 \]
1522
1523When one has a purely Gaussian signal, it is straightforward to
1524estimate $\sigma$ by calculating the standard deviation (or rms) of
1525the data. However, if there is a small amount of signal present on top
1526of Gaussian noise, and one wants to estimate the $\sigma$ for the
1527noise, the presence of the large values from the signal can bias the
1528estimator to higher values.
1529
1530An alternative way is to use the median ($m$) and median absolute deviation
1531from the median ($s$) to estimate $\mu$ and $\sigma$. The median is the
1532middle of the distribution, defined for a continuous distribution by
1533\[
1534\int_{-\infty}^{m} f(x) \diff x = \int_{m}^{\infty} f(x) \diff x.
1535\]
1536From symmetry, we quickly see that for the continuous Normal
1537distribution, $m=\mu$. We consider the case henceforth of $\mu=0$,
1538without loss of generality.
1539
1540To find $s$, we find the distribution of the absolute deviation from
1541the median, and then find the median of that distribution. This
1542distribution is given by
1543\begin{eqnarray*}
1544g(x) &= &{\mbox{\rm distribution of }} |x|\\
1545     &= &f(x) + f(-x),\ x\ge0\\
1546     &= &\sqrt{\frac{2}{\pi\sigma^2}}\, e^{-x^2/2\sigma^2},\ x\ge0.
1547\end{eqnarray*}
1548So, the median absolute deviation from the median, $s$, is given by
1549\[
1550\int_{0}^{s} g(x) \diff x = \int_{s}^{\infty} g(x) \diff x.
1551\]
1552Now, $\int_{0}^{\infty}e^{-x^2/2\sigma^2} \diff x = \sqrt{\pi\sigma^2/2}$, and
1553so $\int_{s}^{\infty} e^{-x^2/2\sigma^2} \diff x =
1554\sqrt{\pi\sigma^2/2} - \int_{0}^{s} e^{-\frac{x^2}{2\sigma^2}} \diff x
1555$. Hence, to find $s$ we simply solve the following equation (setting $\sigma=1$ for
1556simplicity -- equivalent to stating $x$ and $s$ in units of $\sigma$):
1557\[
1558\int_{0}^{s}e^{-x^2/2} \diff x - \sqrt{\pi/8} = 0.
1559\]
1560This is hard to solve analytically (no nice analytic solution exists
1561for the finite integral that I'm aware of), but straightforward to
1562solve numerically, yielding the value of $s=0.6744888$. Thus, to
1563estimate $\sigma$ for a Normally distributed data set, one can calculate
1564$s$, then divide by 0.6744888 (or multiply by 1.4826042) to obtain the
1565correct estimator.
1566
1567Note that this is different to solutions quoted elsewhere,
1568specifically in \citet{meyer04:trunc}, where the same robust estimator
1569is used but with an incorrect conversion to standard deviation -- they
1570assume $\sigma = s\sqrt{\pi/2}$. This, in fact, is the conversion used
1571to convert the \emph{mean} absolute deviation from the mean to the
1572standard deviation. This means that the cube noise in the \hipass\
1573catalogue (their parameter Rms$_{\rm cube}$) should be 18\% larger
1574than quoted.
1575
1576\section{How Gaussian noise changes with wavelet scale}
1577\label{app-scaling}
1578
1579The key element in the wavelet reconstruction of an array is the
1580thresholding of the individual wavelet coefficient arrays. This is
1581usually done by choosing a level to be some number of standard
1582deviations above the mean value.
1583
1584However, since the wavelet arrays are produced by convolving the input
1585array by an increasingly large filter, the pixels in the coefficient
1586arrays become increasingly correlated as the scale of the filter
1587increases. This results in the measured standard deviation from a
1588given coefficient array decreasing with increasing scale. To calculate
1589this, we need to take into account how many other pixels each pixel in
1590the convolved array depends on.
1591
1592To demonstrate, suppose we have a 1-D array with $N$ pixel values
1593given by $F_i,\ i=1,\ldots,N$, and we convolve it with the B$_3$-spline
1594filter, defined by the set of coefficients
1595$\{1/16,1/4,3/8,1/4,1/16\}$. The flux of the $i$th pixel in the
1596convolved array will be
1597\[
1598F'_i = \frac{1}{16}F_{i-2} + \frac{1}{4}F_{i-1} + \frac{3}{8}F_{i}
1599+ \frac{1}{4}F_{i+1} + \frac{1}{16}F_{i+2}
1600\]
1601and the flux of the corresponding pixel in the wavelet array will be
1602\[
1603W'_i = F_i - F'_i = \frac{-1}{16}F_{i-2} - \frac{1}{4}F_{i-1} + \frac{5}{8}F_{i}
1604- \frac{1}{4}F_{i+1} - \frac{1}{16}F_{i+2}
1605\]
1606Now, assuming each pixel has the same standard deviation
1607$\sigma_i=\sigma$, we can work out the standard deviation for the
1608wavelet array:
1609\[
1610\sigma'_i = \sigma \sqrt{\left(\frac{1}{16}\right)^2 + \left(\frac{1}{4}\right)^2
1611  + \left(\frac{5}{8}\right)^2 + \left(\frac{1}{4}\right)^2 + \left(\frac{1}{16}\right)^2}
1612          = 0.72349\ \sigma
1613\]
1614Thus, the first scale wavelet coefficient array will have a standard
1615deviation of 72.3\% of the input array. This procedure can be followed
1616to calculate the necessary values for all scales, dimensions and
1617filters used by \duchamp.
1618
1619Calculating these values is clearly a critical step in performing the
1620reconstruction. \citet{starck02:book} did so by simulating data sets
1621with Gaussian noise, taking the wavelet transform, and measuring the
1622value of $\sigma$ for each scale. We take a different approach, by
1623calculating the scaling factors directly from the filter coefficients
1624by taking the wavelet transform of an array made up of a 1 in the
1625central pixel and 0s everywhere else. The scaling value is then
1626derived by taking the square root of the sum (in quadrature) of all
1627the wavelet coefficient values at each scale. We give the scaling
1628factors for the three filters available to \duchamp\ on the following
1629page. These values are hard-coded into \duchamp, so no on-the-fly
1630calculation of them is necessary.
1631
1632Memory limitations prevent us from calculating factors for large
1633scales, particularly for the three-dimensional case (hence the --
1634symbols in the tables). To calculate factors for
1635higher scales than those available, we note the following
1636relationships apply for large scales to a sufficient level of precision:
1637\begin{itemize}
1638\item 1-D: factor(scale $i$) = factor(scale $i-1$)$/\sqrt{2}$.
1639\item 2-D: factor(scale $i$) = factor(scale $i-1$)$/2$.
1640\item 3-D: factor(scale $i$) = factor(scale $i-1$)$/\sqrt{8}$.
1641\end{itemize}
1642
1643\newpage
1644\begin{itemize}
1645\item \textbf{B$_3$-Spline Function:} $\{1/16,1/4,3/8,1/4,1/16\}$
1646
1647\begin{tabular}{llll}
1648Scale & 1 dimension      & 2 dimension     & 3 dimension\\ \hline
16491     & 0.723489806      & 0.890796310     & 0.956543592\\
16502     & 0.285450405      & 0.200663851     & 0.120336499\\
16513     & 0.177947535      & 0.0855075048    & 0.0349500154\\
16524     & 0.122223156      & 0.0412474444    & 0.0118164242\\
16535     & 0.0858113122     & 0.0204249666    & 0.00413233507\\
16546     & 0.0605703043     & 0.0101897592    & 0.00145703714\\
16557     & 0.0428107206     & 0.00509204670   & 0.000514791120\\
16568     & 0.0302684024     & 0.00254566946   & --\\
16579     & 0.0214024008     & 0.00127279050   & --\\
165810    & 0.0151336781     & 0.000636389722  & --\\
165911    & 0.0107011079     & 0.000318194170  & --\\
166012    & 0.00756682272    & --              & --\\
166113    & 0.00535055108    & --              & --\\
1662%14    & 0.00378341085   & --              & --\\
1663%15    & 0.00267527545   & --              & --\\
1664%16    & 0.00189170541   & --              & --\\
1665%17    & 0.00133763772   & --              & --\\
1666%18    & 0.000945852704   & --             & --
1667\end{tabular}
1668
1669\item \textbf{Triangle Function:} $\{1/4,1/2,1/4\}$
1670
1671\begin{tabular}{llll}
1672Scale & 1 dimension      & 2 dimension     & 3 dimension\\ \hline
16731     & 0.612372436      & 0.800390530     & 0.895954449  \\
16742     & 0.330718914      & 0.272878894     & 0.192033014\\
16753     & 0.211947812      & 0.119779282     & 0.0576484078\\
16764     & 0.145740298      & 0.0577664785    & 0.0194912393\\
16775     & 0.102310944      & 0.0286163283    & 0.00681278387\\
16786     & 0.0722128185     & 0.0142747506    & 0.00240175885\\
16797     & 0.0510388224     & 0.00713319703   & 0.000848538128 \\
16808     & 0.0360857673     & 0.00356607618   & 0.000299949455 \\
16819     & 0.0255157615     & 0.00178297280   & -- \\
168210    & 0.0180422389     & 0.000891478237  & --  \\
168311    & 0.0127577667     & 0.000445738098  & --  \\
168412    & 0.00902109930    & 0.000222868922  & --  \\
168513    & 0.00637887978    & --              & -- \\
1686%14   & 0.00451054902    & --              & -- \\
1687%15   & 0.00318942978    & --              & -- \\
1688%16   & 0.00225527449    & --              & -- \\
1689%17   & 0.00159471988    & --              & -- \\
1690%18   & 0.000112763724   & --              & --
1691
1692\end{tabular}
1693
1694\item \textbf{Haar Wavelet:} $\{0,1/2,1/2\}$
1695
1696\begin{tabular}{llll}
1697Scale & 1 dimension      & 2 dimension     & 3 dimension\\ \hline
16981     & 0.707106781      & 0.433012702     & 0.935414347 \\
16992     & 0.500000000      & 0.216506351     & 0.330718914\\
17003     & 0.353553391      & 0.108253175     & 0.116926793\\
17014     & 0.250000000      & 0.0541265877    & 0.0413398642\\
17025     & 0.176776695      & 0.0270632939    & 0.0146158492\\
17036     & 0.125000000      & 0.0135316469    & 0.00516748303
1704
1705\end{tabular}
1706
1707
1708\end{itemize}
1709
1710\end{document}
Note: See TracBrowser for help on using the repository browser.