source: trunk/docs/Guide.tex @ 106

Last change on this file since 106 was 106, checked in by Matthew Whiting, 18 years ago

Minor changes to improve execution.
Slight changes to default parameters and to their references in the Guide.

File size: 82.3 KB
Line 
1\documentclass[12pt,a4paper]{article}
2
3%%%%%% LINE SPACING %%%%%%%%%%%%
4\usepackage{setspace}
5\singlespacing
6%\onehalfspacing
7%\doublespacing
8
9%Define a test for doing PDF format -- use different code below
10\newif\ifPDF
11\ifx\pdfoutput\undefined\PDFfalse
12\else\ifnum\pdfoutput > 0\PDFtrue
13     \else\PDFfalse
14     \fi
15\fi
16
17\textwidth=161 mm
18\textheight=245 mm
19\topmargin=-15 mm
20\oddsidemargin=0 mm
21\parindent=6 mm
22
23\usepackage[sort]{natbib}
24\usepackage{lscape}
25\bibpunct[,]{(}{)}{;}{a}{}{,}
26
27\newcommand{\eg}{e.g.\ }
28\newcommand{\ie}{i.e.\ }
29\newcommand{\hi}{H{\sc i}}
30\newcommand{\hipass}{{\sc hipass}}
31\newcommand{\duchamp}{\emph{Duchamp}}
32\newcommand{\atrous}{\textit{{\`a} trous}}
33\newcommand{\Atrous}{\textit{{\`A} trous}}
34\newcommand{\diff}{{\rm d}}
35\newcommand{\entrylabel}[1]{\mbox{\textsf{\bf{#1:}}}\hfil}
36\newenvironment{entry}
37        {\begin{list}{}%
38                {\renewcommand{\makelabel}{\entrylabel}%
39                        \setlength{\labelwidth}{30mm}%
40                        \setlength{\labelsep}{5pt}%
41                        \setlength{\itemsep}{2pt}%
42                        \setlength{\parsep}{2pt}%
43                        \setlength{\leftmargin}{35mm}%
44                }%
45        }%
46{\end{list}}
47
48
49\title{A Guide to the \duchamp\ Source Finding Software}
50\author{Matthew Whiting\\
51%{\small \href{mailto:Matthew.Whiting@csiro.au}{Matthew.Whiting@csiro.au}}\\
52Australia Telescope National Facility\\CSIRO}
53%\date{January 2006}
54\date{}
55
56% If we are creating a PDF, use different options for graphicx, hyperref.
57\ifPDF
58  \usepackage[pdftex]{graphicx,color}
59  \usepackage[pdftex]{hyperref}
60  \hypersetup{colorlinks=true,%             
61              citecolor=red,%
62              filecolor=red,%
63              linkcolor=red,%
64              urlcolor=red,%
65              }
66\else
67  \usepackage[dvips]{graphicx}
68  \usepackage[dvips]{hyperref}
69\fi
70
71\pagestyle{headings}
72\begin{document}
73
74\maketitle
75\thispagestyle{empty}
76\begin{figure}[!h]
77\begin{center}
78\includegraphics[width=\textwidth]{cover_image}
79\end{center}
80\end{figure}
81
82\newpage
83\tableofcontents
84
85\newpage
86\section{Introduction and getting going quickly}
87
88This document gives details on the program \duchamp: how to use it and
89what it does. This has been designed to provide a source-detection
90facility for spectral-line data cubes. The basic execution of
91\duchamp\ is to read in a FITS data cube, find sources in the cube,
92and produce a text file of positions, velocities and fluxes of the
93detections, as well as a postscript file of the spectra of each
94detection.
95
96So, you have a FITS cube, and you want to find the sources in it. What
97do you do? The first step is to make an input file that contains the
98list of parameters. Brief and detailed examples are shown in
99Appendix~\ref{app-input}. This provides the input file name, the various
100output files, and defines various parameters that control the
101execution.
102
103The standard way to run \duchamp\ is by the command
104\begin{quote}
105\texttt{Duchamp -p [parameter file]}
106\end{quote}
107replacing \texttt{[parameter file]} with the name of the file you have
108just created/copied. Alternatively, you can use the syntax
109\begin{quote}
110\texttt{Duchamp -f [FITS file]}
111\end{quote}
112where \texttt{[FITS file]} is the file you wish to search. In the latter
113case, all parameters will take their default values detailed in
114Appendix~\ref{app-param}. In either case, the program will then work
115away and give you the list of detections and their spectra. The
116program execution is summarised below, and detailed in
117\S\ref{sec-flow}. Information on inputs is in \S\ref{sec-param} and
118Appendix~\ref{app-param}, and descriptions of the output is in
119\S\ref{sec-output}.
120
121\subsection{A summary of the execution steps}
122
123The basic flow of the program is summarised here -- all steps are
124discussed in more detail in the following sections.
125\begin{enumerate}
126\item If the \texttt{-p} option is used, the parameter file given on
127  the command line is read in, and the parameters absorbed.
128\item The FITS image is located and read in to memory.
129\item If requested, a FITS image with a previously reconstructed array
130  is read in.
131\item If requested, blank pixels are trimmed from the edges, and
132  the baseline of each spectrum is removed.
133\item If the reconstruction method is requested, and the reconstructed
134  array has not been read in at Step 3 above, the cube is
135  reconstructed using the \atrous\ wavelet method.
136\item Searching for objects then takes place, using the requested
137  thresholding method.
138\item The list of objects is condensed by merging neighbouring objects
139  and removing those deemed unacceptable.
140\item The baselines and trimmed pixels are replaced prior to output.
141\item The details of the detections are written to screen and to the
142  requested output file.
143\item Maps showing the spatial location of the detections are written.
144\item The integrated spectra of each detection are written to a
145  postscript file.
146\item If requested, the reconstructed array can be written to a new
147  FITS file.
148\end{enumerate}
149
150\subsection{Guide to terminology}
151
152First, a brief note on the use of terminology in this guide. \duchamp\
153is designed to work on FITS ``cubes''. These are FITS\footnote{FITS is
154the Flexible Image Transport System -- see \citet{hanisch01} or
155websites such as
156\href{http://fits.cv.nrao.edu/FITS.html}{http://fits.cv.nrao.edu/FITS.html}
157for details.} image arrays with three dimensions -- they are assumed
158to have the following form: the first two dimensions (referred to as
159$x$ and $y$) are spatial directions (that is, relating to the position
160on the sky), while the third dimension, $z$, is the spectral
161direction, which can correspond to frequency, wavelength, or
162velocity. The three dimensional analogue of pixels are ``voxels'', or
163volume cells -- a voxel is defined by a unique $(x,y,z)$ location and
164has a unique flux or intensity value associated with it.
165
166Each spatial pixel (a given $(x,y)$ coordinate) can be said to be a
167single spectrum, while a slice through the cube perpendicular to the
168spectral direction at a given $z$-value is a single channel (the 2-D
169image is a channel map).
170
171Detection involves locating a contiguous group of voxels with fluxes
172above a certain threshold. \duchamp\ makes no assumptions as to the
173size or shape of the detected features, other than having
174user-selected minimum size criteria.
175
176Features that are detected are assumed to be positive. The user can
177choose to search for negative features by setting an input parameter
178-- this inverts the cube prior to the search (see
179\S\ref{sec-detection} for details).
180
181Note that it is possible to run \duchamp\ on a two-dimensional image
182(\ie one with no frequency or velocity information), or indeed a
183one-dimensional array, and many of the features of the program will
184work fine. The focus, however, is on object detection in three
185dimensions.
186
187\subsection{Why \duchamp?}
188
189Well, it's important for a program to have a name, and the initial
190working title of \emph{cubefind} was somewhat uninspiring. I wanted to
191avoid the classic astronomical approach of designing a cute acronym,
192and since it is designed to work on cubes, I looked at naming it after
193a cubist. \emph{Picasso}, sadly, was already taken \citep{minchin99},
194so I settled on naming it after Marcel Duchamp, another cubist, but
195also one of the first artists to work with ``found objects''.
196
197\section{User Inputs}
198\label{sec-param}
199
200Input to the program is provided by means of a parameter
201file. Parameters are listed in the file, followed by the value that
202should be assigned to them. The syntax used is \texttt{paramName
203value}. Parameter names are not case-sensitive, and lines in the input
204file that start with \texttt{\#} are ignored. If a parameter is listed
205more than once, the latter value is used, but otherwise the order in
206which the parameters are listed in the input file is arbitrary.
207
208If a parameter is not listed, the default value is assumed. The
209defaults are chosen to provide a good result (using the reconstruction
210method), so the user doesn't need to specify many new parameters in
211the input file. Note that the image file \textbf{must} be specified! The
212parameters that can be set are listed in Appendix~\ref{app-param},
213with their default values in parentheses.
214
215The 'flag' parameters are stored as \texttt{bool} variables, and so are
216either \texttt{true = 1} or \texttt{false = 0}. \duchamp\ will only
217read them from the file as integers, and so they should be entered in
218the file as 0 or 1 (see example file in Appendix~\ref{app-input}).
219
220\section{What \duchamp\ is doing}
221\label{sec-flow}
222
223The execution flow of \duchamp\ is detailed here, indicating the
224main algorithmic steps that are used. The program is written in C/C++
225and makes use of the {\sc cfitsio}, {\sc wcslib} and {\sc pgplot}
226libraries.
227
228%\subsection{Parameter input}
229%
230%The user provides parameters that govern the selection of files and
231%the parameters used by the various subroutines in the program. This is
232%done via a parameter file, and the parameters are stored in a C++
233%class for use throughout the program. The form of the parameter file is
234%discussed in \S\ref{sec-param}, and the parameters themselves are
235%listed in Appendix~\ref{app-param}.
236
237\subsection{Image input}
238\label{sec-input}
239
240The cube is read in using basic {\sc cfitsio} commands, and stored as
241an array in a special C++ class. This class keeps track of
242the list of detected objects, as well as any reconstructed arrays that
243are made (see \S\ref{sec-recon}). The World Coordinate System (WCS)
244information for the cube is also obtained from the FITS header by {\sc
245wcslib} functions \citep{greisen02, calabretta02}, and this
246information, in the form of a \texttt{wcsprm} structure, is also stored
247in the same class.
248
249A sub-section of an image can be requested via the \texttt{subsection}
250parameter in the parameter file -- this can be a good idea if the cube
251has very noisy edges, which may produce many spurious detections. The
252generalised form of the subsection that is used by {\sc cfitsio} is
253\texttt{[x1:x2:dx,y1:y2:dy,z1:z2:dz]}, such that the x-coordinates run
254from \texttt{x1} to \texttt{x2} (inclusive), with steps of
255\texttt{dx}. The step value can be omitted (so a subsection of the
256form \texttt{[2:50,2:50,10:1000]} is still valid). \duchamp\ does not
257make use of any step value present in the subsection string, and any
258that are present are removed before the file is opened.
259
260If one wants the full range of a coordinate then replace the range
261with an asterisk, \eg \texttt{[2:50,2:50,*]}. If one wants to use a
262subsection, one must set \texttt{flagSubsection = 1}. A complete
263description of the section syntax can be found at the {\sc fitsio} web
264site
265\footnote{
266\href{http://heasarc.gsfc.nasa.gov/docs/software/fitsio/c/c\_user/node90.html}%
267{http://heasarc.gsfc.nasa.gov/docs/software/fitsio/c/c\_user/node90.html}}.
268
269\subsection{Image modification}
270\label{sec-modify}
271
272Several modifications to the cube can be made that improve the
273execution and efficiency of \duchamp\ (these are optional -- their
274use is indicated by the relevant flags set in the input parameter
275file).
276
277\subsubsection{Blank pixel removal}
278
279First, the cube is trimmed of any BLANK pixels that pad the image out
280to a rectangular shape. This is optional, its use determined by the
281\texttt{flagBlankPix} parameter. The value for these pixels is read from
282the FITS header (using the BLANK, BSCALE and BZERO keywords), but if
283these are not present then the value can be specified by the user in
284the parameter file using \texttt{blankPixValue}.
285
286This stage is particularly important for the reconstruction step, as
287lots of BLANK pixels on the edges will smooth out features in the
288wavelet calculation stage. The trimming will also reduce the size of
289the cube's array, speeding up the execution. The amount of trimming is
290recorded, and these pixels are added back in once the source-detection
291is completed (so that quoted pixel positions are applicable to the
292original cube).
293
294Rows and columns are trimmed one at a time until the first non-BLANK
295pixel is reached, so that the image remains rectangular. In practice,
296this means that there will be BLANK pixels left in the trimmed image
297(if the non-BLANK region is non-rectangular). However, these are
298ignored in all further calculations done on the cube.
299
300\subsubsection{Baseline removal}
301
302Second, the user may request the removal of baselines from the
303spectra, via the parameter \texttt{flagBaseline}. This may be necessary
304if there is a strong baseline ripple present, which can result in
305spurious detections on the high points of the ripple. The baseline is
306calculated from a wavelet reconstruction procedure (see
307\S\ref{sec-recon}) that keeps only the two largest scales. This is
308done separately for each spatial pixel (\ie for each spectrum in the
309cube), and the baselines are stored and added back in before any
310output is done. In this way the quoted fluxes and displayed spectra
311are as one would see from the input cube itself -- even though the
312detection (and reconstruction if applicable) is done on the
313baseline-removed cube.
314
315The presence of very strong signals (for instance, masers at several
316hundred Jy) can affect the determination of the baseline, leading to a
317large dip centred on the signal in the baseline-subtracted
318spectrum. To prevent this, the signal is trimmed prior to the
319reconstruction process at some standard threshold (at $8\sigma$ above
320the mean). The baseline determined should thus be representative of
321the true, signal-free baseline. Note that this trimming is only a
322temporary measure which does not affect the source-detection.
323
324\subsubsection{Ignoring bright Milky Way emission}
325
326Also, a single set of contiguous channels can be ignored -- these may
327exhibit very strong emission, such as that from the Milky Way as seen
328in extragalactic \hi\ cubes (hence the references to ``Milky Way'' in
329relation to this task -- apologies to Galactic astronomers!). Such
330dominant channels will both produce many unnecessary, uninteresting
331and large (in size and hence in memory usage) detections, and so will
332slow the program down and detract from the interesting detections. The
333use of this feature is controlled by the \texttt{flagMW} parameter, and
334the exact channels concerned are able to be set by the user (using
335\texttt{maxMW} and \texttt{minMW} -- these give an inclusive range of
336channels). When employed, these channels are temporarily blanked out
337for the searching, and the scaling of the spectral output (see
338Fig.~\ref{fig-spect}) will not take them into account. They will be
339present in the reconstructed array, however, and so will be included
340in the saved FITS file (see \S\ref{sec-reconIO}). When the final
341spectra are plotted, the range of channels covered by these parameters
342is indicated by a green hashed box.
343
344\subsection{Image reconstruction}
345\label{sec-recon}
346
347This is an optional step, but one that greatly enhances the
348source-detection process. The user can direct \duchamp\ to reconstruct
349the data cube using the \atrous\ wavelet procedure. A good
350description of the procedure can be found in
351\citet{starck02:book}. The reconstruction is an effective way of
352removing a lot of the noise in the image, allowing one to search
353reliably to fainter levels, and reducing the number of spurious
354detections. The payoff is that it can be relatively time- and
355memory-intensive.
356
357\subsubsection{Algorithm}
358
359The steps in the \atrous\ reconstruction are as follows:
360\begin{enumerate}
361\item Set the reconstructed array to 0 everywhere.
362\item The input array is discretely convolved with a given filter
363  function. This is determined from the parameter file via the
364  \texttt{filterCode} parameter -- see Appendix~\ref{app-param} for
365  details on the filters available.
366\item The wavelet coefficients are calculated by taking the difference
367  between the convolved array and the input array.
368\item If the wavelet coefficients at a given point are above the
369  requested threshold (given by \texttt{snrRecon} as the number of
370  $\sigma$ above the mean and adjusted to the current scale -- see
371  Appendix~\ref{app-scaling}), add these to the reconstructed array.
372\item The separation of the filter coefficients is doubled.
373\item The procedure is repeated from step 2, using the convolved array
374  as the input array.
375\item Continue until the required maximum number of scales is reached.
376\item Add the final smoothed (\ie convolved) array to the
377  reconstructed array. This provides the ``DC offset'', as each of the
378  wavelet coefficient arrays will have zero mean.
379\end{enumerate}
380
381The reconstruction has at least two iterations. The first iteration
382makes a first pass at the wavelet reconstruction (the process outlined
383in the 8 stages above), but the residual array will inevitably have
384some structure still in it, so the wavelet filtering is done on the
385residual, and any significant wavelet terms are added to the final
386reconstruction. This step is repeated until the change in the $\sigma$
387of the background is less than some fiducial amount.
388
389It is important to note that the \atrous\ decomposition is an
390example of a ``redundant'' transformation. If no thresholding is
391performed, the sum of all the wavelet coefficient arrays and the final
392smoothed array is identical to the input array. The thresholding thus
393removes only the unwanted structure in the array.
394
395Note that any BLANK pixels that are still in the cube will not be
396altered by the reconstruction -- they will be left as BLANK so that
397the shape of the valid part of the cube is preserved.
398
399\subsubsection{Statistics}
400
401The correct calculation of the reconstructed array needs good
402estimation of the underlying mean and standard deviation of the
403background noise distribution. These statistics are estimated using
404robust methods, to avoid corruption by strong outlying points. The
405mean of the distribution is actually estimated by the median, while
406the median absolute deviation from the median (MADFM) is calculated
407and corrected assuming Gaussianity to estimate the underlying standard
408deviation $\sigma$. The Gaussianity (or Normality) assumption is
409critical, as the MADFM does not give the same value as the usual rms
410or standard deviation value -- for a normal distribution
411$N(\mu,\sigma)$ we find MADFM$=0.6744888\sigma$. The difference
412between the MADFM and $\sigma$ is corrected for, so the user need only
413think in the usual multiples of $\sigma$ when setting
414\texttt{snrRecon}. See Appendix~\ref{app-madfm} for a derivation of
415this value.
416
417When thresholding the different wavelet scales, the value of $\sigma$
418as measured from the wavelet array needs to be scaled to account for the
419increased amount of correlation between neighbouring pixels (due to
420the convolution). See Appendix~\ref{app-scaling} for details on this
421scaling.
422
423\subsubsection{User control of reconstruction parameters}
424
425The most important parameter for the user to select in relation to the
426reconstruction is the threshold for each wavelet array. This is set
427using the \texttt{snrRecon} parameter, and is given as a multiple of the
428rms (estimated by the MADFM) above the mean (which for the wavelet
429arrays should be approximately zero). There are several other
430parameters that can be altered as well that affect the outcome of the
431reconstruction.
432
433By default, the cube is reconstructed in three dimensions, using a
4343-dimensional filter and 3-dimensional convolution. This can be
435altered, however, using the parameter \texttt{reconDim}. If set to 1,
436this means the cube is reconstructed by considering each spectrum
437separately, whereas \texttt{reconDim=2} will mean the cube is
438reconstructed by doing each channel map separately. The merits of
439these choices are discussed in \S\ref{sec-notes}, but it should be
440noted that a 2-dimensional reconstruction can be susceptible to edge
441effects if the spatial shape is not rectangular.
442
443The user can also select the minimum scale to be used in the
444reconstruction -- the first scale exhibits the highest frequency
445variations, and so ignoring this one can sometimes be beneficial in
446removing excess noise. The default, however, is to use all scales
447(\texttt{minscale = 1}).
448
449Finally, the filter that is used for the convolution can be selected
450by using \texttt{filterCode} and the relevant code number -- the
451choices are listed in Appendix~\ref{app-param}. A larger filter will
452give a better reconstruction, but take longer and use more memory when
453executing. When multi-dimensional reconstruction is selected, this
454filter is used to construct a 2- or 3-dimensional equivalent.
455
456\subsection{Reconstruction I/O}
457\label{sec-reconIO}
458
459The reconstruction stage can be relatively time-consuming, particularly
460for large cubes and reconstructions in 3-D. To get around this, \duchamp\
461provides a shortcut to allow users to perform multiple searches (\eg with
462different thresholds) on the same reconstruction without calculating the
463reconstruction each time.
464
465The first step is to choose to save the reconstructed array as a FITS
466file by setting \texttt{flagOutputRecon = true}. The file will be saved
467in the same directory as the input image, so the user needs to have write
468permissions for that directory.
469
470The filename will be derived from the input filename, with extra
471information detailing the reconstruction that has been done. For
472example, suppose \texttt{image.fits} has been reconstructed using a
4733-dimensional reconstruction with filter 2, thresholded at $4\sigma$
474using all scales. The output filename will then be
475\texttt{image.RECON-3-2-4-1.fits} (\ie it uses the four parameters
476relevant for the \atrous\ reconstruction as listed in
477Appendix~\ref{app-param}). The new FITS file will also have these
478parameters as header keywords. If a subsection of the input image has
479been used (see \S\ref{sec-input}), the format of the output filename
480will be \texttt{image.sub.RECON-3-2-4-1.fits}, and the subsection that
481has been used is also stored in the FITS header.
482
483Likewise, the residual image, defined as the difference between the input
484and reconstructed arrays, can also be saved in the same manner by setting
485\texttt{flagOutputResid = true}. Its filename will be the same as above,
486with RESID replacing RECON.
487
488If a reconstructed image has been saved, it can be read in and used
489instead of redoing the reconstruction. To do so, the user should set
490\texttt{flagReconExists = true}. The user can indicate the name of the
491reconstructed FITS file using the \texttt{reconFile} parameter, or, if
492this is not specified, \duchamp\ searches for the file with the name
493as defined above. If the file is not found, the reconstruction is
494performed as normal. Note that to do this, the user needs to set
495\texttt{flagAtrous = true} (obviously, if this is \texttt{false}, the
496reconstruction is not needed).
497
498\subsection{Searching the image}
499\label{sec-detection}
500
501The image is searched for detections in two ways: spectrally (a
5021-dimensional search in the spectrum in each spatial pixel), and
503spatially (a 2-dimensional search in the spatial image in each
504channel). In both cases, the algorithm finds connected pixels that are
505above the user-specified threshold. In the case of the spatial image
506search, the algorithm of \citet{lutz80} is used to raster scan through
507the image and connect groups of pixels on neighbouring rows.
508
509Note that this algorithm cannot be applied directly to a 3-dimensional
510case, as it requires that objects are completely nested in a row: that
511is, if you are scanning along a row, and one object finishes and
512another starts, you know that you will not get back to the first one
513(if at all) until the second is completely finished for that
514row. Three-dimensional data does not have this property, which is why
515we break up the searching into 1- and 2-dimensional cases.
516
517The determination of the threshold is done in one of two ways. The
518first way is a simple sigma-clipping, where a threshold is set at a
519fixed number $n$ of standard deviations above the mean, and pixels
520above this threshold are flagged as detected. The value of $n$ is set
521with the parameter \texttt{snrCut}. As before, the value of the
522standard deviation is estimated by the MADFM, and corrected by the
523ratio derived in Appendix~\ref{app-madfm}.
524
525The second method uses the False Discovery Rate (FDR) technique
526\citep{miller01,hopkins02}, whose basis we briefly detail here. The
527false discovery rate (given by the number of false detections divided
528by the total number of detections) is fixed at a certain value
529$\alpha$ (\eg $\alpha=0.05$ implies 5\% of detections are false
530positives). In practice, an $\alpha$ value is chosen, and the ensemble
531average FDR (\ie $\langle FDR \rangle$) when the method is used will
532be less than $\alpha$.  One calculates $p$ -- the probability,
533assuming the null hypothesis is true, of obtaining a test statistic as
534extreme as the pixel value (the observed test statistic) -- for each
535pixel, and sorts them in increasing order. One then calculates $d$
536where
537\[
538d = \max_j \left\{ j : P_j < \frac{j\alpha}{c_N N} \right\},
539\]
540and then rejects all hypotheses whose $p$-values are less than or equal
541to $P_d$. (So a $P_i<P_d$ will be rejected even if $P_i \geq
542j\alpha/c_N N$.) Note that ``reject hypothesis'' here means ``accept
543the pixel as an object pixel'' (\ie we are rejecting the null
544hypothesis that the pixel belongs to the background).
545
546The $c_N$ values here are normalisation constants that depend on the
547correlated nature of the pixel values. If all the pixels are
548uncorrelated, then $c_N=1$. If $N$ pixels are correlated, then their
549tests will be dependent on each other, and so $c_N = \sum_{i=1}^N
550i^{-1}$. \citet{hopkins02} consider real radio data, where the pixels
551are correlated over the beam. In this case the sum is made over the
552$N$ pixels that make up the beam. The value of $N$ is calculated from
553the FITS header (if the correct keywords -- BMAJ, BMIN -- are not
554present, a default value of 10 pixels is assumed).
555
556The theory behind the FDR method implies a direct connection between the
557choice of $\alpha$ and the fraction of detections that will be false
558positives. However, due to the merging process, this direct connection is
559lost when looking at the final number of detections -- see discussion in
560\S\ref{sec-notes}. The effect is that the number of false detections will
561be less than indicated by the $\alpha$ value used.
562
563If a reconstruction has been made, the residuals (defined as original
564$-$ reconstruction) are used to estimate the noise parameters of the
565cube. Otherwise they are estimated directly from the cube itself. In
566both cases, robust estimators are used as described above.
567
568Detections must have a minimum number of pixels to be counted. This
569minimum number is given by the input parameters \texttt{minPix} (for
5702-dimensional searches) and \texttt{minChannels} (for 1-dimensional
571searches).
572
573The search only looks for positive features. If one is interested
574instead in negative features (such as absorption lines), set the
575parameter \texttt{flagNegative = true}. This will invert the cube (\ie
576multiply all pixels by $-1$) prior to the search, and then re-invert
577the cube (and the fluxes of any detections) after searching is
578complete. All outputs are done in the same manner as normal, so that
579fluxes of detections will be negative.
580
581\subsection{Merging detected objects}
582\label{sec-merger}
583
584The searching step produces a list of detected objects that will have many
585repeated detections of a given object -- for instance, spectral
586detections in adjacent pixels of the same object and/or spatial
587detections in neighbouring channels. These are then combined in an
588algorithm that matches all objects judged to be ``close''. This
589determination is made in one of two ways.
590
591One way is to define two thresholds -- one spatial and one in velocity
592-- and say that two objects should be merged if there is at least one
593pair of pixels that lie within these threshold distances of each
594other. These thresholds are specified by the parameters
595\texttt{threshSpatial} and \texttt{threshVelocity} (in units of pixels
596and channels respectively).
597
598Alternatively, the spatial requirement can be changed to say that
599there must be a pair of pixels that are \emph{adjacent} -- a stricter,
600but perhaps more realistic requirement, particularly when the spatial pixels
601have a large angular size (as is the case for \hi\ surveys). This
602method can be selected by setting the parameter
603\texttt{flagAdjacent} to 1 (\ie \texttt{true}) in the parameter file. The
604velocity thresholding is done in the same way as the first option.
605
606Once the detections have been merged, they may be ``grown''. This is a
607process of increasing the size of the detection by adding adjacent
608pixels that are above some secondary threshold. This threshold is
609lower than the one used for the initial detection, but above the noise
610level, so that faint pixels are only detected when they are close to a
611bright pixel. The value of this threshold is a possible input
612parameter (\texttt{growthCut}), with a default value of $1.5\sigma$. The
613use of the growth algorithm is controlled by the \texttt{flagGrowth}
614parameter -- the default value of which is \texttt{false}. If the
615detections are grown, they are sent through the merging algorithm a
616second time, to pick up any detections that now overlap or have grown
617over each other.
618
619Finally, to be accepted, the detections must span \emph{both} a minimum
620number of channels (to remove any spurious single-channel spikes that
621may be present), and a minimum number of spatial pixels. These
622numbers, as for the original detection step, are set with the
623\texttt{minChannels} and \texttt{minPix} parameters. The channel
624requirement means there must be at least one set of \texttt{minChannels}
625consecutive channels in the source for it to be accepted.
626
627\section{Outputs}
628\label{sec-output}
629
630\subsection{During execution}
631
632\duchamp\ provides the user with feedback whilst it is running, to
633keep the user informed on the progress of the analysis. Most of this
634consists of self-explanatory messages about the particular stage the
635program is up to. The relevant parameters are printed to the screen at
636the start (once the file has been successfully read in), so the user
637is able to make a quick check that the setup is correct (see
638Appendix~{app-input} for an example).
639
640If the cube is being trimmed (\S\ref{sec-modify}), the resulting
641dimensions are printed to indicate how much has been trimmed. If a
642reconstruction is being done, a continually updating message shows
643either the current iteration and scale, compared to the maximum scale
644(when \texttt{reconDim=3}), or a progress bar showing the amount of
645the cube that has been reconstructed (for smaller values of
646\texttt{reconDim}).
647
648During the searching algorithms, the progress through the 1D and 2D
649searches are shown. When the searches have completed,
650the number of objects found in both the 1D and 2D searches are
651reported (see \S\ref{sec-detection} for details).
652
653In the merging process (where multiple detections of the same object
654are combined -- see \S\ref{sec-merger}), two stages of output
655occur. The first is when each object in the list is compared with all
656others. The output shows two numbers: the first being how far through
657the list the current object is, and the second being the length of the
658list. As the algorithm proceeds, the first number should increase and
659the second should decrease (as objects are combined). When the numbers
660meet (\ie the whole list has been compared), the second phase begins,
661in which multiply-appearing pixels in each object are removed, as are
662objects not meeting the minimum channels requirement. During this
663phase, the total number of accepted objects is shown, which should
664steadily increase until all have been accepted or rejected. Note that
665these steps can be very quick for small numbers of detections.
666
667Since this continual printing to screen has some overhead of time and
668CPU involved, the user can elect to not print this information by
669setting the parameter \texttt{verbose = 0}. In this case, the user is
670still informed as to the steps being undertaken, but the details of
671the progress are not shown.
672
673\subsection{Results}
674
675\subsubsection{Table of Results}
676
677Finally, we get to the results -- the reason for running \duchamp\ in
678the first place. Once the detection list is finalised, it is sorted by
679the mean velocity of the detections (or, if there is no good WCS
680associated with the cube, by the mean Z-pixel position). The results
681are then printed to the screen and to the output file, given by the
682\texttt{OutFile} parameter. The results list, an example of which can be
683seen in Appendix~\ref{app-output}, contains the following columns
684(note that the title of the columns depending on WCS information will
685depend on the projection of the WCS):
686
687\begin{entry}
688\item[Obj\#] The ID number of the detection (simply the sequential
689  count for the list, which is ordered by increasing velocity).
690\item[Name] The IAU-format name of the detection (derived from the WCS
691  projection).
692\item[X] The average X-pixel position.
693\item[Y] The average Y-pixel position.
694\item[Z] The average Z-pixel position.
695\item[RA/GLON] The Right Ascension or Galactic Longitude of the centre
696of the object.
697\item[DEC/GLAT] The Declination or Galactic Latitude of the centre of
698the object.
699\item[VEL] The mean velocity of the object [units given by the
700  \texttt{spectralUnits} parameter].
701\item[w\_RA/w\_GLON] The width of the object in Right Ascension or
702Galactic Longitude [arcmin].
703\item[w\_DEC/w\_GLAT] The width of the object in Declination Galactic
704  Latitude [arcmin].
705\item[w\_VEL] The full velocity width of the detection (max channel
706  $-$ min channel, in velocity units [see note below]).
707\item[F\_int] The integrated flux over the object, in the units of
708  flux times velocity, corrected for the beam if necessary.
709\item[F\_peak] The peak flux over the object, in the units of flux.
710\item[X1, X2] The minimum and maximum X-pixel coordinates.
711\item[Y1, Y2] The minimum and maximum Y-pixel coordinates.
712\item[Z1, Z2] The minimum and maximum Z-pixel coordinates.
713\item[Npix] The number of voxels (\ie distinct $(x,y,z)$ coordinates)
714  in the detection.
715\item[Flag] Whether the detection has any warning flags (see below).
716\end{entry}
717The Name is derived from the WCS position. For instance, a source
718centred on the RA,Dec position 12$^h$53$^m$45$^s$,
719-36$^\circ$24$'$12$''$ will be called J1253$-$3624 (if the epoch is
720J2000) or B1253$-$3624 (if B1950). An alternative form is used for
721Galactic coordinates: a source centred on the position ($l$,$b$) =
722(323.1245, 5.4567) will be called G323.12$+$05.45. If the WCS is not
723valid (\ie is not present or does not have all the necessary
724information), the Name, RA, DEC, VEL and related columns are not
725printed, but the pixel coordinates are still provided.
726
727The velocity units can be specified by the user, using the parameter
728\texttt{spectralUnits} (enter it as a single string). The default value
729is km/s, which should be suitable for most users. These units are also
730used to give the units of integrated flux.
731
732The last column contains any warning flags about the detection. There
733are currently two options here. An `E' is printed if the detection is
734next to the edge of the image, meaning either the limit of the pixels,
735or the limit of the non-BLANK pixel region. An `N' is printed if the
736total flux, summed over all the (non-BLANK) pixels in the smallest box
737that completely encloses the detection, is negative. Note that this
738sum is likely to include non-detected pixels. It is of use in
739pointing out detections that lie next to strongly negative pixels,
740such as might arise due to interference -- the detected pixels might
741then also be due to the interference, so caution is advised.
742
743\subsubsection{Other results lists}
744
745Two alternative results files can also be requested. One option is a
746VOTable-format XML file, containing just the RA, Dec, Velocity and the
747corresponding widths of the detections, as well as the fluxes. The
748user should set \texttt{flagVOT = 1}, and put the desired filename in the
749parameter \texttt{votFile} -- note that the default is for it not to be
750produced. This file should be compatible with all Virtual Observatory
751tools (such as Aladin\footnote{ Aladin can be found on the web at
752\href{http://aladin.u-strasbg.fr/}{http://aladin.u-strasbg.fr/}}). The
753second option is an annotation file for use with the Karma toolkit of
754visualisation tools (in particular, with \texttt{kvis}). This will draw a
755circle at the position of each detection, and number it according to
756the Obj\# given above. To make use of this option, the user should
757set \texttt{flagKarma = 1}, and put the desired filename in the parameter
758\texttt{karmaFile} -- again, the default is for it not to be produced.
759
760As the program is running, it also (optionally) records the detections
761made in each individual spectrum or channel (see
762\S\ref{sec-detection} for details on this process). This is
763recorded in the file given by the parameter \texttt{LogFile}. This file
764does not include the columns \texttt{Name, RA, DEC, w\_RA, w\_DEC, VEL,
765w\_VEL}. This file is designed primarily for diagnostic purposes: \eg
766to see if a given set of pixels is detected in, say, one channel
767image, but does not survive the merging process. The list of pixels
768(and their fluxes) in the final detection list are also printed to
769this file, again for diagnostic purposes. This feature can be turned
770off by setting \texttt{flagLog = false}. (This may be a good idea if you
771are not interested in its contents, as it can be a large file.)
772
773\begin{figure}[t]
774\begin{center}
775\includegraphics[width=\textwidth]{example_spectrum}
776\end{center}
777\caption{\footnotesize An example of the spectrum output. Note several
778  of the features discussed in the text: the red lines indicating the
779  reconstructed spectrum; the blue dashed lines indicating the
780  spectral extent of the detection; the green hashed area indicating
781  the Milky Way channels that are ignored by the searching algorithm;
782  the blue border showing its spatial extent on the 0th moment map;
783  and the 15~arcmin-long scale bar.}
784\label{fig-spect}
785\end{figure}
786
787\begin{figure}[!t]
788\begin{center}
789\includegraphics[width=\textwidth]{example_moment_map}
790\end{center}
791\caption{\footnotesize An example of the moment map created by
792  \duchamp. The full extent of the cube is covered, and the 0th moment
793  of each object is shown (integrated individually over all the
794  detected channels).}
795\label{fig-moment}
796\end{figure}
797
798\subsubsection{Graphical output -- spectra}
799
800As well as the output data file, a postscript file is created that
801shows the spectrum for each detection, together with a small cutout
802image (the 0th moment) and basic information about the detection (note
803that any flags are printed after the name of the detection, in the
804format \texttt{[E]}). If the cube was reconstructed, the spectrum from
805the reconstruction is shown in red, over the top of the original
806spectrum. The spectral extent of the detected object is indicated by
807two dashed blue lines, and the region covered by the ``Milky Way''
808channels is shown by a green hashed box.
809
810The spectrum that is plotted is governed by the
811\texttt{spectralMethod} parameter. It can be either \texttt{peak},
812where the spectrum is from the spatial pixel containing the
813detection's peak flux; or \texttt{sum}, where the spectrum is summed
814over all spatial pixels, and then corrected for the beam size.
815
816The spectral extent of the detection is indicated with blue lines, and
817a zoom is shown in a separate window. The cutout image can optionally
818include a border around the spatial pixels that are in the detection
819(turned on and off by the parameter \texttt{drawBorders} -- the
820default is \texttt{true}). It also includes a scale bar in the bottom
821left corner to indicate size -- it is 15~arcmin long (note that due to
822projection effects it may be a slightly different physical length from
823object to object). An example detection can be seen below in
824Fig.~\ref{fig-spect}.
825
826\subsubsection{Graphical output -- maps}
827
828Finally, a couple of images are optionally produced: a 0th moment map
829of the cube, combining just the detected channels in each object,
830showing the integrated flux in grey-scale; and a ``detection image'',
831a grey-scale image where the pixel values are the number of channels
832that spatial pixel is detected in. In both cases, if
833\texttt{drawBorders = true}, a border is drawn around the spatial
834extent of each detection. An example moment map is shown in
835Fig.~\ref{fig-moment}.  The production or otherwise of these images is
836governed by the \texttt{flagMaps} parameter.
837
838The purpose of these images are to provide a visual guide to where the
839detections have been made, and, particularly in the case of the moment
840map, to provide an indication of the strength of the source. In both
841cases, the detections are numbered (in the same sense as the output
842list), and the spatial borders are marked out as for the cutout images
843in the spectra file. Both these images are saved as postscript files
844(given by the parameters \texttt{momentMap} and \texttt{detectionMap}
845respectively), with the latter also displayed in a {\sc pgplot}
846window (regardless of the state of \texttt{flagMaps}).
847
848\section{Notes and hints on the use of \duchamp}
849\label{sec-notes}
850
851In using \duchamp, the user has to make a number of decisions about
852the way the program runs. This section is designed to give the user
853some idea about what to choose.
854
855The main choice is whether or not to use the wavelet
856reconstruction. The main benefits of this are the marked reduction in
857the noise level, leading to regularly-shaped detections, and good
858reliability for faint sources. The main drawback with its use is the
859long execution time: to reconstruct a $170\times160\times1024$
860(\hipass) cube often requires three iterations and takes about 20-25
861minutes to run completely. Note that this is for the three-dimensional
862reconstruction: using \texttt{reconDim=1} makes the reconstruction
863quicker (the full program then takes about 6 minutes), but it is still
864the largest part of the time.
865
866The searching part of the procedure is much quicker: searching an
867un-reconstructed cube leads to execution times of only a couple of
868minutes. Alternatively, using the ability to read in previously-saved
869reconstructed arrays makes running the reconstruction more than once a
870more feasible prospect.
871
872On the positive side, the shape of the detections in a cube that has
873been reconstructed will be much more regular and smooth -- the ragged
874edges that objects in the raw cube possess are smoothed by the removal
875of most of the noise. This enables better determination of the shapes
876and characteristics of objects.
877
878A further point to consider when using the reconstruction is that if
879the two-dimensional reconstruction is chosen (\texttt{reconDim=2}), it
880can be susceptible to edge effects. If the valid area in the cube (\ie
881the part that is not BLANK) has non-rectangular edges, the convolution
882can produce artefacts in the reconstruction that mimic the edges and
883can lead (depending on the selection threshold) to some spurious
884sources. Caution is advised with such data -- the user is advised to
885check carefully the reconstructed cube for the presence of such
886artefacts. Note, however, that the 1- and 3-dimensional
887reconstructions are \emph{not} susceptible in the same way, since the
888spectral direction does not generally exhibit these BLANK edges, and
889so we recommend the use of either of these.
890
891If one chooses the reconstruction method, a further decision is
892required on the signal-to-noise cutoff used in determining acceptable
893wavelet coefficients. A larger value will remove more noise from the
894cube, at the expense of losing fainter sources, while a smaller value
895will include more noise, which may produce spurious detections, but
896will be more sensitive to faint sources. Values of less than about
897$3\sigma$ tend to not reduce the noise a great deal and can lead to
898many spurious sources (although this will depend on the nature of the
899cube).
900
901When it comes to searching, the FDR method produces more reliable results
902than simple sigma-clipping, particularly in the absence of reconstruction.
903However, it does not work in exactly the way one would expect for a
904given value of \texttt{alpha}. For instance, setting fairly liberal values
905of \texttt{alpha} (say, 0.1) will often lead to a much smaller fraction
906of false detections (\ie much less than 10\%). This is the effect of the
907merging algorithms, that combine the sources after the detection stage, 
908and reject detections not meeting the minimum pixel or channel requirements.
909It is thus better to aim for larger \texttt{alpha} values than those derived
910from a straight conversion of the desired false detection rate.
911
912Finally, as \duchamp\ is still undergoing development, there are some
913elements that are not fully developed. In particular, it is not as
914clever as I would like at avoiding interference. The ability to place
915requirements on the minimum number of channels and pixels partially
916circumvents this problem, but work is being done to make \duchamp\
917smarter at rejecting signals that are clearly (to a human eye at
918least) interference. See the following section for further
919improvements that are planned.
920
921%\section{Drawbacks of the current program}
922%
923%The program currently has a few problems/drawbacks/things to be aware
924%of that will hopefully be fixed in the future:
925%\begin{itemize}
926%
927%\item Narrow interference spikes are still getting found, particularly
928%  if there is no reconstruction, or reconstruction with a relatively
929%  low \texttt{snrRecon} (such as 2 or 3). Increasing the
930%  \texttt{minChannels} parameter is one way to circumvent this, but
931%  making the algorithm a bit more clever would be preferable.
932%
933%\item Sources that have strong continuum ripple and/or artefacts often
934%  generate many spurious detections. This needs some work to avoid
935%  \duchamp\ doing this, and until then users are advised to be aware
936%  of the possibility. Strong continuum ripples may generate many
937%  sources on the same spatial pixel, and this will be apparent on the
938%  detection images.
939%
940%\item Spectra are integrated over every spatial pixel of the
941%  detection, and this may dilute the actual detection, making it
942%  harder to see \ie the apparent strength of the line as plotted may
943%  not give a true indication of how strong it really is.
944%
945%%\item A caution on the merging part of the procedure. This can be time
946%%  consuming if there are many detections that do not require merging
947%%  -- in this case, the time will go like $N^2$ ($N$ = number of
948%%  detections). If there are plenty of mergers, the size of the list
949%%  reduces quickly, so the execution time will be less.
950%
951%
952%\end{itemize}
953
954
955%\section{Comparison with other software (to be developed further...)}
956%
957%\subsection{fred, by Matt Howlett}
958%
959%This is the program used in the \hipass\ analysis. It smoothes the
960%data spectrally with a boxcar filter of a size that varies over a
961%user-specified range, and then thresholds the data.
962%
963%Works effectively, but generally doesn't find as many sources as
964%\duchamp, particularly when the reconstruction is used. Sensitive to
965%faint, broad features that fall below the reconstruction threshold.
966%
967%Execution takes a long time, depending on the range of filter widths
968%that are used.
969%
970%\subsection{sfind}
971%
972%Hard to evaluate, as it does not (as far as I can see) output the
973%channel number at which detections are made, and does not merge
974%detections made at adjacent channels (\ie it just works in 2
975%dimensions).
976%
977
978\section{Future Developments}
979
980This is both a list of planned improvements and a wish-list of
981features that would be nice to include (but are not planned in the
982immediate future). Let me know if there are items not on this list, or
983items on the list you would like prioritised.
984
985\begin{itemize}
986
987\item Better determination of the noise characteristics of
988  spectral-line cubes, including understanding how the noise is
989  generated and developing a model for it. \textbf{Planned.}
990 
991\item Include more source analysis. Examples could be: shape
992  information; measurements of HI mass; more variety of measurements
993  of velocity width and profile. \textbf{Some planned.}
994
995\item Provide some indication of the significance of the detection
996  (\ie some S/N-like value). \textbf{Planned.}
997
998\item Improved ability to reject interference, possibly on the
999  spectral shape of features. \textbf{Planned.}
1000
1001\item Ability to separate (de-blend) distinct sources that have been
1002  merged. \textbf{Planned.}
1003
1004\item Link to lists of possible counterparts (\eg via NED/SIMBAD/other
1005  VO tools?). \textbf{Wish-list.}
1006
1007\item On-line web service interface, so a user can upload a cube and
1008  get back a source-list. \textbf{Wish-list}.
1009
1010\item Embed \duchamp\ in a GUI, to move away from the text-based
1011  interaction. \textbf{Wish-list}.
1012\end{itemize}
1013
1014
1015%\bibliographystyle{mn2e}
1016%\bibliographystyle{abbrvnat}
1017%\bibliography{mnrasmnemonic,sourceDetection}
1018\begin{thebibliography}{}
1019
1020\bibitem[\protect\citeauthoryear{{Calabretta} \& {Greisen}}{{Calabretta} \&
1021  {Greisen}}{2002}]{calabretta02}
1022{Calabretta} M.,  {Greisen} E.,  2002, A\&A, 395, 1077
1023
1024\bibitem[\protect\citeauthoryear{{Greisen} \& {Calabretta}}{{Greisen} \&
1025  {Calabretta}}{2002}]{greisen02}
1026{Greisen} E.,  {Calabretta} M.,  2002, A\&A, 395, 1061
1027
1028\bibitem[\protect\citeauthoryear{{Hanisch}, {Farris}, {Greisen}, {Pence},
1029  {Schlesinger}, {Teuben}, {Thompson} \& {Warnock}}{{Hanisch}
1030  et~al.}{2001}]{hanisch01}
1031{Hanisch} R.,  {Farris} A.,  {Greisen} E.,  {Pence} W.,  {Schlesinger} B.,
1032  {Teuben} P.,  {Thompson} R.,    {Warnock} A.,  2001, A\&A, 376, 359
1033
1034\bibitem[\protect\citeauthoryear{{Hopkins}, {Miller}, {Connolly}, {Genovese},
1035  {Nichol} \& {Wasserman}}{{Hopkins} et~al.}{2002}]{hopkins02}
1036{Hopkins} A.,  {Miller} C.,  {Connolly} A.,  {Genovese} C.,  {Nichol} R.,
1037  {Wasserman} L.,  2002, AJ, 123, 1086
1038
1039\bibitem[\protect\citeauthoryear{Lutz}{Lutz}{1980}]{lutz80}
1040Lutz R.,  1980, The Computer Journal, 23, 262
1041
1042\bibitem[\protect\citeauthoryear{{Meyer} et~al.,}{{Meyer}
1043  et~al.}{2004}]{meyer04:trunc}
1044{Meyer} M.,  et~al., 2004, MNRAS, 350, 1195
1045
1046\bibitem[\protect\citeauthoryear{{Miller}, {Genovese}, {Nichol}, {Wasserman},
1047  {Connolly}, {Reichart}, {Hopkins}, {Schneider} \& {Moore}}{{Miller}
1048  et~al.}{2001}]{miller01}
1049{Miller} C.,  {Genovese} C.,  {Nichol} R.,  {Wasserman} L.,  {Connolly} A.,
1050  {Reichart} D.,  {Hopkins} A.,  {Schneider} J.,    {Moore} A.,  2001, AJ, 122,
1051  3492
1052
1053\bibitem[\protect\citeauthoryear{Minchin}{Minchin}{1999}]{minchin99}
1054Minchin R.,  1999, PASA, 16, 12
1055
1056\bibitem[\protect\citeauthoryear{Starck \& Murtagh}{Starck \&
1057  Murtagh}{2002}]{starck02:book}
1058Starck J.-L.,  Murtagh F.,  2002, {``Astronomical Image and Data Analysis''}.
1059Springer
1060
1061\end{thebibliography}
1062
1063
1064\appendix
1065\newpage
1066\section{Available parameters}
1067\label{app-param}
1068
1069The full list of parameters that can be listed in the input file are
1070given here. If not listed, they take the default value given in
1071parentheses. Since the order of the parameters in the input file does
1072not matter, they are grouped here in logical sections.
1073
1074\subsection*{Input-output related}
1075\begin{entry}
1076\item[ImageFile (no default assumed)] The filename of the
1077  data cube to be analysed.
1078\item[flagSubsection \texttt{[false]}] A flag to indicate whether one
1079  wants a subsection of the requested image.
1080\item[Subsection \texttt{[ [*,*,*] ]}] The requested subsection, which
1081  should be specified in the format \texttt{[x1:x2,y1:y2,z1:z2]}, where
1082  the limits are inclusive. If the full range of a dimension is
1083  required, use a \texttt{*}, \eg if you want the full spectral range of
1084  a subsection of the image, use \texttt{[30:140,30:140,*]}.
1085\item[flagReconExists \texttt{[false]}] A flag to indicate whether the
1086  reconstructed array has been saved by a previous run of \duchamp. If
1087  set true, the reconstructed array will be read from the file given by
1088  \texttt{reconFile}, rather than calculated directly.
1089\item[reconFile (no default assumed)] The FITS file that contains the
1090  reconstructed array. If \texttt{flagReconExists} is true and this
1091  parameter is not defined, the default file searched will be
1092  determined by the \atrous\ parameters (see \S\ref{sec-recon}).
1093\item[OutFile \texttt{[duchamp-Results.txt]}] The file containing the
1094  final list of detections. This also records the list of input
1095  parameters.
1096\item[SpectraFile \texttt{[duchamp-Spectra.ps]}] The postscript file
1097  containing the resulting integrated spectra and images of the
1098  detections.
1099\item[flagLog \texttt{[true]}] A flag to indicate whether intermediate
1100  detections should be logged.
1101\item[LogFile \texttt{[duchamp-Logfile.txt]}] The file in which intermediate
1102  detections are logged. These are detections that have not been
1103  merged. This is primarily for use in debugging and diagnostic
1104  purposes -- normal use of the program will probably not require
1105  this.
1106\item[flagOutputRecon \texttt{[false]}] A flag to say whether or not to
1107  save the reconstructed cube as a FITS file. The filename will be
1108  derived from the ImageFile -- the reconstruction of \texttt{image.fits}
1109  will be saved as \texttt{image.RECON?.fits}, where \texttt{?} stands for
1110  the value of \texttt{snrRecon} (see below).
1111\item[flagOutputResid \texttt{[false]}] As for \texttt{flagOutputRecon}, but
1112  for the residual array -- the difference between the original cube
1113  and the reconstructed cube. The filename will be \texttt{image.RESID?.fits}.
1114\item[flagVOT \texttt{[false]}] A flag to say whether to create a VOTable
1115  file corresponding to the information in \texttt{outfile}. This will be
1116  an XML file in the Virtual Observatory VOTable format.
1117\item[votFile \texttt{[duchamp-Results.xml]}] The VOTable file with the
1118  list of final detections. Some input parameters are also recorded.
1119\item[flagKarma \texttt{[false]}] A flag to say whether to create a
1120  Karma annotation file corresponding to the information in
1121  \texttt{outfile}. This can be used as an overlay for the Karma
1122  programs such as \texttt{kvis}.
1123\item[karmaFile \texttt{[duchamp-Results.ann]}] The Karma annotation
1124  file showing the list of final detections.
1125\item[flagMaps \texttt{[true]}] A flag to say whether to save
1126  postscript files showing the 0th moment map of the whole cube
1127  (parameter \texttt{momentMap}) and the detection image
1128  (\texttt{detectionMap}).
1129\item[momentMap \texttt{[duchamp-MomentMap.ps]}] A postscript file
1130  containing a map of the 0th moment of the detected sources, as well
1131  as pixel and WCS coordinates.
1132\item[detectionMap \texttt{[duchamp-DetectionMap.ps]}] A postscript
1133  file showing each of the detected objects, coloured in greyscale by
1134  the number of channels spanned by each pixel. Also shows pixel and WCS
1135  coordinates.
1136\end{entry}
1137
1138\subsection*{Modifying the cube}
1139\begin{entry}
1140\item[flagBlankPix \texttt{[true]}] A flag to say whether to remove BLANK
1141  pixels from the analysis -- these are pixels set to some particular
1142  value because they fall outside the imaged area.
1143\item[blankPixValue \texttt{[-8.00061]}] The value of the BLANK pixels,
1144  if this information is not contained in the FITS header (the usual
1145  procedure is to obtain this value from the header information -- in
1146  which case the value set by this parameter is ignored).
1147\item[flagMW \texttt{[false]}] A flag to say whether to ignore channels
1148  contaminated by Milky Way (or other) emission -- the searching
1149  algorithms will not look at these channels.
1150\item[maxMW \texttt{[112]}] The maximum channel number containing
1151  ``Milky Way'' emission.
1152\item[minMW \texttt{[75]}] The minimum channel number containing
1153  ``Milky Way'' emission. Note that the range specified by
1154  \texttt{maxMW} and \texttt{minMW} is inclusive.
1155\item[flagBaseline \texttt{[false]}] A flag to say whether to remove the
1156  baseline from each spectrum in the cube for the purposes of
1157  reconstruction and detection.
1158\end{entry}
1159
1160\subsection*{Detection related}
1161
1162\subsubsection*{General detection}
1163\begin{entry}
1164\item[flagNegative \texttt{[false]}] A flag to indicate that the features
1165  being searched for are negative. The cube will be inverted prior to
1166  searching.
1167\item[snrCut \texttt{[3.]}] The cut-off value for thresholding, in terms
1168  of number of $\sigma$ above the mean.
1169\item[flagGrowth \texttt{[false]}] A flag indicating whether or not to
1170  grow the detected objects to a smaller threshold.
1171\item[growthCut \texttt{[2.]}] The smaller threshold using in growing
1172  detections. In units of $\sigma$ above the mean.
1173\end{entry}
1174
1175\subsubsection*{\Atrous\ reconstruction}
1176\begin{entry}
1177\item [flagATrous \texttt{[true]}] A flag indicating whether or not to
1178  reconstruct the cube using the \atrous\ wavelet
1179  reconstruction. See \S\ref{sec-recon} for details.
1180\item[reconDim \texttt{[3]}] The number of dimensions to use in the
1181  reconstruction. 1 means reconstruct each spectrum separately, 2
1182  means each channel map is done separately, and 3 means do the whole
1183  cube in one go.
1184\item[scaleMin \texttt{[1]}] The minimum wavelet scale to be used in the
1185  reconstruction. A value of 1 means ``use all scales''.
1186\item[snrRecon \texttt{[4]}] The thresholding cutoff used in the
1187  reconstruction -- only wavelet coefficients this many $\sigma$ above
1188  the mean (or greater) are included in the reconstruction.
1189\item[filterCode \texttt{[1]}] The code number of the filter to use in
1190  the reconstruction. The options are:
1191  \begin{itemize}
1192  \item \textbf{1:} B$_3$-spline filter: coefficients =
1193    $(\frac{1}{16}, \frac{1}{4}, \frac{3}{8}, \frac{1}{4}, \frac{1}{16})$
1194  \item \textbf{2:} Triangle filter: coefficients = $(\frac{1}{4}, \frac{1}{2}, \frac{1}{4})$
1195  \item \textbf{3:} Haar wavelet: coefficients = $(0, \frac{1}{2}, \frac{1}{2})$
1196  \end{itemize}
1197\end{entry}
1198
1199\subsubsection*{FDR method}
1200\begin{entry}
1201\item[flagFDR \texttt{[false]}] A flag indicating whether or not to use
1202  the False Discovery Rate method in thresholding the pixels.
1203\item[alphaFDR \texttt{[0.01]}] The $\alpha$ parameter used in the FDR
1204analysis. The average number of false detections, as a fraction of the
1205total number, will be less than $\alpha$ (see \S\ref{sec-detection}).
1206\end{entry}
1207
1208\subsubsection*{Merging detections}
1209\begin{entry}
1210\item[minPix \texttt{[2]}] The minimum number of spatial pixels for a single
1211  detection to be counted.
1212\item[minChannels \texttt{[3]}] The minimum number of consecutive
1213  channels that must be present in a detection.
1214\item[flagAdjacent \texttt{[true]}] A flag indicating whether to use the
1215  ``adjacent pixel'' criterion to decide whether to merge objects. If
1216  not, the next two parameters are used to determine whether objects
1217  are within the necessary thresholds.
1218\item[threshSpatial \texttt{[3.]}] The maximum allowed minimum spatial
1219  separation (in pixels) between two detections for them to be merged
1220  into one. Only used if \texttt{flagAdjacent = false}.
1221\item[threshVelocity \texttt{[7.]}] The maximum allowed minimum channel
1222  separation between two detections for them to be merged into
1223  one.
1224\end{entry}
1225
1226\subsubsection*{Other parameters}
1227\begin{entry}
1228\item[spectralMethod \texttt{[peak]}] This indicates which method is used
1229  to plot the output spectra: \texttt{peak} means plot the spectrum
1230  containing the detection's peak pixel; \texttt{sum} means sum the
1231  spectra of each detected spatial pixel, and correct for the beam
1232  size. Any other choice defaults to \texttt{peak}.
1233\item[spectralUnits \texttt{[km/s]}] The user can specify the units of
1234  the spectral axis. Assuming the WCS of the FITS file is valid, the
1235  spectral axis is transformed into velocity, and put into these units
1236  for all output and for calculations such as the integrated flux of a
1237  detection.
1238\item[drawBorders \texttt{[true]}] A flag indicating whether borders
1239  are to be drawn around the detected objects in the moment maps
1240  included in the output (see for example Fig.~\ref{fig-spect}).
1241\item[verbose \texttt{[true]}] A flag indicating whether to print the
1242  progress of computationally-intensive algorithms (such as the
1243  searching and merging) to screen.
1244\end{entry}
1245
1246
1247\newpage
1248\section{Example parameter files}
1249\label{app-input}
1250
1251This is what a typical parameter file would look like.
1252
1253\begin{verbatim}
1254imageFile       /DATA/SITAR_1/whi550/cubes/H201_abcde_luther_chop.fits
1255logFile         logfile.txt
1256outFile         results.txt
1257spectraFile     spectra.ps
1258flagSubsection  0
1259flagOutputRecon 0
1260flagOutputResid 0
1261flagBlankPix    1
1262flagMW          1
1263minMW           75
1264maxMW           112
1265minPix          3
1266flagGrowth      1
1267growthCut       1.5
1268flagATrous      0
1269scaleMin        1
1270snrRecon        4
1271flagFDR         1
1272alphaFDR        0.1
1273numPixPSF       20
1274snrCut          3
1275threshSpatial   3
1276threshVelocity  7
1277\end{verbatim}
1278
1279Note that it is not necessary to include all these parameters in the
1280file, only those that need to be changed from the defaults (as listed
1281in Appendix~\ref{app-param}), which in this case would be very few. A
1282minimal parameter file might look like:
1283\begin{verbatim}
1284imageFile       /DATA/SITAR_1/whi550/cubes/H201_abcde_luther_chop.fits
1285flagLog         0
1286snrRecon        3
1287snrCut          2.5
1288minChannels     4
1289\end{verbatim}
1290This will reconstruct the cube with a lower SNR value than the
1291default, select objects at a lower threshold,  with a looser minimum
1292channel requirement, and not keep a log of the intermediate
1293detections.
1294
1295The following page demonstrates how the parameters are presented to the
1296user, both on the screen at execution time, and in the output and log
1297files. On each line, there is a description on the parameter, the relevant
1298parameter name that is used in the input file (if there is one that the
1299user can enter), and the value of the parameter being used.
1300\newpage
1301\begin{landscape}
1302Typical presentation of parameters in output and log files: 
1303\begin{verbatim}
1304---- Parameters ----
1305Image to be analysed.........................[imageFile]  =  input.fits
1306Intermediate Logfile...........................[logFile]  =  duchamp-Logfile.txt         
1307Final Results file.............................[outFile]  =  duchamp-Results.txt         
1308Spectrum file..............................[spectraFile]  =  duchamp-Spectra.ps   
13090th Moment Map...............................[momentMap]  =  duchamp-MomentMap.ps
1310Detection Map.............................[detectionMap]  =  duchamp-DetectionMap.ps
1311Saving reconstructed cube?.............[flagoutputrecon]  =  false
1312Saving residuals from reconstruction?..[flagoutputresid]  =  false
1313------
1314Searching for Negative features?..........[flagNegative]  =  false
1315Fixing Blank Pixels?......................[flagBlankPix]  =  true
1316Blank Pixel Value.......................................  =  -8.00061
1317Removing Milky Way channels?....................[flagMW]  =  true
1318Milky Way Channels.......................[minMW - maxMW]  =  75-112
1319Beam Size (pixels)......................................  =  10.1788
1320Removing baselines before search?.........[flagBaseline]  =  false
1321Minimum # Pixels in a detection.................[minPix]  =  2
1322Minimum # Channels in a detection..........[minChannels]  =  3
1323Growing objects after detection?............[flagGrowth]  =  false
1324Using A Trous reconstruction?...............[flagATrous]  =  true
1325Number of dimensions in reconstruction........[reconDim]  =  3
1326Minimum scale in reconstruction...............[scaleMin]  =  1
1327SNR Threshold within reconstruction...........[snrRecon]  =  4
1328Filter being used for reconstruction........[filterCode]  =  1 (B3 spline function)
1329Using FDR analysis?............................[flagFDR]  =  false
1330SNR Threshold...................................[snrCut]  =  3
1331Using Adjacent-pixel criterion?...........[flagAdjacent]  =  true
1332Max. velocity separation for merging....[threshVelocity]  =  7
1333Method of spectral plotting.............[spectralMethod]  =  peak
1334\end{verbatim}
1335
1336\newpage
1337\section{Example results file}
1338\label{app-output}
1339This the typical content of an output file, after running \duchamp\
1340with the parameters illustrated on the previous page.
1341
1342{\scriptsize
1343  \begin{verbatim}
1344Results of the \duchamp\ source finder: Tue May 23 14:51:38 2006
1345---- Parameters ----
1346      (... omitted for clarity -- see previous page for examples...)
1347--------------------
1348Total number of detections = 25
1349--------------------
1350------------------------------------------------------------------------------------------------------------------------------------------------------
1351 Obj#       Name     X     Y     Z           RA          DEC      VEL     w_RA    w_DEC   w_VEL     F_int    F_peak  X1  X2  Y1  Y2  Z1  Z2  Npix Flag
1352                                                               [km/s] [arcmin] [arcmin]  [km/s] [Jy km/s] [Jy/beam]                         [pix]     
1353------------------------------------------------------------------------------------------------------------------------------------------------------
1354    1 J0618-2532  30.2  86.0 113.3  06:18:12.54 -25:32:44.79  208.502    45.17    34.61  26.383    24.394     0.350  25  35  82  90 112 114   137    E
1355    2 J0609-2156  59.5 140.6 114.6  06:09:19.66 -21:56:31.20  225.572    44.39    31.47  65.957    16.128     0.213  55  65 137 144 113 118   153     
1356    3 J0545-2143 141.2 143.2 114.8  05:45:51.71 -21:43:36.20  228.470    19.61    16.66  26.383     2.412     0.090 139 143 142 145 114 116    29     
1357    4 J0617-2633  33.3  70.8 115.6  06:17:25.52 -26:33:33.83  238.736    65.02    30.10  26.383     9.776     0.117  26  41  68  75 115 117   104    E
1358    5 J0601-2500  86.2  94.9 117.9  06:01:39.54 -25:00:32.46  269.419    27.99    24.02  26.383     3.920     0.124  83  89  92  97 117 119    44     
1359    6 J0602-2547  84.0  83.1 118.0  06:02:18.29 -25:47:31.69  270.319    20.01    19.99  26.383     2.999     0.118  82  86  81  85 117 119    34     
1360    7 J0547-2448 133.0  97.2 118.7  05:47:52.53 -24:48:38.16  279.113    19.72    12.54  26.383     1.474     0.074 131 135  96  98 118 120    21     
1361    8 J0606-2719  71.1  60.0 121.3  06:06:10.99 -27:19:48.61  314.090    52.36    39.59  39.574    14.268     0.150  65  77  55  64 120 123   154     
1362    9 J0611-2137  52.4 145.3 162.5  06:11:20.92 -21:37:29.57  857.955    32.39    23.49 118.722    43.178     0.410  49  56 142 147 158 167   265    E
1363   10 J0600-2859  89.7  35.3 202.4  06:00:34.08 -28:59:00.43 1383.160    23.93    24.10 171.487    24.439     0.173  87  92  33  38 196 209   271     
1364   11 J0558-2638  95.4  70.3 223.1  05:58:53.03 -26:38:45.91 1656.140    11.93    12.07  92.339     1.045     0.063  94  96  69  71 220 227    18     
1365   12 J0617-2723  34.7  58.3 227.4  06:17:07.07 -27:23:50.65 1712.868    16.75    23.53 290.209     8.529     0.093  33  36  56  61 215 237   118     
1366   13 J0558-2525  95.8  88.6 231.7  05:58:49.27 -25:25:33.60 1770.134    27.87    24.16 237.444    12.863     0.115  92  98  86  91 221 239   175     
1367   14 J0600-2141  88.8 144.4 232.5  06:00:54.02 -21:41:57.06 1780.188    27.96    24.13 224.252    30.743     0.166  86  92 142 147 222 239   344    E
1368   15 J0615-2634  40.0  70.8 232.6  06:15:25.50 -26:34:20.04 1782.214    12.44    15.69  52.765     2.084     0.068  39  41  69  72 231 235    31     
1369   16 J0604-2606  76.0  78.4 233.0  06:04:41.13 -26:06:21.19 1787.226    24.13    23.87 211.061    23.563     0.155  73  78  76  81 225 241   278     
1370   17 J0601-2340  87.9 114.9 235.8  06:01:08.83 -23:40:19.37 1824.122    31.95    28.09 237.444    82.380     0.297  85  92 112 118 227 245   647     
1371   18 J0615-2235  38.2 130.5 254.5  06:15:32.09 -22:35:37.24 2070.934    12.29    11.70 105.531     1.555     0.070  37  39 129 131 249 257    24     
1372   19 J0617-2305  31.4 122.8 258.1  06:17:33.45 -23:05:28.94 2118.752    12.34    11.65  26.383     1.022     0.062  30  32 122 124 257 259    16     
1373   20 J0612-2149  49.6 142.2 270.3  06:12:11.04 -21:49:29.72 2279.926    16.27    15.73 395.740    15.156     0.101  48  51 141 144 257 287   204     
1374   21 J0616-2133  35.3 146.0 300.6  06:16:15.78 -21:33:09.69 2679.148    20.22     7.47 224.252     3.014     0.127  33  37 145 146 294 311    28    E
1375   22 J0555-2956 107.3  20.9 367.6  05:55:08.02 -29:56:09.08 3562.236    19.71    20.30  39.574     5.891     0.169 105 109  19  23 366 369    58     
1376   23 J0557-2246  99.8 128.2 434.0  05:57:43.77 -22:46:42.95 4438.776    11.88    16.12 105.531     1.703     0.167  99 101 127 130 430 438    17    N
1377   24 J0616-2648  38.1  67.2 546.8  06:16:02.10 -26:48:35.49 5926.464    12.35    11.67  26.383     1.276     0.064  37  39  66  68 546 548    18     
1378   25 J0552-2916 117.0  30.5 727.0  05:52:13.64 -29:16:58.02 8303.952    11.59    20.25 303.400    35.523     0.479 116 118  28  32 716 739   111     
1379  \end{verbatim}
1380}
1381Note that the
1382width of the table can make it hard to read. A good trick for those
1383using UNIX/Linux is to make use of the \texttt{a2ps} command. The
1384following works well, producing a postscript file \texttt{results.ps}:
1385\\\verb|a2ps -1 -r -f8 -o duchamp-Results.ps duchamp-Results.txt|
1386
1387%\end{landscape}
1388
1389\newpage
1390\section{Example VOTable output}
1391\label{app-votable}
1392This is part of the VOTable, in XML format, corresponding to the
1393output file in Appendix~\ref{app-output} (the indentation has been
1394removed to make it fit on the page).
1395
1396%\begin{landscape}
1397{\scriptsize
1398  \begin{verbatim}
1399<?xml version="1.0"?>
1400<VOTABLE version="1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
1401 xsi:noNamespaceSchemaLocation="http://www.ivoa.net/xml/VOTable/VOTable/v1.1">
1402<COOSYS ID="J2000" equinox="J2000." epoch="J2000." system="eq_FK5"/>
1403<RESOURCE name="Duchamp Output">
1404<TABLE name="Detections">
1405<DESCRIPTION>Detected sources and parameters from running the Duchamp source finder.</DESCRIPTION>
1406<PARAM name="FITS file" datatype="char" ucd="meta.file;meta.fits" value="/DATA/SITAR_1/whi550/cubes/H201_abcde_luther_chop.fits"/>
1407<PARAM name="Threshold" datatype="float" ucd="stat.snr" value="2.5">
1408<PARAM name="ATrous note" datatype="char" ucd="meta.note" value="The a trous reconstruction method was used, with the following parameters.">
1409<PARAM name="ATrous Dimension" datatype="int" ucd="meta.code;stat" value="3">
1410<PARAM name="ATrous Cut" datatype="float" ucd="stat.snr" value="4">
1411<PARAM name="ATrous Minimum Scale" datatype="int" ucd="stat.param" value="1">
1412<PARAM name="ATrous Filter" datatype="char" ucd="meta.code;stat" value="B3 spline function">
1413<FIELD name="ID" ID="col1" ucd="meta.id" datatype="int" width="4"/>
1414<FIELD name="Name" ID="col2" ucd="meta.id;meta.main" datatype="char" arraysize="14"/>
1415<FIELD name="RA" ID="col3" ucd="pos.eq.ra;meta.main" ref="J2000" datatype="float" width="10" precision="6" unit="deg"/>
1416<FIELD name="Dec" ID="col4" ucd="pos.eq.dec;meta.main" ref="J2000" datatype="float" width="10" precision="6" unit="deg"/>
1417<FIELD name="w_RA" ID="col3" ucd="phys.angSize;pos.eq.ra" ref="J2000" datatype="float" width="7" precision="2" unit="arcmin"/>
1418<FIELD name="w_Dec" ID="col4" ucd="phys.angSize;pos.eq.dec" ref="J2000" datatype="float" width="7" precision="2" unit="arcmin"/>
1419<FIELD name="Vel" ID="col4" ucd="phys.veloc;src.dopplerVeloc" datatype="float" width="9" precision="3" unit="km/s"/>
1420<FIELD name="w_Vel" ID="col4" ucd="phys.veloc;src.dopplerVeloc;spect.line.width" datatype="float" width="8" precision="3" unit="km/s"/>
1421<FIELD name="Integrated_Flux" ID="col4" ucd="phys.flux;spect.line.intensity" datatype="float" width="10" precision="3" unit="km/s"/>
1422<DATA>
1423<TABLEDATA>
1424<TR>
1425<TD>   1</TD><TD> J0609-2200</TD><TD> 92.410416</TD><TD>-22.013390</TD><TD>  48.50</TD><TD>  39.42</TD><TD>  213.061</TD><TD>  65.957</TD><TD>    17.572</TD>
1426</TR>
1427<TR>
1428<TD>   2</TD><TD> J0608-2605</TD><TD> 92.042633</TD><TD>-26.085157</TD><TD>  44.47</TD><TD>  39.47</TD><TD>  233.119</TD><TD>  39.574</TD><TD>     4.144</TD>
1429</TR>
1430<TR>
1431<TD>   3</TD><TD> J0606-2724</TD><TD> 91.637840</TD><TD>-27.412022</TD><TD>  52.48</TD><TD>  47.57</TD><TD>  302.213</TD><TD>  39.574</TD><TD>    17.066</TD>
1432</TR>
1433(... table truncated for clarity ...)
1434</TABLEDATA>
1435</DATA>
1436</TABLE>
1437</RESOURCE>
1438</VOTABLE>
1439  \end{verbatim}
1440}
1441\end{landscape}
1442
1443\newpage
1444\section{Example Karma Annotation File output}
1445\label{app-karma}
1446
1447This is the format of the Karma Annotation file, showing the locations
1448of the detected objects. This can be loaded by the plotting tools of
1449the Karma package (for instance, \texttt{kvis}) as an overlay on the FITS
1450file.
1451
1452\begin{verbatim}
1453# Duchamp Source Finder results for
1454#  cube /DATA/SITAR_1/whi550/cubes/H201_abcde_luther_chop.fits
1455COLOR RED
1456COORD W
1457CIRCLE 92.3376 -21.9475 0.403992
1458TEXT 92.3376 -21.9475 1
1459CIRCLE 91.9676 -26.0193 0.37034
1460TEXT 91.9676 -26.0193 2
1461CIRCLE 91.5621 -27.3459 0.437109
1462TEXT 91.5621 -27.3459 3
1463CIRCLE 92.8285 -21.6344 0.269914
1464TEXT 92.8285 -21.6344 4
1465CIRCLE 90.1381 -28.9838 0.234179
1466TEXT 90.1381 -28.9838 5
1467CIRCLE 89.72 -26.6513 0.132743
1468TEXT 89.72 -26.6513 6
1469CIRCLE 94.2743 -27.4003 0.195175
1470TEXT 94.2743 -27.4003 7
1471CIRCLE 92.2739 -21.6941 0.134538
1472TEXT 92.2739 -21.6941 8
1473CIRCLE 89.7133 -25.4259 0.232252
1474TEXT 89.7133 -25.4259 9
1475CIRCLE 90.2206 -21.6993 0.266247
1476TEXT 90.2206 -21.6993 10
1477CIRCLE 93.8581 -26.5766 0.163153
1478TEXT 93.8581 -26.5766 11
1479CIRCLE 91.176 -26.1064 0.234356
1480TEXT 91.176 -26.1064 12
1481CIRCLE 90.2844 -23.6716 0.299509
1482TEXT 90.2844 -23.6716 13
1483CIRCLE 93.8774 -22.581 0.130925
1484TEXT 93.8774 -22.581 14
1485CIRCLE 94.3882 -23.0934 0.137108
1486TEXT 94.3882 -23.0934 15
1487CIRCLE 93.0491 -21.8223 0.202928
1488TEXT 93.0491 -21.8223 16
1489CIRCLE 94.0685 -21.5603 0.168456
1490TEXT 94.0685 -21.5603 17
1491CIRCLE 86.0568 -27.6095 0.101113
1492TEXT 86.0568 -27.6095 18
1493CIRCLE 88.7932 -29.9453 0.202624
1494TEXT 88.7932 -29.9453 19
1495\end{verbatim}
1496
1497\newpage
1498\section{Installing \duchamp\ (README file)}
1499\begin{verbatim}
1500There is an executable (Duchamp) that has been compiled on a Debian
1501Linux kernel 2.6.8-2-686, with gcc version 3.3.5 (Debian 1:3.3.5-13)
1502
1503If that is no good to you, you can compile it yourself using the
1504Makefile included in this directory (sorry for not having a configure
1505script or similar yet!).
1506
1507Duchamp uses three main external libraries: pgplot, cfitsio and
1508wcslib. You will need to set the paths for the base directory and
1509three libraries, as they are currently configured for my use and will
1510not be of much use to you. These are:
1511
1512BASE --> the current directory
1513PGDIR --> where the pgplot libraries (and header files) are located
1514CFITSIODIR --> where the header file fitsio.h is
1515CFITSIOLDIR --> where the cfitsio library is located (libcfitsio.a)
1516WCSDIR --> where the wcslib header files are
1517WCSLDIR --> where the wcslib library is located (libwcs.a)
1518
1519If you do not have the libraries, they can be downloaded from the
1520following locations:
1521PGPlot -- http://www.astro.caltech.edu/~tjp/pgplot/
1522cfitsio -- http://heasarc.gsfc.nasa.gov/docs/software/fitsio/fitsio.html
1523wcslib -- http://www.atnf.csiro.au/people/Mark.Calabretta/WCS/index.html
1524
1525Once you've set up the Makefile correctly, then simply typing
1526> make duchamp
1527will compile the program.
1528
1529To run it, you need to use the syntax
1530> Duchamp -p parameterFile
1531where parameterFile is a file with the input parameters, including the
1532name of the cube you want to search.
1533
1534There are two example input files included with the distribution. The
1535smaller one, InputExample, shows the typical parameters one might want
1536to set. The large one, InputComplete, lists all parameters that can be
1537entered, and a brief description of them. Refer to the documentation
1538for further details.
1539
1540To get going quickly, just replace the "your-file-here" in
1541InputExample with your image name, and type
1542> Duchamp -p InputExample
1543and you're off!
1544\end{verbatim}
1545
1546\section{Robust statistics for a Normal distribution}
1547\label{app-madfm}
1548
1549The Normal, or Gaussian, distribution for mean $\mu$ and standard
1550deviation $\sigma$ can be written as
1551\[
1552f(x) = \frac{1}{\sqrt{2\pi\sigma^2}}\ e^{-(x-\mu)^2/2\sigma^2}.
1553 \]
1554
1555When one has a purely Gaussian signal, it is straightforward to
1556estimate $\sigma$ by calculating the standard deviation (or rms) of
1557the data. However, if there is a small amount of signal present on top
1558of Gaussian noise, and one wants to estimate the $\sigma$ for the
1559noise, the presence of the large values from the signal can bias the
1560estimator to higher values.
1561
1562An alternative way is to use the median ($m$) and median absolute deviation
1563from the median ($s$) to estimate $\mu$ and $\sigma$. The median is the
1564middle of the distribution, defined for a continuous distribution by
1565\[
1566\int_{-\infty}^{m} f(x) \diff x = \int_{m}^{\infty} f(x) \diff x.
1567\]
1568From symmetry, we quickly see that for the continuous Normal
1569distribution, $m=\mu$. We consider the case henceforth of $\mu=0$,
1570without loss of generality.
1571
1572To find $s$, we find the distribution of the absolute deviation from
1573the median, and then find the median of that distribution. This
1574distribution is given by
1575\begin{eqnarray*}
1576g(x) &= &{\mbox{\rm distribution of }} |x|\\
1577     &= &f(x) + f(-x),\ x\ge0\\
1578     &= &\sqrt{\frac{2}{\pi\sigma^2}}\, e^{-x^2/2\sigma^2},\ x\ge0.
1579\end{eqnarray*}
1580So, the median absolute deviation from the median, $s$, is given by
1581\[
1582\int_{0}^{s} g(x) \diff x = \int_{s}^{\infty} g(x) \diff x.
1583\]
1584Now, $\int_{0}^{\infty}e^{-x^2/2\sigma^2} \diff x = \sqrt{\pi\sigma^2/2}$, and
1585so $\int_{s}^{\infty} e^{-x^2/2\sigma^2} \diff x =
1586\sqrt{\pi\sigma^2/2} - \int_{0}^{s} e^{-\frac{x^2}{2\sigma^2}} \diff x
1587$. Hence, to find $s$ we simply solve the following equation (setting $\sigma=1$ for
1588simplicity -- equivalent to stating $x$ and $s$ in units of $\sigma$):
1589\[
1590\int_{0}^{s}e^{-x^2/2} \diff x - \sqrt{\pi/8} = 0.
1591\]
1592This is hard to solve analytically (no nice analytic solution exists
1593for the finite integral that I'm aware of), but straightforward to
1594solve numerically, yielding the value of $s=0.6744888$. Thus, to
1595estimate $\sigma$ for a Normally distributed data set, one can calculate
1596$s$, then divide by 0.6744888 (or multiply by 1.4826042) to obtain the
1597correct estimator.
1598
1599Note that this is different to solutions quoted elsewhere,
1600specifically in \citet{meyer04:trunc}, where the same robust estimator
1601is used but with an incorrect conversion to standard deviation -- they
1602assume $\sigma = s\sqrt{\pi/2}$. This, in fact, is the conversion used
1603to convert the \emph{mean} absolute deviation from the mean to the
1604standard deviation. This means that the cube noise in the \hipass\
1605catalogue (their parameter Rms$_{\rm cube}$) should be 18\% larger
1606than quoted.
1607
1608\section{How Gaussian noise changes with wavelet scale.}
1609\label{app-scaling}
1610
1611The key element in the wavelet reconstruction of an array is the
1612thresholding of the individual wavelet coefficient arrays. This is
1613usually done by choosing a level to be some number of standard
1614deviations above the mean value.
1615
1616However, since the wavelet arrays are produced by convolving the input
1617array by an increasingly large filter, the pixels in the coefficient
1618arrays become increasingly correlated as the scale of the filter
1619increases. This results in the measured standard deviation from a
1620given coefficient array decreasing with increasing scale. To calculate
1621this, we need to take into account how many other pixels each pixel in
1622the convolved array depends on.
1623
1624To demonstrate, suppose we have a 1-D array with $N$ pixel values
1625given by $F_i,\ i=1,...,N$, and we convolve it with the B$_3$-spline
1626filter, defined by the set of coefficients
1627$\{1/16,1/4,3/8,1/4,1/16\}$. The flux of the $i$th pixel in the
1628convolved array will be
1629\[
1630F'_i = \frac{1}{16}F_{i-2} + \frac{1}{4}F_{i-1} + \frac{3}{8}F_{i}
1631+ \frac{1}{4}F_{i+1} + \frac{1}{16}F_{i+2}
1632\]
1633and the flux of the corresponding pixel in the wavelet array will be
1634\[
1635W'_i = F_i - F'_i = \frac{-1}{16}F_{i-2} - \frac{1}{4}F_{i-1} + \frac{5}{8}F_{i}
1636- \frac{1}{4}F_{i+1} - \frac{1}{16}F_{i+2}
1637\]
1638Now, assuming each pixel has the same standard deviation
1639$\sigma_i=\sigma$, we can work out the standard deviation for the
1640wavelet array:
1641\[
1642\sigma'_i = \sigma \sqrt{\left(\frac{1}{16}\right)^2 + \left(\frac{1}{4}\right)^2
1643  + \left(\frac{5}{8}\right)^2 + \left(\frac{1}{4}\right)^2 + \left(\frac{1}{16}\right)^2}
1644          = 0.72349\ \sigma
1645\]
1646Thus, the first scale wavelet coefficient array will have a standard
1647deviation of 72.3\% of the input array. This procedure can be followed
1648to calculate the necessary values for all scales, dimensions and
1649filters used by \duchamp.
1650
1651Calculating these values is clearly a critical step in performing the
1652reconstruction. \citet{starck02:book} did so by simulating data sets
1653with Gaussian noise, taking the wavelet transform, and measuring the
1654value of $\sigma$ for each scale. We take a different approach, by
1655calculating the scaling factors directly from the filter coefficients
1656by taking the wavelet transform of an array made up of a 1 in the
1657central pixel and 0s everywhere else. The scaling value is then
1658derived by taking the square root of the sum (in quadrature) of all
1659the wavelet coefficient values at each scale. We give the scaling
1660factors for the three filters available to \duchamp\ on the following
1661page. These values are hard-coded into \duchamp, so no on-the-fly
1662calculation of them is necessary.
1663
1664Memory limitations prevent us from calculating factors for large
1665scales, particularly for the three-dimensional case (hence the --
1666symbols in the tables). To calculate factors for
1667higher scales than those available, we note the following
1668relationships apply for large scales to a sufficient level of precision:
1669\begin{itemize}
1670\item 1-D: factor(scale $i$) = factor(scale $i-1$)$/\sqrt{2}$.
1671\item 2-D: factor(scale $i$) = factor(scale $i-1$)$/2$.
1672\item 1-D: factor(scale $i$) = factor(scale $i-1$)$/\sqrt{8}$.
1673\end{itemize}
1674
1675\newpage
1676\begin{itemize}
1677\item \textbf{B$_3$-Spline Function:} $\{1/16,1/4,3/8,1/4,1/16\}$
1678
1679\begin{tabular}{llll}
1680Scale & 1 dimension      & 2 dimension     & 3 dimension\\ \hline
16811     & 0.723489806      & 0.890796310     & 0.956543592\\
16822     & 0.285450405      & 0.200663851     & 0.120336499\\
16833     & 0.177947535      & 0.0855075048    & 0.0349500154\\
16844     & 0.122223156      & 0.0412474444    & 0.0118164242\\
16855     & 0.0858113122     & 0.0204249666    & 0.00413233507\\
16866     & 0.0605703043     & 0.0101897592    & 0.00145703714\\
16877     & 0.0428107206     & 0.00509204670   & 0.000514791120\\
16888     & 0.0302684024     & 0.00254566946   & --\\
16899     & 0.0214024008     & 0.00127279050   & --\\
169010    & 0.0151336781     & 0.000636389722  & --\\
169111    & 0.0107011079     & 0.000318194170  & --\\
169212    & 0.00756682272    & --              & --\\
169313    & 0.00535055108    & --              & --\\
1694%14    & 0.00378341085   & --              & --\\
1695%15    & 0.00267527545   & --              & --\\
1696%16    & 0.00189170541   & --              & --\\
1697%17    & 0.00133763772   & --              & --\\
1698%18    & 0.000945852704   & --             & --
1699\end{tabular}
1700
1701\item \textbf{Triangle Function:} $\{1/4,1/2,1/4\}$
1702
1703\begin{tabular}{llll}
1704Scale & 1 dimension      & 2 dimension     & 3 dimension\\ \hline
17051     & 0.612372436      & 0.800390530     & 0.895954449  \\
17062     & 0.330718914      & 0.272878894     & 0.192033014\\
17073     & 0.211947812      & 0.119779282     & 0.0576484078\\
17084     & 0.145740298      & 0.0577664785    & 0.0194912393\\
17095     & 0.102310944      & 0.0286163283    & 0.00681278387\\
17106     & 0.0722128185     & 0.0142747506    & 0.00240175885\\
17117     & 0.0510388224     & 0.00713319703   & 0.000848538128 \\
17128     & 0.0360857673     & 0.00356607618   & 0.000299949455 \\
17139     & 0.0255157615     & 0.00178297280   & -- \\
171410    & 0.0180422389     & 0.000891478237  & --  \\
171511    & 0.0127577667     & 0.000445738098  & --  \\
171612    & 0.00902109930    & 0.000222868922  & --  \\
171713    & 0.00637887978    & --              & -- \\
1718%14   & 0.00451054902    & --              & -- \\
1719%15   & 0.00318942978    & --              & -- \\
1720%16   & 0.00225527449    & --              & -- \\
1721%17   & 0.00159471988    & --              & -- \\
1722%18   & 0.000112763724   & --              & --
1723
1724\end{tabular}
1725
1726\item \textbf{Haar Wavelet:} $\{0,1/2,1/2\}$
1727
1728\begin{tabular}{llll}
1729Scale & 1 dimension      & 2 dimension     & 3 dimension\\ \hline
17301     & 0.707167810      & 0.433012702     & 0.935414347 \\
17312     & 0.500000000      & 0.216506351     & 0.330718914\\
17323     & 0.353553391      & 0.108253175     & 0.116926793\\
17334     & 0.250000000      & 0.0541265877    & 0.0413398642\\
17345     & 0.176776695      & 0.0270632939    & 0.0146158492\\
17356     & 0.125000000      & 0.0135316469    & 0.00516748303
1736
1737\end{tabular}
1738
1739
1740\end{itemize}
1741
1742\end{document}
Note: See TracBrowser for help on using the repository browser.