Changeset 2713


Ignore:
Timestamp:
12/28/12 15:52:45 (11 years ago)
Author:
WataruKawasaki
Message:

New Development: Yes

JIRA Issue: Yes CAS-3618

Ready for Test: Yes

Interface Changes: No

What Interface Changed:

Test Programs:

Put in Release Notes: No

Module(s): sd

Description: new method for scantable to calculate AIC, AICc, BIC and GCV.


Location:
trunk
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/python/scantable.py

    r2687 r2713  
    25392539            raise RuntimeError(msg)
    25402540
     2541    @asaplog_post_dec
     2542    def calc_aic(self, value=None, blfunc=None, order=None, mask=None,
     2543                 whichrow=None, uselinefinder=None, edge=None,
     2544                 threshold=None, chan_avg_limit=None):
     2545        """\
     2546        Calculates and returns model selection criteria for a specified
     2547        baseline model and a given spectrum data.
     2548        Available values include Akaike Information Criterion (AIC), the
     2549        corrected Akaike Information Criterion (AICc) by Sugiura(1978),
     2550        Bayesian Information Criterion (BIC) and the Generalised Cross
     2551        Validation (GCV).
     2552
     2553        Parameters:
     2554            value:         name of model selection criteria to calculate.
     2555                           available ones include 'aic', 'aicc', 'bic' and
     2556                           'gcv'. default is 'aicc'.
     2557            blfunc:        baseline function name. available ones include
     2558                           'chebyshev', 'cspline' and 'sinusoid'.
     2559                           default is 'chebyshev'.
     2560            order:         parameter for basline function. actually stands for
     2561                           order of polynomial (order) for 'chebyshev',
     2562                           number of spline pieces (npiece) for 'cspline' and
     2563                           maximum wave number for 'sinusoid', respectively.
     2564                           default is 5 (which is also the default order value
     2565                           for [auto_]chebyshev_baseline()).
     2566            mask:          an optional mask. default is [].
     2567            whichrow:      row number. default is 0 (the first row)
     2568            uselinefinder: use sd.linefinder() to flag out line regions
     2569                           default is True.
     2570            edge:           an optional number of channel to drop at
     2571                            the edge of spectrum. If only one value is
     2572                            specified, the same number will be dropped
     2573                            from both sides of the spectrum. Default
     2574                            is to keep all channels. Nested tuples
     2575                            represent individual edge selection for
     2576                            different IFs (a number of spectral channels
     2577                            can be different)
     2578                            default is (0, 0).
     2579            threshold:      the threshold used by line finder. It is
     2580                            better to keep it large as only strong lines
     2581                            affect the baseline solution.
     2582                            default is 3.
     2583            chan_avg_limit: a maximum number of consequtive spectral
     2584                            channels to average during the search of
     2585                            weak and broad lines. The default is no
     2586                            averaging (and no search for weak lines).
     2587                            If such lines can affect the fitted baseline
     2588                            (e.g. a high order polynomial is fitted),
     2589                            increase this parameter (usually values up
     2590                            to 8 are reasonable). Most users of this
     2591                            method should find the default value sufficient.
     2592                            default is 1.
     2593
     2594        Example:
     2595            aic = scan.calc_aic(blfunc='chebyshev', order=5, whichrow=0)
     2596        """
     2597
     2598        try:
     2599            varlist = vars()
     2600
     2601            if value          is None: value          = 'aicc'
     2602            if blfunc         is None: blfunc         = 'chebyshev'
     2603            if order          is None: order          = 5
     2604            if mask           is None: mask           = []
     2605            if whichrow       is None: whichrow       = 0
     2606            if uselinefinder  is None: uselinefinder  = True
     2607            if edge           is None: edge           = (0, 0)
     2608            if threshold      is None: threshold      = 3
     2609            if chan_avg_limit is None: chan_avg_limit = 1
     2610
     2611            return self._calc_aic(value, blfunc, order, mask,
     2612                                  whichrow, uselinefinder, edge,
     2613                                  threshold, chan_avg_limit)
     2614           
     2615        except RuntimeError, e:
     2616            raise_fitting_failure_exception(e)
    25412617
    25422618    @asaplog_post_dec
  • trunk/src/Scantable.cpp

    r2683 r2713  
    26562656}
    26572657
     2658double Scantable::calculateModelSelectionCriteria(const std::string& valname, const std::string& blfunc, int order, const std::vector<bool>& inMask, int whichrow, bool useLineFinder, const std::vector<int>& edge, float threshold, int chanAvgLimit)
     2659{
     2660  if (useLineFinder) {
     2661    int minEdgeSize = getIFNos().size()*2;
     2662
     2663
     2664    int edgeSize = edge.size();
     2665    std::vector<int> currentEdge;
     2666    if (edgeSize >= 2) {
     2667      int idx = 0;
     2668      if (edgeSize > 2) {
     2669        if (edgeSize < minEdgeSize) {
     2670          throw(AipsError("Length of edge element info is less than that of IFs"));
     2671        }
     2672        idx = 2 * getIF(whichrow);
     2673      }
     2674      currentEdge.push_back(edge[idx]);
     2675      currentEdge.push_back(edge[idx+1]);
     2676    } else {
     2677      throw(AipsError("Wrong length of edge element"));
     2678    }
     2679
     2680    STLineFinder lineFinder = STLineFinder();
     2681    std::vector<float> sp = getSpectrum(whichrow);
     2682    lineFinder.setData(sp);
     2683    lineFinder.setOptions(threshold, 3, chanAvgLimit);
     2684    lineFinder.findLines(getCompositeChanMask(whichrow, inMask), currentEdge, whichrow);
     2685    std::vector<bool> chanMask = lineFinder.getMask();
     2686   
     2687    return doCalculateModelSelectionCriteria(valname, sp, chanMask, blfunc, order);
     2688
     2689  } else {
     2690    return doCalculateModelSelectionCriteria(valname, getSpectrum(whichrow), getCompositeChanMask(whichrow, inMask), blfunc, order);
     2691  }
     2692
     2693}
     2694
     2695double Scantable::doCalculateModelSelectionCriteria(const std::string& valname, const std::vector<float>& spec, const std::vector<bool>& mask, const std::string& blfunc, int order)
     2696{
     2697  int nparam;
     2698  std::vector<float> params;
     2699  int nClipped = 0;
     2700  float thresClip = 0.0;
     2701  int nIterClip = 0;
     2702  std::vector<float> res;
     2703  if (blfunc == "chebyshev") {
     2704    nparam = order + 1;
     2705    res = doChebyshevFitting(spec, mask, order, params, nClipped, thresClip, nIterClip, true);
     2706  } else if (blfunc == "sinusoid") {
     2707    std::vector<int> nWaves;
     2708    nWaves.clear();
     2709    for (int i = 0; i <= order; ++i) {
     2710      nWaves.push_back(i);
     2711    }
     2712    nparam = 2*order + 1;  // order = nwave
     2713    res = doSinusoidFitting(spec, mask, nWaves, params, nClipped, thresClip, nIterClip, true);
     2714  } else if (blfunc == "cspline") {
     2715    std::vector<int> pieceEdges(order+1);  //order = npiece
     2716    nparam = order + 3;
     2717    params.resize(4*order);
     2718    res = doCubicSplineFitting(spec, mask, order, pieceEdges, params, nClipped, thresClip, nIterClip, true);
     2719  } else {
     2720    throw(AipsError("blfunc must be chebyshev, cspline or sinusoid."));
     2721  }
     2722
     2723  double msq = 0.0;
     2724  int nusedchan = 0;
     2725  int nChan = res.size();
     2726  for (int i = 0; i < nChan; ++i) {
     2727    if (mask[i]) {
     2728      msq += (double)res[i]*(double)res[i];
     2729      nusedchan++;
     2730    }
     2731  }
     2732  if (nusedchan == 0) {
     2733    throw(AipsError("all channels masked."));
     2734  }
     2735  msq /= (double)nusedchan;
     2736
     2737  nparam++;  //add 1 for sigma of Gaussian distribution
     2738  const double PI = 6.0 * asin(0.5); // PI (= 3.141592653...)
     2739
     2740  if (valname.find("aic") == 0) {
     2741    // Original Akaike Information Criterion (AIC)
     2742    double aic = nusedchan * (log(2.0 * PI * msq) + 1.0) + 2.0 * nparam;
     2743
     2744    // Corrected AIC by Sugiura(1978)
     2745    if (valname == "aicc") {
     2746      if (nusedchan - nparam - 1 <= 0) {
     2747        throw(AipsError("channel size is too small to calculate AICc."));
     2748      }
     2749      aic += 2.0*nparam*(nparam + 1)/(double)(nusedchan - nparam - 1);
     2750    }
     2751
     2752    return aic;
     2753
     2754  } else if (valname == "bic") {
     2755    // Bayesian Information Criterion (BIC)
     2756    double bic = nusedchan * log(msq) + nparam * log((double)nusedchan);
     2757    return bic;
     2758
     2759  } else if (valname == "gcv") {
     2760    // Generalised Cross Validation
     2761    double x = 1.0 - (double)nparam / (double)nusedchan;
     2762    double gcv = msq / (x * x);
     2763    return gcv;
     2764
     2765  } else {
     2766    throw(AipsError("valname must be aic, aicc, bic or gcv."));
     2767  }
     2768}
     2769
    26582770void Scantable::autoChebyshevBaseline(const std::vector<bool>& mask, int order, float thresClip, int nIterClip, const std::vector<int>& edge, float threshold, int chanAvgLimit, bool getResidual, const std::string& progressInfo, const bool outLogger, const std::string& blfile)
    26592771{
     
    27512863  if ((x < -1.0)||(x > 1.0)) {
    27522864    throw(AipsError("out of definition range (-1 <= x <= 1)."));
     2865  } else if (x == 1.0) {
     2866    return 1.0;
     2867  } else if (x == 0.0) {
     2868    double res;
     2869    if (n%2 == 0) {
     2870      if (n%4 == 0) {
     2871        res = 1.0;
     2872      } else {
     2873        res = -1.0;
     2874      }
     2875    } else {
     2876      res = 0.0;
     2877    }
     2878    return res;
     2879  } else if (x == -1.0) {
     2880    double res = (n%2 == 0 ? 1.0 : -1.0);
     2881    return res;
    27532882  } else if (n < 0) {
    27542883    throw(AipsError("the order must be zero or positive."));
     
    27662895        }
    27672896      }
    2768       res += (m%2 == 0 ? 1.0 : -1.0)*(double)n/(double)(n-m)*pow(2.0*x, (double)(n-2*m-1))/2.0*c;
     2897      res += (m%2 == 0 ? 1.0 : -1.0)*(double)n/(double)(n-m)*pow(2.0*x, (double)(n-2*m))/2.0*c;
    27692898    }
    27702899    return res;
  • trunk/src/Scantable.h

    r2667 r2713  
    619619  std::vector<uint> getMoleculeIdColumnData() const;
    620620  void setMoleculeIdColumnData(const std::vector<uint>& molids);
     621  double calculateModelSelectionCriteria(const std::string& valname,
     622                                         const std::string& blfunc,
     623                                         int order,
     624                                         const std::vector<bool>& inMask,
     625                                         int whichrow,
     626                                         bool useLineFinder,
     627                                         const std::vector<int>& edge,
     628                                         float threshold,
     629                                         int chanAvgLimit);
    621630
    622631
     
    804813  void applyChanFlag( casa::uInt whichrow, const std::vector<bool>& msk, casa::uChar flagval);
    805814
     815  double doCalculateModelSelectionCriteria(const std::string& valname,
     816                                           const std::vector<float>& spec,
     817                                           const std::vector<bool>& mask,
     818                                           const std::string& blfunc,
     819                                           int order);
     820
    806821};
    807822
  • trunk/src/ScantableWrapper.h

    r2667 r2713  
    321321  void setMoleculeIdColumnData(const std::vector<uint>& molids)
    322322  { table_->setMoleculeIdColumnData(molids); }
    323 
     323  double calculateModelSelectionCriteria(const std::string& valname, const std::string& blfunc, int order, const std::vector<bool>& inMask, int whichrow, bool useLineFinder, const std::vector<int>& edge, float threshold, int chanAvgLimit)
     324  { return table_->calculateModelSelectionCriteria(valname, blfunc, order, inMask, whichrow, useLineFinder, edge, threshold, chanAvgLimit); }
    324325
    325326private:
  • trunk/src/python_Scantable.cpp

    r2667 r2713  
    163163    .def("_getmolidcol_list", &ScantableWrapper::getMoleculeIdColumnData)
    164164    .def("_setmolidcol_list", &ScantableWrapper::setMoleculeIdColumnData)
     165    .def("_calc_aic", &ScantableWrapper::calculateModelSelectionCriteria)
    165166  ;
    166167};
Note: See TracChangeset for help on using the changeset viewer.