#!/usr/bin/python
# program to plot observation time versus correlator time from DiFX logs.

import sys, os, re, optparse, datetime, time
import scipy
from scipy import interpolate
from math import *
import matplotlib
# Force matplotlib to not use any Xwindows backend if X display not available.
# This doesn't work if there was an X forward but ssh is no longer listening -
# can happen under screen. Deal with that later if initiating the plot fails.
if not 'DISPLAY' in os.environ.keys():
    sys.stderr.write('Warning: no display available\n')
    matplotlib.use('Agg')
from matplotlib import pyplot
import numpy


def corrtime2secs(obstime_match):
    corrtime = obstime_match.groups()[0]
    corr_fracsec = int(obstime_match.groups()[1]) * 1e-3
    corrdatetime = datetime.datetime.strptime(corrtime, '%Y-%m-%d %H:%M:%S')
    corr_secs = time.mktime(corrdatetime.timetuple()) + corr_fracsec
    return corr_secs


usage = '''%prog [options] <difxlog> 
extracts the observation and correlation times from the <difxlog> file and plots the correlation speedup factor'''

parser = optparse.OptionParser(usage=usage, version='%prog ' + '1.0')
parser.add_option( "--output", "-o",
        type='str', dest="outfile", default=False,
        help='print to file' )
parser.add_option( "--time", "-t",
        action="store_true", dest="plottime", default=False,
        help='Plot elapsed correlation time instead of speedup factor' )
parser.add_option( "--averaging_time", "-a",
        type=float, dest="avg", default=300,
        help='Average the data for AVG seconds (300)' )
parser.add_option( "--speedup_poly", "-p",
        type=int, dest="poly_order", default=False,
        help='Fit time data with poly of order p, before differentiating to find speedup factor' )
parser.add_option( "--removegap", "-r",
        action="store_true", dest="removegap", default=False,
        help='Remove gaps in the observation time' )
parser.add_option( "--verbose", "-v",
        action="store_true", dest="verbose", default=False,
        help='Increase verbosity' )
parser.add_option( "--labelfile", "-l",
        action="store_true", dest="labelfile", default=False,
        help='Label plot with file start positions' )
parser.add_option( "--grep", "-g",
        type=str, dest="grep", default=False,
        help='Extract only lines containing GREP pattern' )

(options, args) = parser.parse_args()
if len(args) < 1:
    parser.print_help()
    parser.error("give a DiFX log file")

filenames = args

title = 'Correlator Speedup ' #+ str(filenames)
if options.removegap:
    title += ' (no gaps)'

try:
    pyplot.title(title)
except:
    # if setting the title fails, it probably means the default backend has
    # failed because the X session is no longer attached. So switch to a
    # backend that doesn't care (no interactive plots allowed).
    matplotlib.rcParams['backend'] = 'Agg'
    pyplot.switch_backend('Agg')
    #print matplotlib.rcParams['backend']
    sys.stderr.write('Warning: no display available - switching to non-interactive backend\n')
    pyplot.title(title)

header = " ".join(filenames)
maxnames = 8
if len(filenames) > maxnames:
    header = ", ".join(filenames[0:maxnames-1]) + ' ... ' + str(filenames[-1])
    header += ' (' + str(len(filenames)) + ' files)'
pyplot.suptitle(header, fontsize='x-small')

if options.plottime:
    pyplot.xlabel('Correlation time/hours')
    pyplot.ylabel('Observation time/hours')
else:
    pyplot.xlabel('Observation time/hours')
    pyplot.ylabel('Speedup factor')

# remove overall scaling from axis values
no_offset = matplotlib.ticker.ScalarFormatter(useOffset=False)
pyplot.gca().xaxis.set_major_formatter(no_offset)
pyplot.gca().yaxis.set_major_formatter(no_offset)

correlation = numpy.array([0])
observation = numpy.array([0])
new_files = []
for filename in args:
    thisdatafile = open(filename).readlines()

    this_xdata = []
    this_ydata = []

    nlines = 0
    for line in thisdatafile:
        nlines += 1
        line = line.strip()

        # get the time of the first line in the log file
        if nlines == 1:
            starttime_match = re.search(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),(\d{3})', line)
            starttime = corrtime2secs(starttime_match)
            this_xdata.append(starttime)
            # give this an observation time of -1 (first vis. is at time 0)
            this_ydata.append(-1)
            continue

        # match the correlator time and observation time in the log file
        #obstime_match = re.search(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),(\d{3}).*The approximate mjd/seconds is (.*)', line)
        obstime_match = re.search(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),(\d{3}).*to write out time (.*)', line)
        if not obstime_match:
            continue
        if (options.grep != False) and not re.search(options.grep, line):
            continue

        # convert correlator and observation time strings to seconds
        corr_secs = corrtime2secs(obstime_match)
        #obstime_day, obstime_secs = obstime_match.groups()[2].split('/')
        #obs_secs = int(obstime_day)*(24*60*60.) + int(obstime_secs)
        obs_secs = float(obstime_match.groups()[2])

        # this_* are the data extracted for the current log file only
        this_xdata.append(corr_secs)
        this_ydata.append(obs_secs)

    this_xdata = numpy.array(this_xdata)
    this_ydata = numpy.array(this_ydata)

    # find the integration time - typical spacing between observation times in the log
    int_time = numpy.median(numpy.diff(this_ydata))
    # nskip is fraction of the data points we will keep
    if int_time < options.avg:
        nskip = int(options.avg//int_time)
    else:
        nskip = 1
        sys.stderr.write(filename + "Warning: averaging time less than correlator integration time, resetting AVG to: " + str(int_time) + '\n')

    # make sure averaging time is not too long
    #nskip = int(min(nskip, len(this_ydata//2) ))
    minpoints = 3
    if nskip > len(this_ydata)//minpoints:
        nskip = max(len(this_ydata)//minpoints, 1)
        print filename, ": averaging time too long, reducing to", int_time*nskip

    # remove gaps in the observation if requested
    if options.removegap:
        orig_length = this_ydata[-1] - this_ydata[0]
        this_ydata = numpy.arange(0, len(this_ydata)*int_time, int_time)
        print filename, ': removed', orig_length - this_ydata[-1], 'secs'

    # move array to origin (start time=int_time for first file and
    # continues to next file without a gap)
    offset_y = observation[-1] + int_time
    offset_x = correlation[-1] + int_time
    this_ydata = this_ydata - this_ydata[0] + offset_y
    this_xdata = this_xdata - this_xdata[0] + offset_x

    if options.verbose:
        print filename, 't_int:', int_time, 'n_int:', len(this_ydata), 'start:', this_ydata[0]

    # only keep a fraction of the points (this also effectively smooths)
    this_ydata = [this_ydata[i] for i in range(nskip, len(this_ydata), nskip)]
    this_xdata = [this_xdata[i] for i in range(nskip, len(this_xdata), nskip)]

    # concatenate this file's data to the master arrays
    observation = numpy.concatenate((observation, this_ydata))
    correlation = numpy.concatenate((correlation, this_xdata))

    if options.labelfile:
        new_files.append((this_xdata[0]/3600., this_ydata[0]/3600.))


# convert elapsed times from seconds to hours
observation /= 3600.
correlation /= 3600.

print 'Observation time:', (observation[-1] - observation[0]), 'hours'
print 'Correlation time:', (correlation[-1] - correlation[0]), 'hours'
print 'Speedup factor  :', observation[-1]/correlation[-1]

speedup = []
if options.plottime:
    # simply plot correlation time against observation time
    pyplot.plot(correlation, observation, '.')
else:
    # plot the cumulative speedup and instantaneous speedup against
    # observation time
    speedup = observation/correlation
    label1 = 'Integrated speedup {speedup:0.3f}'.format(speedup=speedup[-1])
    pyplot.plot(observation, speedup, label=label1)
    speedup = numpy.diff(observation)/numpy.diff(correlation)
    xdata_diff = observation[1:]
    pyplot.plot(xdata_diff, speedup, label='Instantaneous speedup')
    if options.poly_order:
        # slightly silly option to fit the data with a polynomial before
        # calculating speedup
        poly_fit = numpy.polyfit(correlation, observation, options.poly_order)
        ydata_poly = numpy.polyval(poly_fit, correlation)
        ydata_poly = numpy.diff(ydata_poly)/numpy.diff(correlation)
        pyplot.plot(xdata_diff, ydata_poly, label='Smoothed Instantaneous speedup')

#corrmax = correlation[-1]
#obsmax = observation[-1]

# Plot a line indicating where real-time correlation would be.
xmin = False
xmax = False
ymin = False
ymax = False
if options.plottime:
    xmin = correlation[0]
    xmax = correlation[-1]
    ymin = observation[0]
    ymax = observation[-1]
    pyplot.plot([0, ymax], [0, ymax], label='Real time')
else:
    xmin = observation[0]
    xmax = observation[-1]
    ymin = min(min(speedup), 1)
    ymax = max(max(speedup), 1)
    pyplot.plot([0, xmax], [1, 1], label='Real time')

deltax = xmax - xmin
deltay = ymax - ymin


# annotate plot with the start times of the different files if requested
if options.labelfile:
    textside = -1
    ifile = -1
    for new_file in new_files:
        textside *= -1
        ifile += 1
        pointpos = []
        if options.plottime:
            pointpos = new_file
        else:
            speedup = new_file[1]/new_file[0]
            pointpos = (new_file[1], speedup)
        ytextoffset = (len(filenames)-ifile)*0.45*deltay*textside/(len(filenames))
        ytextpos = 0.5*(ymin+ymax) + ytextoffset
        if abs(ytextpos - pointpos[1]) < deltay*0.1:
            ytextpos = 0.5*(ymin+ymax) + ytextoffset*-1
        textpos = (pointpos[0], ytextpos)
        #textpos = (pointpos[0], 0.5*(ymin+ymax) + 0.45*deltay*textside)
        pyplot.annotate(filenames[ifile], pointpos, textpos,
                arrowprops=dict(arrowstyle="->"), fontsize='x-small')

pyplot.legend(loc='best', prop={'size':'small'})

#pyplot.figtext(0,0, str(filenames), transform=pyplot.gca().transAxes)

if options.outfile:
    pyplot.savefig(options.outfile)
else:
    pyplot.show()
pyplot.close()


