source: trunk/htdocs/perl/find_archives.pl @ 220

Last change on this file since 220 was 220, checked in by MatthewWhiting, 16 years ago

Some minor modifications to the perl script, and even more minor ones to the s70.x program (from testing).

  • Property svn:executable set to *
File size: 30.3 KB
Line 
1#! /usr/bin/perl -w
2
3# find_archives.pl
4# Author: Matthew Whiting
5# Date: January 2008
6# ATNF, CSIRO
7
8# A modified version of Albert's find_archives.pl that hopefully works
9# a bit more consistently for the different data types.
10
11# Crawls through data archive directories (given as an input parameter),
12# searches for all observation and/or calibration files, enquires about
13# their pulsar parameters and inputs these details as well as their
14# location into a mySQL database
15
16use strict;
17#use lib "/DATA/SITAR_1/whi550/VO/PulsarVO/SQL-Abstract-1.22/blib/lib";
18use File::Find ();
19use DBI qw(:sql_types);
20use SQL::Abstract;
21use POSIX;
22
23#  foreach (@{ $DBI::EXPORT_TAGS{sql_types} }) {
24#      printf "%s=%d\n", $_, &{"DBI::$_"};
25#  }
26
27# for the convenience of &wanted calls, including -eval statements:
28use vars qw/*name *dir *prune @archive_dirs $archive_dir @files
29    $vap_cmd $vap_out $params $num_vap_params $db_handle
30    $filename $raj $decj $dm $period $bw $cnfg
31    $freq $inst $mjdint $mjdfrac $npol $nchan $nbin $nsub
32    $rcvr $site $length $obsrvr $data_loc $data_type
33    $file_size $MJD $raH $raM $raS $decD $decM $decS $rajd $decjd $decstring
34    $pi $pion180 $NGP_RA $NGP_DEC $ASC_NODE $draR $decR
35    $sinb $sinl $cosl $gb $gl $gl_raw
36    $bmaj $bmin $bpa $hdrver $survey $nbeam
37    $output $vap $archive_extensions $ext       
38    $default
39    @result_params $FILE_SIZE_INDEX $FB $DFB $WBC $CPSR2 $NULLFIELD
40    $params_NAME   
41    $params_PROJID 
42    $params_RAJ     
43    $params_DECJ   
44    $params_FREQ   
45    $params_BW     
46    $params_LENGTH 
47    $params_DATE   
48    $params_TIME   
49    $params_MJD
50    $params_BMAJ   
51    $params_BMIN   
52    $params_BPA     
53    $params_DM     
54    $params_PERIOD 
55    $params_NCHAN   
56    $params_NPOL   
57    $params_NBIN   
58    $params_NSUB   
59    $params_NBITS   
60    $params_TSAMP   
61    $params_NBEAM
62    $params_CNFG   
63    $params_INST   
64    $params_RCVR   
65    $params_HDRVER
66    $params_TELESCOP
67    $params_SITE   
68    $params_OBSRVR 
69    $params_FILENAME
70    /;  # $machine
71                                               
72*name   = *File::Find::name;
73*dir    = *File::Find::dir;
74*prune  = *File::Find::prune;
75
76# The data types
77$FB = "FB";
78$WBC = "WBC";
79$DFB = "DFB";
80$CPSR2 = "CPSR2";
81
82# filename extensions
83$archive_extensions = "rf cf cfb fb";
84
85# How to write NULL fields to the database
86#$NULLFIELD = undef;
87$NULLFIELD = "NULL";
88
89# Pulsar parameters (listed in "vap -H")
90$params  = "name projid ra dec freq bw length stt_date stt_time mjd bmaj bmin bpa dm period nchan npol nbin nsub nbits tsamp nbeam beconfig backend rcvr hdrver telescop asite observer";
91
92# The location of vap
93$vap = "/pulsar/psr/linux/bin/vap -n";
94
95# vap result parameter indices. Used to locate the resulting
96# parameter's location
97# Note that we start at 1, since the vap output has the filename in the first position
98$params_NAME    =  1;
99$params_PROJID  =  2;
100$params_RAJ     =  3;
101$params_DECJ    =  4;
102$params_FREQ    =  5;
103$params_BW      =  6;
104$params_LENGTH  =  7;
105$params_DATE    =  8;
106$params_TIME    =  9;
107$params_MJD     = 10;
108$params_BMAJ    = 11;
109$params_BMIN    = 12;
110$params_BPA     = 13;
111$params_DM      = 14;
112$params_PERIOD  = 15;
113$params_NCHAN   = 16;
114$params_NPOL    = 17;
115$params_NBIN    = 18;
116$params_NSUB    = 19;
117$params_NBITS   = 20;
118$params_TSAMP   = 21;
119$params_NBEAM   = 22;
120$params_CNFG    = 23;
121$params_INST    = 24;
122$params_RCVR    = 25;
123$params_HDRVER  = 26;
124$params_TELESCOP= 27;
125$params_SITE    = 28;
126$params_OBSRVR  = 29;
127$params_FILENAME= 30;
128
129
130#####################
131# Begin main method
132#
133my @vap_params = split(/ /, $params);
134$num_vap_params = scalar(@vap_params);
135
136# The column number (starting from 0) of the file
137# size when calling ls -l
138$FILE_SIZE_INDEX = 4;
139
140# Use the default operation which is to firstly delete any
141# stale records, then crawl through and insert new records
142$default = 1;
143
144if (scalar(@ARGV) == 0) {
145        print
146"find_archives.pl: A crawler that populates a relational database\
147                with indexes to observational data along with their respective\
148                cal files.\n\
149 Usage: find_archives.pl [options] directory1 [directory2 ...]\
150        If no options are provided, it will default to do both -d and -p\
151 Options:\
152 \t-d Delete stale records\
153 \t-p Populate database with new records\n";
154       
155}
156
157# Connect to the database
158$db_handle = connectdb();
159
160#########################################################
161# Step 1. First remove any stale entries in the database
162#         (remove file locations that no longer exist)
163#########################################################
164
165if ($ARGV[0] eq "-d") {
166        deleteStaleRecords();
167        $default = 0;
168        shift @ARGV;
169}
170
171
172#########################################################
173# Step 2. Crawl through each input directory and insert
174#         any new archives
175
176if ($ARGV[0] eq "-p") {
177        shift @ARGV;
178        @archive_dirs = @ARGV;
179
180        foreach $archive_dir (@archive_dirs) {
181
182                # Traverse desired filesystems
183                File::Find::find({wanted => \&wanted}, $archive_dir);
184        }
185        $default = 0;
186}
187
188# Else do both
189if ($default) {
190
191        deleteStaleRecords();
192
193        @archive_dirs = @ARGV;
194
195        foreach $archive_dir (@archive_dirs) {
196               
197                # Check that the input directory is legit
198               
199                # Traverse desired filesystems
200                File::Find::find({wanted => \&wanted}, $archive_dir);
201        }
202
203}
204#if ($db_handle->disconnect()) {
205#       print "Successfully Disconnected from database\n";
206#}
207#else {
208#       print "Error: Failed to disconnect from the database\n";
209#       exit;
210#}
211
212######################################################################
213
214######################################################################
215# Subroutine definitions
216#
217######################################################################
218
219#######################################################
220
221sub wanted {
222 
223       
224        if (-d and /^[a-zA-Z]{0,1}[0-9]{4}[+-][0-9]{4}.*/s ) {
225
226            if ($File::Find::name !~ /.*S$/) { # ignore scrunch directories
227
228                print "\n\nIn directory $_\n\n";
229                print "\$File::Find::name = $File::Find::name\n";
230               
231                my $search_str;
232                foreach $ext (split (/\s+/, $archive_extensions) ) {
233                        $search_str .= $File::Find::name . "/*.$ext "
234                }
235
236                my @all_files = glob($search_str);
237               
238                if (scalar(@all_files) == 0) {
239                    # No data files means it's likely to be a CPSR2 directory
240
241                    # Foreach observation
242                    my @cpsr2_obs_dirs = glob($File::Find::name . "/????-??-??-??:??:??");
243                   
244                    my $cpsr2_obs_dir;
245                    foreach $cpsr2_obs_dir (@cpsr2_obs_dirs) {
246                        print "dir is $cpsr2_obs_dir\n";
247                       
248                        # Need to check if this entry already exists
249                        $cpsr2_obs_dir =~ s/\s//g;
250                       
251                        if (isDuplicate($cpsr2_obs_dir) == 1) {
252                            print "$cpsr2_obs_dir is a duplicate entry. Skipping....\n\n";
253                            next;
254                        }                               
255                       
256                        print "$cpsr2_obs_dir NOT a duplicate. Extracting data...\n";
257                        find_data_cpsr2($cpsr2_obs_dir);                       
258                       
259                    }
260                   
261                }
262                else{
263
264                    my $file;
265                    foreach $file (@all_files) {
266                       
267                        # Need to check if this entry already exists
268                        $file =~ s/\s//g;
269                       
270                        if (isDuplicate($file) == 1) {
271                            print "$file is a duplicate entry. Skipping....\n\n";
272                            next;
273                        }                               
274                       
275                        print "$file NOT a duplicate. Inserting into database here ...\n";
276                        find_data($file);
277                       
278                    }
279                   
280                }
281               
282            }
283           
284        }
285    }
286               
287
288
289#######################################################
290
291sub find_data {
292   
293# Usage: find_data(filename)
294# Runs vap on the filename, and extracts parameters, then sends them to populate_database() to be written to the database
295#
296# NOTE: this is for non-CPSR2 data
297
298    my $file = $_[0];
299
300    my $vap_cmd = "$vap -c \"$params\" $file  | grep -v filename | grep -v -x \"\" |";
301    print "Calling vap...$vap_cmd\n";
302    open(VAP,$vap_cmd);
303    while ($vap_out = <VAP>) {
304        $vap_out =~ s/^\s+//g; #remove any leading space
305        my @result_params = split(/\s+/, $vap_out);
306       
307        populate_database(@result_params,$file);
308#       if (scalar(@result_params) == ($num_vap_params + 1)) {    # +1 because first column is filename
309#           print "@result_params\n";
310#           populate_observations($file);
311#       }
312#       else {
313#           print "Incorrect number of cols = ".scalar(@result_params).". Expected " . $num_vap_params+1 ."\n";
314#           print "@result_params\n";
315#       }
316    }
317    print "\n";
318 
319}
320
321#######################################################
322
323sub find_data_cpsr2 {
324   
325# Usage: find_data_cpsr2(filename) where filename = ????-??-??-??:??:??
326# Extracts parameters for the observation using vap, then sends them to populate_database() to be written to the database
327#
328# NOTE: this is only for CPSR2 data
329
330    my $dir = $_[0];
331    my $ext;
332    my @result_params;
333    my @tmp_params;
334    my $prefix = "m n o";
335
336    my @all_files_fb = glob($dir."/*.fb");
337    my @all_files_cfb = glob($dir."/*.cfb");
338    if ( scalar(@all_files_fb)!=0 ){
339        $ext = ".fb";
340    }
341    else{
342        $ext = ".cfb";
343    }
344    print "Searching in $dir for $ext files\n";
345
346    my $pre;
347    foreach $pre (split (/\s+/, ($prefix))) {
348
349        my @all_files;
350        my $search_str = $dir."/".$pre."*".$ext;
351        print "$pre: Searching for files like: $search_str\n";
352        @all_files = glob($search_str);
353        print "Found ". scalar(@all_files) . "\n";
354
355        if(scalar(@all_files) != 0) {
356   
357            # Get most parameters from first file
358            my $vap_cmd = "$vap -n -c \"$params\" $all_files[0]  | grep -v filename | grep -v -x \"\" |";
359            open(VAP,$vap_cmd);
360            my $vap_out;
361            while ($vap_out = <VAP>) {
362                $vap_out =~ s/^\s+//g; #remove any leading space
363                @result_params = split(/\s+/, $vap_out);
364                print "RESULT_PARAMS:\n@result_params\n";
365            }
366           
367            my $total_length=0;
368            my $file;
369            foreach $file (@all_files) { #loop over all files in directory to get total obs time.
370                my @tmp_params;
371                $vap_cmd = "$vap -n -c \"$params\" $file  | grep -v filename | grep -v -x \"\" |";
372                open(VAP,$vap_cmd);
373                while ($vap_out = <VAP>) {
374                    $vap_out =~ s/^\s+//g; #remove any leading space
375                    @tmp_params = split(/\s+/, $vap_out);
376                }
377                if((scalar(@tmp_params)>0) && !($tmp_params[0] eq "failed")){
378                    $total_length += $tmp_params[$params_LENGTH];
379                }
380            }
381
382            print "total length = $total_length\n";
383            $result_params[$params_LENGTH] = $total_length;
384
385           
386
387           
388            populate_database(@result_params,$dir);
389
390        }   
391    }
392
393}
394
395#######################################################
396
397sub populate_database {
398   
399# Usage: populate_database(file, $result_params), where file has the full path to the file/directory in question,
400#   and $result_params have been filled with the vap results
401#
402#  This subroutine calculates all necessary parameters (such as data type, decimal positions, beam, file size, ...)
403#   and writes them to the database.
404
405    my @results = @_;
406    my $full_filename = $results[$#results];
407#    @results = shift @results;
408    print "full_filename = $full_filename\nRESULT_PARAMS (POP):\n@results\n";
409
410    my @dirs = split(/\//, $full_filename);
411    my $path = $dirs[0];
412    my $i;
413    for ($i=1; $i < $#dirs; $i++){
414        $path .= "/".$dirs[$i];
415    }
416    my $filename = $dirs[$#dirs];
417    print "path = $path, filename = $filename\n";
418   
419    if( $filename =~ /[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}:[0-9]{2}:[0-9]{2}/ ) {
420        # Case of CPSR2 data file
421        $data_type = $CPSR2;   
422       
423        my @time_date = split('-', $filename);
424        $results[$params_DATE] = $time_date[0]."-".$time_date[1]."-".$time_date[2];
425        $results[$params_TIME] = $time_date[3];
426       
427        my $prefix = substr $results[0], 0, 1;
428        print "vap filename = $results[0] and the prefix = $prefix\n";
429
430        my $du = `du -sb $full_filename/$prefix*fb | awk 'BEGIN{sum=0}{sum+=\$1}END{print sum}'`;
431        print "Running du: -> du -sb $full_filename/$prefix*fb | awk 'BEGIN{sum=0}{sum+=\$1}END{print sum}'\n";
432        my @file_details = split(/\s+/, $du);
433        $file_size = $file_details[0];
434    }
435    else{
436        if ($filename =~ /^a.*\.rf/ || $filename =~ /^a.*\.cf/ ||
437            $filename =~ /^r.*\.rf/ || $filename =~ /^r.*\.cf/) {
438            $data_type = $DFB;
439        }
440        elsif ($filename =~ /^w.*\.rf/ || $filename =~ /^w.*\.cf/) {
441            $data_type = $WBC;
442        }
443        else {
444            $data_type = $NULLFIELD;
445        }
446       
447        my $ls = `ls -l $full_filename`;
448        my @file_details = split(/\s+/, $ls);
449        $file_size = $file_details[$FILE_SIZE_INDEX];
450    }
451
452    print "RESULT_PARAMS (POP 2):\n@results\n";
453   
454    #### Calculate the RA and Dec in decimal degrees.
455
456    ($raH,$raM,$raS) = split(':', $results[$params_RAJ]);
457    $rajd = ($raH + $raM/60. + $raS/3600.) * 15.;
458   
459    ($decD,$decM,$decS) = split(':', $results[$params_DECJ]);
460    $decjd = (abs($decD) + $decM/60. + $decS/3600.);
461    my @decstring = split(/ */,$results[$params_DECJ]);
462    if($decstring[0] eq '-'){
463        $decjd = -1. * $decjd;
464    }
465
466    ### Calculate the Galactic Longitude and latitude
467    $pi=asin(1) * 2.;
468    $pion180 = $pi/180.;
469    $NGP_RA = 192.859508 * $pion180; # location of NGP
470    $NGP_DEC= 27.128336 * $pion180;
471    $ASC_NODE=32.932;
472   
473    $draR = $rajd*$pion180 - $NGP_RA;
474    $decR = $decjd*$pion180;
475    $sinb = cos($decR) * cos($NGP_DEC) * cos($draR) + sin($decR) * sin($NGP_DEC);
476    $gb = asin($sinb); # this is the latitude, but in radians.
477   
478    $sinl = (sin($decR) * cos($NGP_DEC) - cos($decR) * cos($draR) * sin($NGP_DEC)) / cos($gb);
479    $cosl = cos($decR) * sin($draR) / cos($gb);
480   
481    # Need to get the correct quadrant, as this isn't preserved by
482    # atan, which returns angle between -90 and 90.
483    $gl_raw = atan($sinl/$cosl);
484    if($sinl > 0){
485        if($cosl > 0 ){ $gl = $gl_raw; }
486        else { $gl = $gl_raw + $pi; }
487    }
488    else {
489        if($cosl > 0){ $gl = $gl_raw + 2.*$pi; }
490        else{ $gl = $gl_raw + $pi; }
491    }
492    # Now put them into degrees.
493    $gb = $gb / $pion180;
494    $gl = ($gl / $pion180) + $ASC_NODE;
495   
496    print "rajd = $rajd, decjd=$decjd, gl=$gl, gb=$gb\n";
497
498
499    # Beam information
500    $bmaj = $results[$params_BMAJ];
501    $bmin = $results[$params_BMIN];
502    $bpa = $results[$params_BPA];
503    if($bmaj eq "UNDEF" || $bmaj eq "*" || $bmaj == 0.){
504        # could not get beam info from vap output.
505        # Need to calculate manually.
506        if( $results[$params_FREQ] != 0.){
507            # if the frequency is zero, do not calculate these...
508            $bmaj = (1.2*(299792458./($results[$params_FREQ] * 1.e6))/64.) / $pion180;
509            $bmin = (1.2*(299792458./($results[$params_FREQ] * 1.e6))/64.) / $pion180;
510            $bpa = 0.;
511        }
512        else{
513            $bmaj = $NULLFIELD;
514            $bmin = $NULLFIELD;
515            $bpa = $NULLFIELD;
516        }
517    }
518   
519    if($results[$params_NBITS] eq "UNDEF" || $results[$params_NBITS] == 0){
520        $results[$params_NBITS] = $NULLFIELD;
521    }
522    if($results[$params_TSAMP] eq "UNDEF" || $results[$params_TSAMP] == 0){
523        $results[$params_TSAMP] = $NULLFIELD;
524    }
525
526    # Fix telescope -- always PARKES
527    if($results[$params_TELESCOP] eq "unknown") {
528        $results[$params_TELESCOP] = "PARKES";
529    }
530
531    # Dud things that aren't required for this set of data.
532    $survey = $NULLFIELD;
533    $nbeam = 1;
534       
535    # Clean up any NULL parameters
536    for ($i = 0; $i <= $#results; $i++) {
537        if ($results[$i] eq "N/A" || $results[$i] eq "UNDEF" ||
538            $results[$i] eq "INVALID" ||
539            $results[$i] eq "*error*" || $results[$i] eq "*") {
540
541            $results[$i] = $NULLFIELD;
542
543        }
544    }
545
546
547    # print out the list of parameters
548    print "filename = $filename\n";
549    print "src_name = " . $results[$params_NAME] . "\n";
550    print "projid = " . $results[$params_PROJID] . "\n";
551    print "raj = " . $results[$params_RAJ] . "\n";
552    print "dec = " . $results[$params_DECJ] . "\n";
553    print "data_type = $data_type\n";
554    print "freq = " . $results[$params_FREQ] . "\n";
555    print "bw = " . $results[$params_BW] . "\n";
556    print "scanlen = " . $results[$params_LENGTH] . "\n";
557    print "date = " . $results[$params_DATE] ."\n";
558    print "ut = " . $results[$params_TIME] ."\n";
559    print "MJD = " . $results[$params_MJD] ."\n";
560    print "rajd = " . $rajd ."\n";
561    print "decjd = " . $decjd ."\n";
562    print "gl = " . $gl ."\n";
563    print "gb = " . $gb ."\n";
564    print "bmaj = " . $bmaj ."\n";
565    print "bmin = " . $bmin ."\n";
566    print "bpa = " . $bpa ."\n";
567    print "dm = " . $results[$params_DM] . "\n";
568    print "period = " . $results[$params_PERIOD] . "\n";
569    print "nchan = " . $results[$params_NCHAN] . "\n";
570    print "npol = " . $results[$params_NPOL] . "\n";
571    print "nbin = " . $results[$params_NBIN] . "\n";
572    print "nsub = " . $results[$params_NSUB] . "\n";
573    print "tsamp = " . $results[$params_TSAMP] . "\n";
574    print "nbits = " . $results[$params_NBITS] . "\n";
575    print "nbeam = " . $nbeam . "\n";
576    print "cnfg = " . $results[$params_CNFG] . "\n";
577    print "inst = " . $results[$params_INST] . "\n";
578    print "rcvr = " . $results[$params_RCVR] . "\n";
579    print "hdrver = " . $results[$params_HDRVER] . "\n";
580    print "survey = " . $survey . "\n";
581    print "telescope = " . $results[$params_TELESCOP] . "\n";
582    print "site = " . $results[$params_SITE] . "\n";
583    print "obsrvr = " . $results[$params_OBSRVR] . "\n";
584    print "data_loc = $path\n";
585    print "file_size = $file_size bytes\n";
586
587    # Clean up any NULL parameters ready for SQL import
588    for ($i = 0; $i <= $#results; $i++) {
589        if ($results[$i] eq $NULLFIELD) {
590            $results[$i] = undef;
591        }
592    }
593
594######### Now write the data to the mySQL database
595
596    my %data = ( filename => $filename,
597                 src_name => $results[$params_NAME],
598                 project_id => $results[$params_PROJID],
599                 raj => $results[$params_RAJ],
600                 decj => $results[$params_DECJ],
601                 data_type => $data_type,
602                 obsfreq => $results[$params_FREQ],
603                 bw => $results[$params_BW],
604                 scanlen => $results[$params_LENGTH],
605                 date => $results[$params_DATE],
606                 ut => $results[$params_TIME],
607                 MJD => $results[$params_MJD],
608                 rajd => $rajd,
609                 decjd => $decjd,
610                 gl => $gl,
611                 gb => $gb,
612                 BMAJ => $bmaj,
613                 BMIN => $bmin,
614                 BPA => $bpa,
615                 dm => $results[$params_DM],
616                 period => $results[$params_PERIOD],
617                 nchan => $results[$params_NCHAN],
618                 npol => $results[$params_NPOL],
619                 nbin => $results[$params_NBIN],
620                 nsub => $results[$params_NSUB],
621                 tsamp => $results[$params_TSAMP],
622                 nbits => $results[$params_NBITS],
623                 nbeam => $nbeam,
624                 cnfg => $results[$params_CNFG],
625                 inst => $results[$params_INST],
626                 rcvr => $results[$params_RCVR],
627                 hdrver => $results[$params_HDRVER],
628                 survey => $survey,
629                 telescope => $results[$params_TELESCOP],
630                 site => $results[$params_SITE],
631                 obsrvr => $results[$params_OBSRVR],
632                 data_loc => $path,
633                 file_size_bytes => $file_size
634                 );
635               
636    my $sql = SQL::Abstract->new;
637    my $tableName;
638    if ($path =~ /\_R/ ) {
639        $tableName = 'cals';
640    }
641    else {
642        $tableName = 'observations';
643    }   
644    my($stmt, @bind) = $sql->insert($tableName, \%data);
645
646    my $sth = $db_handle->prepare($stmt);
647    $sth->execute(@bind);
648
649
650
651
652#    if ($path =~ /\_R/ ) {
653#       print "Inserting cal file $filename\n";
654#       
655#       $sql = qq{ INSERT INTO cals
656#                      (filename, src_name, project_id,
657#                       raj, decj, data_type, obsfreq, bw,
658#                       scanlen, date, ut, MJD,
659#                       rajd, decjd, gl, gb, BMAJ, BMIN, BPA,
660#                       dm, period, nchan, npol, nbin, nsub,
661#                       tsamp, nbits, nbeam,
662#                       cnfg, inst, rcvr, hdrver, survey,
663#                       telescope, site, obsrvr,
664#                       data_loc, file_size_bytes
665#                       )
666#                     
667#                      VALUES
668#                      ( ?, ?, ?
669#                        ?, ?, ?, CAST(? AS DECIMAL), CAST(? AS DECIMAL),
670#                        CAST(? AS DECIMAL), ?, ?, CAST(? AS DECIMAL),
671#                        CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL),
672#                        CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED),
673#                        CAST(? AS DECIMAL), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED),
674#                        ?, ?, ?, ?, ?,
675#                        ?, ?, ?,
676#                        ?, CAST(? AS UNSIGNED)
677#                        )
678##                     ( ?, ?, ?
679##                       ?, ?, ?, ?, ?,
680##                       ?, ?, ?, ?,
681##                       ?, ?, ?, ?, ?, ?, ?,
682##                       ?, ?, ?, ?, ?, ?,
683##                       ?, ?, ?,
684##                       ?, ?, ?, ?, ?,
685##                       ?, ?, ?,
686##                       ?, ?,
687##                       )
688#                  };
689#    }
690#    else {
691#       
692#       print "Inserting obs file $filename\n";
693#       
694#       $sql = qq{ INSERT INTO observations
695#                      (filename, src_name, project_id,
696#                       raj, decj, data_type, obsfreq, bw,
697#                       scanlen, date, ut, MJD,
698#                       rajd, decjd, gl, gb, BMAJ, BMIN, BPA,
699#                       dm, period, nchan, npol, nbin, nsub,
700#                       tsamp, nbits, nbeam,
701#                       cnfg, inst, rcvr, hdrver, survey,
702#                       telescope, site, obsrvr,
703#                       data_loc, file_size_bytes
704#                       )
705#                     
706#                      VALUES
707#                      ( ?, ?, ?
708#                        ?, ?, ?, CAST(? AS DECIMAL), CAST(? AS DECIMAL),
709#                        CAST(? AS DECIMAL), ?, ?, CAST(? AS DECIMAL),
710#                        CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL),
711#                        CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED),
712#                        CAST(? AS DECIMAL), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED),
713#                        ?, ?, ?, ?, ?,
714#                        ?, ?, ?,
715#                        ?, CAST(? AS UNSIGNED)
716#                        )
717##                     ( ?, ?, ?
718##                       ?, ?, ?, ?, ?,
719##                       ?, ?, ?, ?,
720##                       ?, ?, ?, ?, ?, ?, ?,
721##                       ?, ?, ?, ?, ?, ?,
722##                       ?, ?, ?,
723##                       ?, ?, ?, ?, ?,
724##                       ?, ?, ?,
725##                       ?, ?,
726##                       )
727#                  };
728#    }
729#       my $sth = $db_handle->prepare( $sql );
730##DEBUGGING     
731##    my $sth;
732#   
733#    #for( @records ) {
734#    eval {
735#       
736##      $sth->bind_param( 1,  $filename);
737##      $sth->bind_param( 2,  $results[$params_NAME]);
738##      $sth->bind_param( 3,  $results[$params_PROJID]);
739##      $sth->bind_param( 4,  $results[$params_RAJ]);
740##      $sth->bind_param( 5,  $results[$params_DECJ]);
741##      $sth->bind_param( 6,  $data_type);
742##      $sth->bind_param( 7,  $results[$params_FREQ]);
743##      $sth->bind_param( 8,  $results[$params_BW]);
744##      $sth->bind_param( 9,  $results[$params_LENGTH]);
745##      $sth->bind_param( 10, $results[$params_DATE]);
746##      $sth->bind_param( 11, $results[$params_TIME]);
747##      $sth->bind_param( 12, $results[$params_MJD]);
748##      $sth->bind_param( 13, $rajd);
749##      $sth->bind_param( 14, $decjd);
750##      $sth->bind_param( 15, $gl);
751##      $sth->bind_param( 16, $gb);
752##      $sth->bind_param( 17, $bmaj);
753##      $sth->bind_param( 18, $bmin);
754##      $sth->bind_param( 19, $bpa);
755##      $sth->bind_param( 20, $results[$params_DM]);
756##      $sth->bind_param( 21, $results[$params_PERIOD]);
757##      $sth->bind_param( 22, $results[$params_NCHAN]);
758##      $sth->bind_param( 23, $results[$params_NPOL]);
759##      $sth->bind_param( 24, $results[$params_NBIN]);
760##      $sth->bind_param( 25, $results[$params_NSUB]);
761##      $sth->bind_param( 26, $results[$params_TSAMP]);
762##      $sth->bind_param( 27, $results[$params_NBITS]);
763##      $sth->bind_param( 28, $nbeam);
764##      $sth->bind_param( 29, $results[$params_CNFG]);
765##      $sth->bind_param( 30, $results[$params_INST]);
766##      $sth->bind_param( 31, $results[$params_RCVR]);
767##      $sth->bind_param( 32, $results[$params_HDRVER]);
768##      $sth->bind_param( 33, $survey);
769##      $sth->bind_param( 34, $results[$params_TELESCOP]);
770##      $sth->bind_param( 35, $results[$params_SITE]);
771##      $sth->bind_param( 36, $results[$params_OBSRVR]);
772##      $sth->bind_param( 37, $path);
773##      $sth->bind_param( 38, $file_size);
774#
775##      $sth->bind_param( 1,  $filename,                  {TYPE => SQL_VARCHAR } );
776##      $sth->bind_param( 2,  $results[$params_NAME],     {TYPE => SQL_VARCHAR } );
777##      $sth->bind_param( 3,  $results[$params_PROJID],   {TYPE => SQL_VARCHAR } );
778##      $sth->bind_param( 4,  $results[$params_RAJ],      {TYPE => SQL_VARCHAR } );
779##      $sth->bind_param( 5,  $results[$params_DECJ],     {TYPE => SQL_VARCHAR } );
780##      $sth->bind_param( 6,  $data_type,                 {TYPE => SQL_VARCHAR } );
781##      $sth->bind_param( 7,  $results[$params_FREQ],     {TYPE => SQL_DOUBLE } );
782##      $sth->bind_param( 8,  $results[$params_BW],       {TYPE => SQL_DOUBLE } );
783##      $sth->bind_param( 9,  $results[$params_LENGTH],   {TYPE => SQL_DOUBLE } );
784##      $sth->bind_param( 10, $results[$params_DATE],     {TYPE => SQL_VARCHAR } );
785##      $sth->bind_param( 11, $results[$params_TIME],     {TYPE => SQL_VARCHAR } );
786##      $sth->bind_param( 12, $results[$params_MJD],      {TYPE => SQL_DOUBLE } );
787##      $sth->bind_param( 13, $rajd,                      {TYPE => SQL_DOUBLE } );
788##      $sth->bind_param( 14, $decjd,                     {TYPE => SQL_DOUBLE } );
789##      $sth->bind_param( 15, $gl,                        {TYPE => SQL_DOUBLE } );
790##      $sth->bind_param( 16, $gb,                        {TYPE => SQL_DOUBLE } );
791##      $sth->bind_param( 17, $bmaj,                      {TYPE => SQL_FLOAT } );
792##      $sth->bind_param( 18, $bmin,                      {TYPE => SQL_FLOAT } );
793##      $sth->bind_param( 19, $bpa,                       {TYPE => SQL_FLOAT } );
794##      $sth->bind_param( 20, $results[$params_DM],       {TYPE => SQL_DOUBLE } );
795##      $sth->bind_param( 21, $results[$params_PERIOD],   {TYPE => SQL_DOUBLE } );   
796##      $sth->bind_param( 22, $results[$params_NCHAN],    {TYPE => SQL_INTEGER } );
797##      $sth->bind_param( 23, $results[$params_NPOL],     {TYPE => SQL_INTEGER } );
798##      $sth->bind_param( 24, $results[$params_NBIN],     {TYPE => SQL_INTEGER } );   
799##      $sth->bind_param( 25, $results[$params_NSUB],     {TYPE => SQL_INTEGER } );
800##      $sth->bind_param( 26, $results[$params_TSAMP],    {TYPE => SQL_FLOAT } );
801##      $sth->bind_param( 27, $results[$params_NBITS],    {TYPE => SQL_TINYINT } );
802##      $sth->bind_param( 28, $nbeam,                     {TYPE => SQL_TINYINT } );
803##      $sth->bind_param( 29, $results[$params_CNFG],     {TYPE => SQL_VARCHAR } );
804##      $sth->bind_param( 30, $results[$params_INST],     {TYPE => SQL_VARCHAR } );
805##      $sth->bind_param( 31, $results[$params_RCVR],     {TYPE => SQL_VARCHAR } );
806##      $sth->bind_param( 32, $results[$params_HDRVER],   {TYPE => SQL_VARCHAR } );
807##      $sth->bind_param( 33, $survey,                    {TYPE => SQL_VARCHAR } );
808##      $sth->bind_param( 34, $results[$params_TELESCOP], {TYPE => SQL_VARCHAR } );
809##      $sth->bind_param( 35, $results[$params_SITE],     {TYPE => SQL_VARCHAR } );
810##      $sth->bind_param( 36, $results[$params_OBSRVR],   {TYPE => SQL_VARCHAR } );
811##      $sth->bind_param( 37, $path,                      {TYPE => SQL_VARCHAR } );
812##      $sth->bind_param( 38, $file_size,                 {TYPE => SQL_INTEGER } );
813#
814#       $sth->execute();
815##DEBUGGING
816##      DBI::dump_results($sth);
817##      $rows = $sth->dump_results();
818##      print "$rows\n";
819#       print "$sth\n";
820#
821#    # $db_handle->commit(); # Autocommit is already ON so no need for a manual commit
822#    };
823
824    if( $@ ) {
825        print "warning statement\n";
826        warn "Database error: $DBI::errstr\n";
827        $db_handle->rollback(); #just die if rollback is failing
828    }
829       
830       
831#DEBUGGING
832    $sth->finish();
833
834}
835
836#######################################################
837
838sub isDuplicate {
839
840#DEBUGGING
841#    return 0;
842
843    my $filename = $_[0];
844    my $search_data_loc;
845    my $search_filename;
846    my $pulsar_name;
847   
848    if ( $filename =~ m/(.*(J[0-9]{4}[+-][0-9]{1,4}.*))[\/]+([a-z][0-9]{6}_[0-9]{6}\.([a-z]{2,}))$/i ) {
849        $search_data_loc = $1;
850        $search_filename = $3;
851        $pulsar_name = $2;
852    }
853    elsif ($filename =~ m/(.*(J[0-9]{4}[+-][0-9]{1,4}.*))[\/]+([0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}:[0-9]{2}:[0-9]{2})$/i ) {
854        $search_data_loc = $1;
855        $search_filename = $3;
856        $pulsar_name = $2;
857    }
858    else {
859        print "$filename is an oddball -- not doing...";
860        return 1;
861    }
862   
863    my $sql;
864    if ($pulsar_name =~ m/.*_R$/) {
865       
866        $sql = qq{ SELECT * from cals
867                       WHERE data_loc LIKE ? AND
868                       filename    = ?
869                   };
870    }
871    else {
872        print "This is an observation\n";
873        $sql = qq{ SELECT * from observations
874                       WHERE data_loc LIKE ? AND
875                       filename    = ?
876                   };
877    }
878   
879    my $sth = $db_handle->prepare( $sql );
880   
881    eval {
882        print "like $search_data_loc and filename = $search_filename \n";
883       
884        $sth->bind_param( 1, $search_data_loc, SQL_VARCHAR );
885        $sth->bind_param( 2, $search_filename, SQL_VARCHAR );
886        $sth->execute();
887       
888    };
889   
890    if( $@ ) {
891        warn "Database error: $DBI::errstr\n";
892        $db_handle->rollback(); #just die if rollback is failing
893    }
894   
895    # read the records
896    my @data = $sth->fetchrow_array();
897   
898    $sth->finish();
899   
900    if (scalar(@data) > 0) {
901        return 1;
902    }
903   
904    return 0;
905
906}
907#######################################################
908
909# Removes
910sub deleteStaleRecords {
911
912    my $sql, my $sth, my @data, my $file;
913   
914    $sql = qq{ SELECT data_loc, filename from cals };
915    $sth = $db_handle->prepare( $sql );
916   
917    eval {
918       
919        $sth->execute();
920       
921    };
922   
923    if( $@ ) {
924        warn "Database error: $DBI::errstr\n";
925        $db_handle->rollback(); #just die if rollback is failing
926    }
927   
928    my @cals2delete;
929   
930    while (@data = $sth->fetchrow_array()) {
931       
932        if (scalar(@data) == 2) {
933            $file = $data[0]."/".$data[1];
934           
935            if (!-e $file) {
936                print "$file does NOT exist\n\n";
937               
938                # Push the row into the cals to delete
939                # So array will look like:
940                # {/full/path/to/dir, filename.cf, /full/path/to/dir, filename2.cf, etc..}
941                push (@cals2delete, @data);
942            }
943        }
944       
945    }
946   
947    $sql = qq{ SELECT data_loc, filename from observations };
948   
949    $sth = $db_handle->prepare( $sql );
950    eval {
951       
952        $sth->execute();
953       
954    };
955   
956    if( $@ ) {
957        warn "Database error: $DBI::errstr\n";
958        $db_handle->rollback(); #just die if rollback is failing
959    }
960   
961    my @obs2delete;
962   
963    while (@data = $sth->fetchrow_array()) {
964       
965        if (scalar(@data) == 2) {
966            $file = $data[0]."/".$data[1];
967           
968            if (!-e $file) {
969                print "$file does NOT exist\n\n";
970               
971                # Push the row into the cals to delete
972                # So array will look like:
973                # {/full/path/to/dir, filename.cf, /full/path/to/dir, filename2.cf, etc..}
974                push (@obs2delete, @data);
975            }
976        }                 
977    }
978   
979    print "There are " . scalar(@cals2delete)/2 . " stale cals and " .
980        scalar(@obs2delete)/2 . " stale obs to delete\n";
981   
982    # Delete the rows
983    my $data_loc, my $filename;
984   
985    while (@cals2delete) {
986        $data_loc = shift @cals2delete;
987        $filename = shift @cals2delete;
988       
989        $sql = qq{ DELETE FROM cals WHERE data_loc = ? AND filename = ?};
990       
991        $sth = $db_handle->prepare( $sql );
992        eval {
993           
994            $sth->bind_param( 1, $data_loc, SQL_VARCHAR );
995            $sth->bind_param( 2, $filename, SQL_VARCHAR );
996            $sth->execute();
997           
998        };
999       
1000        if( $@ ) {
1001            warn "Database error: $DBI::errstr\n";
1002            $db_handle->rollback(); #just die if rollback is failing
1003        }
1004        else {
1005            print "Successfully deleted $data_loc/$filename\n";
1006        }
1007       
1008    }
1009   
1010    while (@obs2delete) {
1011        $data_loc = shift @obs2delete;
1012        $filename = shift @obs2delete;
1013       
1014        $sql = qq{ DELETE FROM observations WHERE data_loc = ? AND filename = ?};
1015       
1016        $sth = $db_handle->prepare( $sql );
1017       
1018        eval {
1019           
1020            $sth->bind_param( 1, $data_loc, SQL_VARCHAR );
1021            $sth->bind_param( 2, $filename, SQL_VARCHAR );
1022            $sth->execute();
1023           
1024        };
1025       
1026        if( $@ ) {
1027            warn "Database error: $DBI::errstr\n";
1028            $db_handle->rollback(); #just die if rollback is failing
1029        }
1030        else {
1031            print "Successfully deleted $data_loc/$filename\n";
1032        }
1033    }
1034   
1035    $sth->finish();
1036   
1037}
1038
1039
1040
1041#######################################################
1042
1043sub connectdb {
1044       
1045    # Connect to the MySQL server
1046    my $dbh = DBI->connect("dbi:mysql:database=psrchive;host=localhost", "psrdba", "lighthouse")
1047        or die "Couldn't connect to database: $DBI::errstr\n";
1048   
1049    print "Successfully connected to db\n" if $dbh;
1050    return $dbh;
1051
1052# DEBUGGING     
1053#    print "We would normally connect to the DB here\n";
1054#    return 1;
1055}
Note: See TracBrowser for help on using the repository browser.