source: trunk/htdocs/perl/find_archives.pl

Last change on this file was 259, checked in by MatthewWhiting, 16 years ago

Some minor fixes to the find_archives script.

  • Property svn:executable set to *
File size: 30.4 KB
Line 
1#! /usr/bin/perl -w
2
3# find_archives.pl
4# Author: Matthew Whiting
5# Date: January 2008
6# ATNF, CSIRO
7
8# A modified version of Albert's find_archives.pl that hopefully works
9# a bit more consistently for the different data types.
10
11# Crawls through data archive directories (given as an input parameter),
12# searches for all observation and/or calibration files, enquires about
13# their pulsar parameters and inputs these details as well as their
14# location into a mySQL database
15
16use strict;
17#use lib "/DATA/SITAR_1/whi550/VO/PulsarVO/SQL-Abstract-1.22/blib/lib";
18use File::Find ();
19use DBI qw(:sql_types);
20use SQL::Abstract;
21use POSIX;
22
23#  foreach (@{ $DBI::EXPORT_TAGS{sql_types} }) {
24#      printf "%s=%d\n", $_, &{"DBI::$_"};
25#  }
26
27# for the convenience of &wanted calls, including -eval statements:
28use vars qw/*name *dir *prune @archive_dirs $archive_dir @files
29    $vap_cmd $vap_out $params $num_vap_params $db_handle
30    $filename $raj $decj $dm $period $bw $cnfg
31    $freq $inst $mjdint $mjdfrac $npol $nchan $nbin $nsub
32    $rcvr $site $length $obsrvr $data_loc $data_type
33    $file_size $MJD $raH $raM $raS $decD $decM $decS $rajd $decjd $decstring
34    $pi $pion180 $NGP_RA $NGP_DEC $ASC_NODE $draR $decR
35    $sinb $sinl $cosl $gb $gl $gl_raw
36    $bmaj $bmin $bpa $hdrver $survey $nbeam
37    $output $vap $archive_extensions $ext       
38    $default
39    @result_params $FILE_SIZE_INDEX $FB $DFB $WBC $CPSR2 $NULLFIELD
40    $params_NAME   
41    $params_PROJID 
42    $params_RAJ     
43    $params_DECJ   
44    $params_FREQ   
45    $params_BW     
46    $params_LENGTH 
47    $params_DATE   
48    $params_TIME   
49    $params_MJD
50    $params_BMAJ   
51    $params_BMIN   
52    $params_BPA     
53    $params_DM     
54    $params_PERIOD 
55    $params_NCHAN   
56    $params_NPOL   
57    $params_NBIN   
58    $params_NSUB   
59    $params_NBITS   
60    $params_TSAMP   
61    $params_NBEAM
62    $params_CNFG   
63    $params_INST   
64    $params_RCVR   
65    $params_HDRVER
66    $params_TELESCOP
67    $params_SITE   
68    $params_OBSRVR 
69    $params_FILENAME
70    /;  # $machine
71                                               
72*name   = *File::Find::name;
73*dir    = *File::Find::dir;
74*prune  = *File::Find::prune;
75
76# The data types
77$FB = "FB";
78$WBC = "WBC";
79$DFB = "DFB";
80$CPSR2 = "CPSR2";
81
82# filename extensions
83$archive_extensions = "rf cf cfb fb";
84
85# How to write NULL fields to the database
86#$NULLFIELD = undef;
87$NULLFIELD = "NULL";
88
89# Pulsar parameters (listed in "vap -H")
90$params  = "name projid ra dec freq bw length stt_date stt_time mjd bmaj bmin bpa dm period nchan npol nbin nsub nbits tsamp nbeam beconfig backend rcvr hdrver telescop asite observer";
91
92# The location of vap
93$vap = "/pulsar/psr/linux/bin/vap -n";
94
95# vap result parameter indices. Used to locate the resulting
96# parameter's location
97# Note that we start at 1, since the vap output has the filename in the first position
98$params_NAME    =  1;
99$params_PROJID  =  2;
100$params_RAJ     =  3;
101$params_DECJ    =  4;
102$params_FREQ    =  5;
103$params_BW      =  6;
104$params_LENGTH  =  7;
105$params_DATE    =  8;
106$params_TIME    =  9;
107$params_MJD     = 10;
108$params_BMAJ    = 11;
109$params_BMIN    = 12;
110$params_BPA     = 13;
111$params_DM      = 14;
112$params_PERIOD  = 15;
113$params_NCHAN   = 16;
114$params_NPOL    = 17;
115$params_NBIN    = 18;
116$params_NSUB    = 19;
117$params_NBITS   = 20;
118$params_TSAMP   = 21;
119$params_NBEAM   = 22;
120$params_CNFG    = 23;
121$params_INST    = 24;
122$params_RCVR    = 25;
123$params_HDRVER  = 26;
124$params_TELESCOP= 27;
125$params_SITE    = 28;
126$params_OBSRVR  = 29;
127$params_FILENAME= 30;
128
129
130#####################
131# Begin main method
132#
133my @vap_params = split(/ /, $params);
134$num_vap_params = scalar(@vap_params);
135
136# The column number (starting from 0) of the file
137# size when calling ls -l
138$FILE_SIZE_INDEX = 4;
139
140# Use the default operation which is to firstly delete any
141# stale records, then crawl through and insert new records
142$default = 1;
143
144if (scalar(@ARGV) == 0) {
145        print
146"find_archives.pl: A crawler that populates a relational database\
147                with indexes to observational data along with their respective\
148                cal files.\n\
149 Usage: find_archives.pl [options] directory1 [directory2 ...]\
150        If no options are provided, it will default to do both -d and -p\
151 Options:\
152 \t-d Delete stale records\
153 \t-p Populate database with new records\n";
154       
155}
156
157# Connect to the database
158$db_handle = connectdb();
159
160#########################################################
161# Step 1. First remove any stale entries in the database
162#         (remove file locations that no longer exist)
163#########################################################
164
165if ($ARGV[0] eq "-d") {
166        deleteStaleRecords();
167        $default = 0;
168        shift @ARGV;
169}
170
171
172#########################################################
173# Step 2. Crawl through each input directory and insert
174#         any new archives
175
176if ($ARGV[0] eq "-p") {
177        shift @ARGV;
178        @archive_dirs = @ARGV;
179
180        foreach $archive_dir (@archive_dirs) {
181
182                # Traverse desired filesystems
183                File::Find::find({wanted => \&wanted}, $archive_dir);
184        }
185        $default = 0;
186}
187
188# Else do both
189if ($default) {
190
191        deleteStaleRecords();
192
193        @archive_dirs = @ARGV;
194
195        foreach $archive_dir (@archive_dirs) {
196               
197                # Check that the input directory is legit
198               
199                # Traverse desired filesystems
200                File::Find::find({wanted => \&wanted}, $archive_dir);
201        }
202
203}
204#if ($db_handle->disconnect()) {
205#       print "Successfully Disconnected from database\n";
206#}
207#else {
208#       print "Error: Failed to disconnect from the database\n";
209#       exit;
210#}
211
212######################################################################
213
214######################################################################
215# Subroutine definitions
216#
217######################################################################
218
219#######################################################
220
221sub wanted {
222 
223       
224        if (-d and /^[a-zA-Z]{0,1}[0-9]{4}[+-][0-9]{4}.*/s ) {
225
226            if ($File::Find::name !~ /.*S$/) { # ignore scrunch directories
227
228                print "\n\nIn directory $_\n\n";
229                print "\$File::Find::name = $File::Find::name\n";
230               
231                my $search_str;
232                foreach $ext (split (/\s+/, $archive_extensions) ) {
233                        $search_str .= $File::Find::name . "/*.$ext "
234                }
235
236                my @all_files = glob($search_str);
237
238                print "Number of files found = " . scalar(@all_files) . "\n";
239               
240                if (scalar(@all_files) == 0) {
241                    # No data files means it's likely to be a CPSR2 directory
242
243                    # Foreach observation
244                    my @cpsr2_obs_dirs = glob($File::Find::name . "/????-??-??-??:??:??");
245                   
246                    print "Number of CPSR2 obs found = " . scalar(@cpsr2_obs_dirs) . "\n";
247               
248                    my $cpsr2_obs_dir;
249                    foreach $cpsr2_obs_dir (@cpsr2_obs_dirs) {
250                        print "dir is $cpsr2_obs_dir\n";
251                       
252                        # Need to check if this entry already exists
253                        $cpsr2_obs_dir =~ s/\s//g;
254                       
255                        if (isDuplicate($cpsr2_obs_dir) == 1) {
256                            print "$cpsr2_obs_dir is a duplicate entry. Skipping....\n\n";
257                            next;
258                        }                               
259                       
260                        print "$cpsr2_obs_dir NOT a duplicate. Extracting data...\n";
261                        find_data_cpsr2($cpsr2_obs_dir);                       
262                       
263                    }
264                   
265                }
266                else{
267
268                    my $file;
269                    foreach $file (@all_files) {
270                       
271                        # Need to check if this entry already exists
272                        $file =~ s/\s//g;
273                       
274                        if (isDuplicate($file) == 1) {
275                            print "$file is a duplicate entry. Skipping....\n\n";
276                            next;
277                        }                               
278                       
279                        print "$file NOT a duplicate. Inserting into database here ...\n";
280                        find_data($file);
281                       
282                    }
283                   
284                }
285               
286            }
287           
288        }
289    }
290               
291
292
293#######################################################
294
295sub find_data {
296   
297# Usage: find_data(filename)
298# Runs vap on the filename, and extracts parameters, then sends them to populate_database() to be written to the database
299#
300# NOTE: this is for non-CPSR2 data
301
302    my $file = $_[0];
303
304    my $vap_cmd = "$vap -c \"$params\" $file  | grep -v filename | grep -v -x \"\" |";
305    print "Calling vap...$vap_cmd\n";
306    open(VAP,$vap_cmd);
307    while ($vap_out = <VAP>) {
308        $vap_out =~ s/^\s+//g; #remove any leading space
309        my @result_params = split(/\s+/, $vap_out);
310       
311        populate_database(@result_params,$file);
312#       if (scalar(@result_params) == ($num_vap_params + 1)) {    # +1 because first column is filename
313#           print "@result_params\n";
314#           populate_observations($file);
315#       }
316#       else {
317#           print "Incorrect number of cols = ".scalar(@result_params).". Expected " . $num_vap_params+1 ."\n";
318#           print "@result_params\n";
319#       }
320    }
321    print "\n";
322 
323}
324
325#######################################################
326
327sub find_data_cpsr2 {
328   
329# Usage: find_data_cpsr2(filename) where filename = ????-??-??-??:??:??
330# Extracts parameters for the observation using vap, then sends them to populate_database() to be written to the database
331#
332# NOTE: this is only for CPSR2 data
333
334    my $dir = $_[0];
335    my $ext;
336    my @result_params;
337    my @tmp_params;
338    my $prefix = "m n o";
339
340    my @all_files_fb = glob($dir."/*.fb");
341    my @all_files_cfb = glob($dir."/*.cfb");
342    if ( scalar(@all_files_fb)!=0 ){
343        $ext = ".fb";
344    }
345    else{
346        $ext = ".cfb";
347    }
348    print "Searching in $dir for $ext files\n";
349
350    my $pre;
351    foreach $pre (split (/\s+/, ($prefix))) {
352
353        my @all_files;
354        my $search_str = $dir."/".$pre."*".$ext;
355        print "$pre: Searching for files like: $search_str\n";
356        @all_files = glob($search_str);
357        print "Found ". scalar(@all_files) . "\n";
358
359        if(scalar(@all_files) != 0) {
360   
361            my $total_length=0;
362            my $file;
363            my $firstGoodOne = 0;
364            foreach $file (@all_files) { #loop over all files in directory to get total obs time.
365                my @tmp_params;
366                my $vap_cmd = "$vap -n -c \"$params\" $file  | grep -v filename | grep -v -x \"\" |";
367                open(VAP,$vap_cmd);
368                my $vap_out;
369                while ($vap_out = <VAP>) {
370                    $vap_out =~ s/^\s+//g; #remove any leading space
371                    @tmp_params = split(/\s+/, $vap_out);
372                }
373
374#               print "filename = $file\ttmp_params[0] = $tmp_params[0]\tfirstGoodOne = $firstGoodOne\n";
375
376                my @tmpdirs = split(/\//, $file);
377                my $tmpfilename = $tmpdirs[$#tmpdirs];
378
379                if((scalar(@tmp_params)>0) && ($tmpfilename eq $tmp_params[0])){
380
381                    if($firstGoodOne == 0){
382                        # Keep most of the parameters for the first good file
383                        @result_params = @tmp_params;
384                        $firstGoodOne = 1;
385                    }
386                    $total_length += $tmp_params[$params_LENGTH];
387                }
388            }
389
390            if($firstGoodOne != 0){
391                print "total length = $total_length\n";
392                $result_params[$params_LENGTH] = $total_length;
393           
394                populate_database(@result_params,$dir);
395            }
396        }   
397    }
398
399}
400
401#######################################################
402
403sub populate_database {
404   
405# Usage: populate_database(file, $result_params), where file has the full path to the file/directory in question,
406#   and $result_params have been filled with the vap results
407#
408#  This subroutine calculates all necessary parameters (such as data type, decimal positions, beam, file size, ...)
409#   and writes them to the database.
410
411    my @results = @_;
412    my $full_filename = $results[$#results];
413#    @results = shift @results;
414    print "full_filename = $full_filename\nRESULT_PARAMS (POP):\n@results\n";
415
416    my @dirs = split(/\//, $full_filename);
417    my $path = $dirs[0];
418    my $i;
419    for ($i=1; $i < $#dirs; $i++){
420        $path .= "/".$dirs[$i];
421    }
422    my $filename = $dirs[$#dirs];
423    print "path = $path, filename = $filename\n";
424   
425    if( $filename =~ /[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}:[0-9]{2}:[0-9]{2}/ ) {
426        # Case of CPSR2 data file
427        $data_type = $CPSR2;   
428       
429        my @time_date = split('-', $filename);
430        $results[$params_DATE] = $time_date[0]."-".$time_date[1]."-".$time_date[2];
431        $results[$params_TIME] = $time_date[3];
432       
433        my $prefix = substr $results[0], 0, 1;
434        print "vap filename = $results[0] and the prefix = $prefix\n";
435
436        my $du = `du -sb $full_filename/$prefix*fb | awk 'BEGIN{sum=0}{sum+=\$1}END{print sum}'`;
437        print "Running du: -> du -sb $full_filename/$prefix*fb | awk 'BEGIN{sum=0}{sum+=\$1}END{print sum}'\n";
438        my @file_details = split(/\s+/, $du);
439        $file_size = $file_details[0];
440    }
441    else{
442        if ($filename =~ /^a.*\.rf/ || $filename =~ /^a.*\.cf/ ||
443            $filename =~ /^r.*\.rf/ || $filename =~ /^r.*\.cf/) {
444            $data_type = $DFB;
445        }
446        elsif ($filename =~ /^w.*\.rf/ || $filename =~ /^w.*\.cf/) {
447            $data_type = $WBC;
448        }
449        else {
450            $data_type = $NULLFIELD;
451        }
452       
453        my $ls = `ls -l $full_filename`;
454        my @file_details = split(/\s+/, $ls);
455        $file_size = $file_details[$FILE_SIZE_INDEX];
456    }
457
458    print "RESULT_PARAMS (POP 2):\n@results\n";
459   
460    #### Calculate the RA and Dec in decimal degrees.
461
462    ($raH,$raM,$raS) = split(':', $results[$params_RAJ]);
463    $rajd = ($raH + $raM/60. + $raS/3600.) * 15.;
464   
465    ($decD,$decM,$decS) = split(':', $results[$params_DECJ]);
466    $decjd = (abs($decD) + $decM/60. + $decS/3600.);
467    my @decstring = split(/ */,$results[$params_DECJ]);
468    if($decstring[0] eq '-'){
469        $decjd = -1. * $decjd;
470    }
471
472    ### Calculate the Galactic Longitude and latitude
473    $pi=asin(1) * 2.;
474    $pion180 = $pi/180.;
475    $NGP_RA = 192.859508 * $pion180; # location of NGP
476    $NGP_DEC= 27.128336 * $pion180;
477    $ASC_NODE=32.932;
478   
479    $draR = $rajd*$pion180 - $NGP_RA;
480    $decR = $decjd*$pion180;
481    $sinb = cos($decR) * cos($NGP_DEC) * cos($draR) + sin($decR) * sin($NGP_DEC);
482    $gb = asin($sinb); # this is the latitude, but in radians.
483   
484    $sinl = (sin($decR) * cos($NGP_DEC) - cos($decR) * cos($draR) * sin($NGP_DEC)) / cos($gb);
485    $cosl = cos($decR) * sin($draR) / cos($gb);
486   
487    # Need to get the correct quadrant, as this isn't preserved by
488    # atan, which returns angle between -90 and 90.
489    $gl_raw = atan($sinl/$cosl);
490    if($sinl > 0){
491        if($cosl > 0 ){ $gl = $gl_raw; }
492        else { $gl = $gl_raw + $pi; }
493    }
494    else {
495        if($cosl > 0){ $gl = $gl_raw + 2.*$pi; }
496        else{ $gl = $gl_raw + $pi; }
497    }
498    # Now put them into degrees.
499    $gb = $gb / $pion180;
500    $gl = ($gl / $pion180) + $ASC_NODE;
501   
502    print "rajd = $rajd, decjd=$decjd, gl=$gl, gb=$gb\n";
503
504
505    # Beam information
506    $bmaj = $results[$params_BMAJ];
507    $bmin = $results[$params_BMIN];
508    $bpa = $results[$params_BPA];
509    if($bmaj eq "UNDEF" || $bmaj eq "*" || $bmaj == 0.){
510        # could not get beam info from vap output.
511        # Need to calculate manually.
512        if( $results[$params_FREQ] != 0.){
513            # if the frequency is zero, do not calculate these...
514            $bmaj = (1.2*(299792458./($results[$params_FREQ] * 1.e6))/64.) / $pion180;
515            $bmin = (1.2*(299792458./($results[$params_FREQ] * 1.e6))/64.) / $pion180;
516            $bpa = 0.;
517        }
518        else{
519            $bmaj = $NULLFIELD;
520            $bmin = $NULLFIELD;
521            $bpa = $NULLFIELD;
522        }
523    }
524   
525    if($results[$params_NBITS] eq "UNDEF" || $results[$params_NBITS] == 0){
526        $results[$params_NBITS] = $NULLFIELD;
527    }
528    if($results[$params_TSAMP] eq "UNDEF" || $results[$params_TSAMP] == 0){
529        $results[$params_TSAMP] = $NULLFIELD;
530    }
531
532    # Fix telescope -- always PARKES
533    if($results[$params_TELESCOP] eq "unknown") {
534        $results[$params_TELESCOP] = "PARKES";
535    }
536
537    # Dud things that aren't required for this set of data.
538    $survey = $NULLFIELD;
539    $nbeam = 1;
540       
541    # Clean up any NULL parameters
542    for ($i = 0; $i <= $#results; $i++) {
543        if ($results[$i] eq "N/A" || $results[$i] eq "UNDEF" ||
544            $results[$i] eq "INVALID" ||
545            $results[$i] eq "*error*" || $results[$i] eq "*") {
546
547            $results[$i] = $NULLFIELD;
548
549        }
550    }
551
552
553    # print out the list of parameters
554    print "filename = $filename\n";
555    print "src_name = " . $results[$params_NAME] . "\n";
556    print "projid = " . $results[$params_PROJID] . "\n";
557    print "raj = " . $results[$params_RAJ] . "\n";
558    print "dec = " . $results[$params_DECJ] . "\n";
559    print "data_type = $data_type\n";
560    print "freq = " . $results[$params_FREQ] . "\n";
561    print "bw = " . $results[$params_BW] . "\n";
562    print "scanlen = " . $results[$params_LENGTH] . "\n";
563    print "date = " . $results[$params_DATE] ."\n";
564    print "ut = " . $results[$params_TIME] ."\n";
565    print "MJD = " . $results[$params_MJD] ."\n";
566    print "rajd = " . $rajd ."\n";
567    print "decjd = " . $decjd ."\n";
568    print "gl = " . $gl ."\n";
569    print "gb = " . $gb ."\n";
570    print "bmaj = " . $bmaj ."\n";
571    print "bmin = " . $bmin ."\n";
572    print "bpa = " . $bpa ."\n";
573    print "dm = " . $results[$params_DM] . "\n";
574    print "period = " . $results[$params_PERIOD] . "\n";
575    print "nchan = " . $results[$params_NCHAN] . "\n";
576    print "npol = " . $results[$params_NPOL] . "\n";
577    print "nbin = " . $results[$params_NBIN] . "\n";
578    print "nsub = " . $results[$params_NSUB] . "\n";
579    print "tsamp = " . $results[$params_TSAMP] . "\n";
580    print "nbits = " . $results[$params_NBITS] . "\n";
581    print "nbeam = " . $nbeam . "\n";
582    print "cnfg = " . $results[$params_CNFG] . "\n";
583    print "inst = " . $results[$params_INST] . "\n";
584    print "rcvr = " . $results[$params_RCVR] . "\n";
585    print "hdrver = " . $results[$params_HDRVER] . "\n";
586    print "survey = " . $survey . "\n";
587    print "telescope = " . $results[$params_TELESCOP] . "\n";
588    print "site = " . $results[$params_SITE] . "\n";
589    print "obsrvr = " . $results[$params_OBSRVR] . "\n";
590    print "data_loc = $path\n";
591    print "file_size = $file_size bytes\n";
592
593    # Clean up any NULL parameters ready for SQL import
594    for ($i = 0; $i <= $#results; $i++) {
595        if ($results[$i] eq $NULLFIELD) {
596            $results[$i] = undef;
597        }
598    }
599
600######### Now write the data to the mySQL database
601
602    my %data = ( filename => $filename,
603                 src_name => $results[$params_NAME],
604                 project_id => $results[$params_PROJID],
605                 raj => $results[$params_RAJ],
606                 decj => $results[$params_DECJ],
607                 data_type => $data_type,
608                 obsfreq => $results[$params_FREQ],
609                 bw => $results[$params_BW],
610                 scanlen => $results[$params_LENGTH],
611                 date => $results[$params_DATE],
612                 ut => $results[$params_TIME],
613                 MJD => $results[$params_MJD],
614                 rajd => $rajd,
615                 decjd => $decjd,
616                 gl => $gl,
617                 gb => $gb,
618                 BMAJ => $bmaj,
619                 BMIN => $bmin,
620                 BPA => $bpa,
621                 dm => $results[$params_DM],
622                 period => $results[$params_PERIOD],
623                 nchan => $results[$params_NCHAN],
624                 npol => $results[$params_NPOL],
625                 nbin => $results[$params_NBIN],
626                 nsub => $results[$params_NSUB],
627                 tsamp => $results[$params_TSAMP],
628                 nbits => $results[$params_NBITS],
629                 nbeam => $nbeam,
630                 cnfg => $results[$params_CNFG],
631                 inst => $results[$params_INST],
632                 rcvr => $results[$params_RCVR],
633                 hdrver => $results[$params_HDRVER],
634                 survey => $survey,
635                 telescope => $results[$params_TELESCOP],
636                 site => $results[$params_SITE],
637                 obsrvr => $results[$params_OBSRVR],
638                 data_loc => $path,
639                 file_size_bytes => $file_size
640                 );
641               
642    my $sql = SQL::Abstract->new;
643    my $tableName;
644    if ($path =~ /\_R/ ) {
645        $tableName = 'cals';
646    }
647    else {
648        $tableName = 'observations';
649    }   
650    my($stmt, @bind) = $sql->insert($tableName, \%data);
651
652    my $sth = $db_handle->prepare($stmt);
653    $sth->execute(@bind);
654
655
656
657
658#    if ($path =~ /\_R/ ) {
659#       print "Inserting cal file $filename\n";
660#       
661#       $sql = qq{ INSERT INTO cals
662#                      (filename, src_name, project_id,
663#                       raj, decj, data_type, obsfreq, bw,
664#                       scanlen, date, ut, MJD,
665#                       rajd, decjd, gl, gb, BMAJ, BMIN, BPA,
666#                       dm, period, nchan, npol, nbin, nsub,
667#                       tsamp, nbits, nbeam,
668#                       cnfg, inst, rcvr, hdrver, survey,
669#                       telescope, site, obsrvr,
670#                       data_loc, file_size_bytes
671#                       )
672#                     
673#                      VALUES
674#                      ( ?, ?, ?
675#                        ?, ?, ?, CAST(? AS DECIMAL), CAST(? AS DECIMAL),
676#                        CAST(? AS DECIMAL), ?, ?, CAST(? AS DECIMAL),
677#                        CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL),
678#                        CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED),
679#                        CAST(? AS DECIMAL), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED),
680#                        ?, ?, ?, ?, ?,
681#                        ?, ?, ?,
682#                        ?, CAST(? AS UNSIGNED)
683#                        )
684##                     ( ?, ?, ?
685##                       ?, ?, ?, ?, ?,
686##                       ?, ?, ?, ?,
687##                       ?, ?, ?, ?, ?, ?, ?,
688##                       ?, ?, ?, ?, ?, ?,
689##                       ?, ?, ?,
690##                       ?, ?, ?, ?, ?,
691##                       ?, ?, ?,
692##                       ?, ?,
693##                       )
694#                  };
695#    }
696#    else {
697#       
698#       print "Inserting obs file $filename\n";
699#       
700#       $sql = qq{ INSERT INTO observations
701#                      (filename, src_name, project_id,
702#                       raj, decj, data_type, obsfreq, bw,
703#                       scanlen, date, ut, MJD,
704#                       rajd, decjd, gl, gb, BMAJ, BMIN, BPA,
705#                       dm, period, nchan, npol, nbin, nsub,
706#                       tsamp, nbits, nbeam,
707#                       cnfg, inst, rcvr, hdrver, survey,
708#                       telescope, site, obsrvr,
709#                       data_loc, file_size_bytes
710#                       )
711#                     
712#                      VALUES
713#                      ( ?, ?, ?
714#                        ?, ?, ?, CAST(? AS DECIMAL), CAST(? AS DECIMAL),
715#                        CAST(? AS DECIMAL), ?, ?, CAST(? AS DECIMAL),
716#                        CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS DECIMAL),
717#                        CAST(? AS DECIMAL), CAST(? AS DECIMAL), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED),
718#                        CAST(? AS DECIMAL), CAST(? AS UNSIGNED), CAST(? AS UNSIGNED),
719#                        ?, ?, ?, ?, ?,
720#                        ?, ?, ?,
721#                        ?, CAST(? AS UNSIGNED)
722#                        )
723##                     ( ?, ?, ?
724##                       ?, ?, ?, ?, ?,
725##                       ?, ?, ?, ?,
726##                       ?, ?, ?, ?, ?, ?, ?,
727##                       ?, ?, ?, ?, ?, ?,
728##                       ?, ?, ?,
729##                       ?, ?, ?, ?, ?,
730##                       ?, ?, ?,
731##                       ?, ?,
732##                       )
733#                  };
734#    }
735#       my $sth = $db_handle->prepare( $sql );
736##DEBUGGING     
737##    my $sth;
738#   
739#    #for( @records ) {
740#    eval {
741#       
742##      $sth->bind_param( 1,  $filename);
743##      $sth->bind_param( 2,  $results[$params_NAME]);
744##      $sth->bind_param( 3,  $results[$params_PROJID]);
745##      $sth->bind_param( 4,  $results[$params_RAJ]);
746##      $sth->bind_param( 5,  $results[$params_DECJ]);
747##      $sth->bind_param( 6,  $data_type);
748##      $sth->bind_param( 7,  $results[$params_FREQ]);
749##      $sth->bind_param( 8,  $results[$params_BW]);
750##      $sth->bind_param( 9,  $results[$params_LENGTH]);
751##      $sth->bind_param( 10, $results[$params_DATE]);
752##      $sth->bind_param( 11, $results[$params_TIME]);
753##      $sth->bind_param( 12, $results[$params_MJD]);
754##      $sth->bind_param( 13, $rajd);
755##      $sth->bind_param( 14, $decjd);
756##      $sth->bind_param( 15, $gl);
757##      $sth->bind_param( 16, $gb);
758##      $sth->bind_param( 17, $bmaj);
759##      $sth->bind_param( 18, $bmin);
760##      $sth->bind_param( 19, $bpa);
761##      $sth->bind_param( 20, $results[$params_DM]);
762##      $sth->bind_param( 21, $results[$params_PERIOD]);
763##      $sth->bind_param( 22, $results[$params_NCHAN]);
764##      $sth->bind_param( 23, $results[$params_NPOL]);
765##      $sth->bind_param( 24, $results[$params_NBIN]);
766##      $sth->bind_param( 25, $results[$params_NSUB]);
767##      $sth->bind_param( 26, $results[$params_TSAMP]);
768##      $sth->bind_param( 27, $results[$params_NBITS]);
769##      $sth->bind_param( 28, $nbeam);
770##      $sth->bind_param( 29, $results[$params_CNFG]);
771##      $sth->bind_param( 30, $results[$params_INST]);
772##      $sth->bind_param( 31, $results[$params_RCVR]);
773##      $sth->bind_param( 32, $results[$params_HDRVER]);
774##      $sth->bind_param( 33, $survey);
775##      $sth->bind_param( 34, $results[$params_TELESCOP]);
776##      $sth->bind_param( 35, $results[$params_SITE]);
777##      $sth->bind_param( 36, $results[$params_OBSRVR]);
778##      $sth->bind_param( 37, $path);
779##      $sth->bind_param( 38, $file_size);
780#
781##      $sth->bind_param( 1,  $filename,                  {TYPE => SQL_VARCHAR } );
782##      $sth->bind_param( 2,  $results[$params_NAME],     {TYPE => SQL_VARCHAR } );
783##      $sth->bind_param( 3,  $results[$params_PROJID],   {TYPE => SQL_VARCHAR } );
784##      $sth->bind_param( 4,  $results[$params_RAJ],      {TYPE => SQL_VARCHAR } );
785##      $sth->bind_param( 5,  $results[$params_DECJ],     {TYPE => SQL_VARCHAR } );
786##      $sth->bind_param( 6,  $data_type,                 {TYPE => SQL_VARCHAR } );
787##      $sth->bind_param( 7,  $results[$params_FREQ],     {TYPE => SQL_DOUBLE } );
788##      $sth->bind_param( 8,  $results[$params_BW],       {TYPE => SQL_DOUBLE } );
789##      $sth->bind_param( 9,  $results[$params_LENGTH],   {TYPE => SQL_DOUBLE } );
790##      $sth->bind_param( 10, $results[$params_DATE],     {TYPE => SQL_VARCHAR } );
791##      $sth->bind_param( 11, $results[$params_TIME],     {TYPE => SQL_VARCHAR } );
792##      $sth->bind_param( 12, $results[$params_MJD],      {TYPE => SQL_DOUBLE } );
793##      $sth->bind_param( 13, $rajd,                      {TYPE => SQL_DOUBLE } );
794##      $sth->bind_param( 14, $decjd,                     {TYPE => SQL_DOUBLE } );
795##      $sth->bind_param( 15, $gl,                        {TYPE => SQL_DOUBLE } );
796##      $sth->bind_param( 16, $gb,                        {TYPE => SQL_DOUBLE } );
797##      $sth->bind_param( 17, $bmaj,                      {TYPE => SQL_FLOAT } );
798##      $sth->bind_param( 18, $bmin,                      {TYPE => SQL_FLOAT } );
799##      $sth->bind_param( 19, $bpa,                       {TYPE => SQL_FLOAT } );
800##      $sth->bind_param( 20, $results[$params_DM],       {TYPE => SQL_DOUBLE } );
801##      $sth->bind_param( 21, $results[$params_PERIOD],   {TYPE => SQL_DOUBLE } );   
802##      $sth->bind_param( 22, $results[$params_NCHAN],    {TYPE => SQL_INTEGER } );
803##      $sth->bind_param( 23, $results[$params_NPOL],     {TYPE => SQL_INTEGER } );
804##      $sth->bind_param( 24, $results[$params_NBIN],     {TYPE => SQL_INTEGER } );   
805##      $sth->bind_param( 25, $results[$params_NSUB],     {TYPE => SQL_INTEGER } );
806##      $sth->bind_param( 26, $results[$params_TSAMP],    {TYPE => SQL_FLOAT } );
807##      $sth->bind_param( 27, $results[$params_NBITS],    {TYPE => SQL_TINYINT } );
808##      $sth->bind_param( 28, $nbeam,                     {TYPE => SQL_TINYINT } );
809##      $sth->bind_param( 29, $results[$params_CNFG],     {TYPE => SQL_VARCHAR } );
810##      $sth->bind_param( 30, $results[$params_INST],     {TYPE => SQL_VARCHAR } );
811##      $sth->bind_param( 31, $results[$params_RCVR],     {TYPE => SQL_VARCHAR } );
812##      $sth->bind_param( 32, $results[$params_HDRVER],   {TYPE => SQL_VARCHAR } );
813##      $sth->bind_param( 33, $survey,                    {TYPE => SQL_VARCHAR } );
814##      $sth->bind_param( 34, $results[$params_TELESCOP], {TYPE => SQL_VARCHAR } );
815##      $sth->bind_param( 35, $results[$params_SITE],     {TYPE => SQL_VARCHAR } );
816##      $sth->bind_param( 36, $results[$params_OBSRVR],   {TYPE => SQL_VARCHAR } );
817##      $sth->bind_param( 37, $path,                      {TYPE => SQL_VARCHAR } );
818##      $sth->bind_param( 38, $file_size,                 {TYPE => SQL_INTEGER } );
819#
820#       $sth->execute();
821##DEBUGGING
822##      DBI::dump_results($sth);
823##      $rows = $sth->dump_results();
824##      print "$rows\n";
825#       print "$sth\n";
826#
827#    # $db_handle->commit(); # Autocommit is already ON so no need for a manual commit
828#    };
829
830    if( $@ ) {
831        print "warning statement\n";
832        warn "Database error: $DBI::errstr\n";
833        $db_handle->rollback(); #just die if rollback is failing
834    }
835       
836       
837#DEBUGGING
838    $sth->finish();
839
840}
841
842#######################################################
843
844sub isDuplicate {
845
846#DEBUGGING
847#    return 0;
848
849    my $filename = $_[0];
850    my $search_data_loc;
851    my $search_filename;
852    my $pulsar_name;
853   
854    if ( $filename =~ m/(.*([J,B]{0,1}[0-9]{4}[+-][0-9]{1,4}.*))[\/]+([a-z][0-9]{6}_[0-9]{6}\.([a-z]{2,}))$/i ) {
855        $search_data_loc = $1;
856        $search_filename = $3;
857        $pulsar_name = $2;
858    }
859    elsif ($filename =~ m/(.*([J,B]{0,1}[0-9]{4}[+-][0-9]{1,4}.*))[\/]+([0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}:[0-9]{2}:[0-9]{2})$/i ) {
860        $search_data_loc = $1;
861        $search_filename = $3;
862        $pulsar_name = $2;
863    }
864    else {
865        print "$filename is an oddball -- not doing...\n";
866        return 1;
867    }
868   
869    my $sql;
870    if ($pulsar_name =~ m/.*_R$/) {
871       
872        $sql = qq{ SELECT * from cals
873                       WHERE data_loc LIKE ? AND
874                       filename    = ?
875                   };
876    }
877    else {
878        print "This is an observation\n";
879        $sql = qq{ SELECT * from observations
880                       WHERE data_loc LIKE ? AND
881                       filename    = ?
882                   };
883    }
884   
885    my $sth = $db_handle->prepare( $sql );
886   
887    eval {
888        print "like $search_data_loc and filename = $search_filename \n";
889       
890        $sth->bind_param( 1, $search_data_loc, SQL_VARCHAR );
891        $sth->bind_param( 2, $search_filename, SQL_VARCHAR );
892        $sth->execute();
893       
894    };
895   
896    if( $@ ) {
897        warn "Database error: $DBI::errstr\n";
898        $db_handle->rollback(); #just die if rollback is failing
899    }
900   
901    # read the records
902    my @data = $sth->fetchrow_array();
903   
904    $sth->finish();
905   
906    if (scalar(@data) > 0) {
907        return 1;
908    }
909   
910    return 0;
911
912}
913#######################################################
914
915# Removes
916sub deleteStaleRecords {
917
918    my $sql, my $sth, my @data, my $file;
919   
920    $sql = qq{ SELECT data_loc, filename from cals };
921    $sth = $db_handle->prepare( $sql );
922   
923    eval {
924       
925        $sth->execute();
926       
927    };
928   
929    if( $@ ) {
930        warn "Database error: $DBI::errstr\n";
931        $db_handle->rollback(); #just die if rollback is failing
932    }
933   
934    my @cals2delete;
935   
936    while (@data = $sth->fetchrow_array()) {
937       
938        if (scalar(@data) == 2) {
939            $file = $data[0]."/".$data[1];
940           
941            if (!-e $file) {
942                print "$file does NOT exist\n\n";
943               
944                # Push the row into the cals to delete
945                # So array will look like:
946                # {/full/path/to/dir, filename.cf, /full/path/to/dir, filename2.cf, etc..}
947                push (@cals2delete, @data);
948            }
949        }
950       
951    }
952   
953    $sql = qq{ SELECT data_loc, filename from observations };
954   
955    $sth = $db_handle->prepare( $sql );
956    eval {
957       
958        $sth->execute();
959       
960    };
961   
962    if( $@ ) {
963        warn "Database error: $DBI::errstr\n";
964        $db_handle->rollback(); #just die if rollback is failing
965    }
966   
967    my @obs2delete;
968   
969    while (@data = $sth->fetchrow_array()) {
970       
971        if (scalar(@data) == 2) {
972            $file = $data[0]."/".$data[1];
973           
974            if (!-e $file) {
975                print "$file does NOT exist\n\n";
976               
977                # Push the row into the cals to delete
978                # So array will look like:
979                # {/full/path/to/dir, filename.cf, /full/path/to/dir, filename2.cf, etc..}
980                push (@obs2delete, @data);
981            }
982        }                 
983    }
984   
985    print "There are " . scalar(@cals2delete)/2 . " stale cals and " .
986        scalar(@obs2delete)/2 . " stale obs to delete\n";
987   
988    # Delete the rows
989    my $data_loc, my $filename;
990   
991    while (@cals2delete) {
992        $data_loc = shift @cals2delete;
993        $filename = shift @cals2delete;
994       
995        $sql = qq{ DELETE FROM cals WHERE data_loc = ? AND filename = ?};
996       
997        $sth = $db_handle->prepare( $sql );
998        eval {
999           
1000            $sth->bind_param( 1, $data_loc, SQL_VARCHAR );
1001            $sth->bind_param( 2, $filename, SQL_VARCHAR );
1002            $sth->execute();
1003           
1004        };
1005       
1006        if( $@ ) {
1007            warn "Database error: $DBI::errstr\n";
1008            $db_handle->rollback(); #just die if rollback is failing
1009        }
1010        else {
1011            print "Successfully deleted $data_loc/$filename\n";
1012        }
1013       
1014    }
1015   
1016    while (@obs2delete) {
1017        $data_loc = shift @obs2delete;
1018        $filename = shift @obs2delete;
1019       
1020        $sql = qq{ DELETE FROM observations WHERE data_loc = ? AND filename = ?};
1021       
1022        $sth = $db_handle->prepare( $sql );
1023       
1024        eval {
1025           
1026            $sth->bind_param( 1, $data_loc, SQL_VARCHAR );
1027            $sth->bind_param( 2, $filename, SQL_VARCHAR );
1028            $sth->execute();
1029           
1030        };
1031       
1032        if( $@ ) {
1033            warn "Database error: $DBI::errstr\n";
1034            $db_handle->rollback(); #just die if rollback is failing
1035        }
1036        else {
1037            print "Successfully deleted $data_loc/$filename\n";
1038        }
1039    }
1040   
1041    $sth->finish();
1042   
1043}
1044
1045
1046
1047#######################################################
1048
1049sub connectdb {
1050       
1051    # Connect to the MySQL server
1052    my $dbh = DBI->connect("dbi:mysql:database=psrchive;host=localhost", "psrdba", "lighthouse")
1053        or die "Couldn't connect to database: $DBI::errstr\n";
1054   
1055    print "Successfully connected to db\n" if $dbh;
1056    return $dbh;
1057
1058# DEBUGGING     
1059#    print "We would normally connect to the DB here\n";
1060#    return 1;
1061}
Note: See TracBrowser for help on using the repository browser.