fix default spot qrg dupe granularity
[spider.git] / perl / Spot.pm
index ebc5d92c18279db5e0d7ecdf7ae9405567f05872..1dbd96f088bad8c9f84cb138c05c4a55f485a7fb 100644 (file)
@@ -18,6 +18,9 @@ use Prefix;
 use DXDupe;
 use Data::Dumper;
 use QSL;
+use DXSql;
+use Time::HiRes qw(gettimeofday tv_interval);
+use Math::Round qw(nearest nearest_floor);
 
 use strict;
 
@@ -34,27 +37,53 @@ $duplth = 20;                                       # the length of text to use in the deduping
 $dupage = 1*3600;               # the length of time to hold spot dups
 $maxcalllth = 12;                               # the max length of call to take into account for dupes
 $filterdef = bless ([
-                         # tag, sort, field, priv, special parser 
-                         ['freq', 'r', 0, 0, \&decodefreq],
-                         ['on', 'r', 0, 0, \&decodefreq],
-                         ['call', 'c', 1],
-                         ['info', 't', 3],
-                         ['by', 'c', 4],
-                         ['call_dxcc', 'nc', 5],
-                         ['by_dxcc', 'nc', 6],
-                         ['origin', 'c', 7, 9],
-                         ['call_itu', 'ni', 8],
-                         ['call_zone', 'nz', 9],
-                         ['by_itu', 'ni', 10],
-                         ['by_zone', 'nz', 11],
-                         ['call_state', 'ns', 12],
-                         ['by_state', 'ns', 13],
-                         ['channel', 'c', 14],
-                                        
-                        ], 'Filter::Cmd');
+                                        # tag, sort, field, priv, special parser 
+                                        ['freq', 'r', 0, 0, \&decodefreq],
+                                        ['on', 'r', 0, 0, \&decodefreq],
+                                        ['call', 'c', 1],
+                                        ['info', 't', 3],
+                                        ['spotter', 'c', 4],
+                                        ['by', 'c', 4],
+                                        ['dxcc', 'nc', 5],
+                                        ['call_dxcc', 'nc', 5],
+                                        ['by_dxcc', 'nc', 6],
+                                        ['origin', 'c', 7, 9],
+                                        ['call_itu', 'ni', 8],
+                                        ['itu', 'ni', 8],
+                                        ['call_zone', 'nz', 9],
+                                        ['cq', 'nz', 9],
+                                        ['zone', 'nz', 9],
+                                        ['by_itu', 'ni', 10],
+                                        ['byitu', 'ni', 10],
+                                        ['by_zone', 'nz', 11],
+                                        ['byzone', 'nz', 11],
+                                        ['bycq', 'nz', 11],
+                                        ['call_state', 'ns', 12],
+                                        ['state', 'ns', 12],
+                                        ['by_state', 'ns', 13],
+                                        ['bystate', 'ns', 13],
+                                        ['ip', 'c', 14],
+#                                       ['channel', 'c', 15],
+#                                       ['rbn', 'a', 4, 0, \&filterrbnspot],
+                                       ], 'Filter::Cmd');
 $totalspots = $hfspots = $vhfspots = 0;
 $use_db_for_search = 0;
 
+our %spotcache;                                        # cache of recent spots: key is _cachek(day), value is an array ref of spot records with the most recently read/added record first
+our $spotcachedays = 2;                        # days of spots to cache (default 2): 0 or less disables the cache, anything else is raised to at least 2 by init()
+our $minselfspotqrg = 1240000; # minimum freq above which self spotting is allowed
+
+our $readback = $main::is_win ? 0 : 1; # don't read spot files backwards if it's windows
+our $qrggranularity = 1000;    # dup() rounds the qrg to the nearest multiple of this value (default: 1000, in the same units as the freq passed to dup()), so tough luck if you have a fumble fingers moment
+our $timegranularity = 600;            # ditto for the spot time: rounded to the nearest multiple of this many seconds (default: 600)
+our $oldstyle = 0;                             # revert to traditional dupe key format (true => dup() keys use the raw freq and time instead of the rounded ones)
+
+
+if ($readback) {                               # reading backwards is allowed: swap the flag for the path of the tac program
+       $readback = `which tac`;                # whatever 'which' prints; an empty string (false) if it prints nothing
+       chomp $readback;
+}
+
 # create a Spot Object
 sub new
 {
@@ -73,7 +102,7 @@ sub decodefreq
        
        foreach $f (@f) {
                my ($a, $b); 
-               if (m{^\d+/\d+$}) {
+               if ($f =~ m{^\d+/\d+$}) {
                        push @out, $f;
                } elsif (($a, $b) = $f =~ m{^(\w+)(?:/(\w+))?$}) {
                        $b = lc $b if $b;
@@ -94,11 +123,19 @@ sub decodefreq
        return (0, join(',', @out));                     
 }
 
+# filter setup for rbn spots: returns the regex source '-#$' (matches a string ending in "-#") used to detect one
+sub filterrbnspot
+{
+       my $dxchan = shift;                     # accepted but not used in this sub
+       return ('-#$');
+}
+
 sub init
 {
        mkdir "$dirprefix", 0777 if !-e "$dirprefix";
        $fp = DXLog::new($dirprefix, "dat", 'd');
        $statp = DXLog::new($dirprefix, "dys", 'd');
+       my $today = Julian::Day->new(time);
 
        # load up any old spots 
        if ($main::dbh) {
@@ -109,7 +146,6 @@ sub init
                        $main::dbh->spot_create_table;
                        
                        my $now = Julian::Day->alloc(1995, 0);
-                       my $today = Julian::Day->new(time);
                        my $sth = $main::dbh->spot_insert_prepare;
                        while ($now->cmp($today) <= 0) {
                                my $fh = $fp->open($now);
@@ -152,6 +188,39 @@ sub init
                        dbg("added ipaddr field to spot table");
                }
        }
+
+       # initialise the cache if required
+       if ($spotcachedays > 0) {
+               my $t0 = [gettimeofday];
+               $spotcachedays = 2 if $spotcachedays < 2;
+               dbg "Spot::init - reading in $spotcachedays days of spots into cache"; 
+               for (my $i = 0; $i < $spotcachedays; ++$i) {
+                       my $now = $today->sub($i);
+                       my $fh = $fp->open($now);
+                       if ($fh) {
+                               my @in;
+                               my $rec;
+                               for ($rec = 0; <$fh>; ++$rec) {
+                                       chomp;
+                                       my @s = split /\^/;
+                                       if (@s < 14) {
+                                               my @a = (Prefix::cty_data($s[1]))[1..3];
+                                               my @b = (Prefix::cty_data($s[4]))[1..3];
+                                               push @s, $b[1] if @s < 7;
+                                               push @s, '' if @s < 8;
+                                               push @s, @a[0,1], @b[0,1] if @s < 12;
+                                               push @s,  $a[2], $b[2] if @s < 14;
+                                       }
+                                       unshift @in, \@s; 
+                               }
+                               $fh->close;
+                               dbg("Spot::init read $rec spots from " . _cachek($now));
+                               $spotcache{_cachek($now)} = \@in;
+                       }
+                       $now->add(1);
+               }
+               dbg("Spot::init $spotcachedays files of spots read into cache in " . _diffms($t0) . "mS")
+       }
 }
 
 sub prefix
@@ -160,6 +229,8 @@ sub prefix
 }
 
 # fix up the full spot data from the basic spot data
+# input is
+# freq, call, time, comment, spotter, origin[, ip_address]
 sub prepare
 {
        # $freq, $call, $t, $comment, $spotter, node, ip address = @_
@@ -171,10 +242,9 @@ sub prepare
        # remove ssids and /xxx if present on spotter
        $out[4] =~ s/-\d+$//o;
 
-       # remove leading and trailing spaces
+       # remove leading and trailing spaces from comment field
        $out[3] = unpad($out[3]);
        
-       
        # add the 'dxcc' country on the end for both spotted and spotter, then the cluster call
        my @spd = Prefix::cty_data($out[1]);
        push @out, $spd[0];
@@ -185,7 +255,7 @@ sub prepare
        push @out, $_[6] if $_[6] && is_ipaddr($_[6]);
 
        # thus we now have:
-       # freq, call, time, comment, spotter, call country code, call itu, call cqzone, spotter country code, spotter itu, spotter cqzone, call state, spotter state, node, spotter ip address
+       # freq, call, time, comment, spotter, call country code, spotter country code, origin, call itu, call cqzone, spotter itu, spotter cqzone, call state, spotter state, spotter ip address
        return @out;
 }
 
@@ -193,6 +263,12 @@ sub add
 {
        my $buf = join('^', @_);
        $fp->writeunix($_[2], $buf);
+       if ($spotcachedays > 0) {
+               my $now = Julian::Day->new($_[2]);
+               my $day = _cachek($now);
+               my $r = (exists $spotcache{$day}) ? $spotcache{$day} : ($spotcache{$day} = []);
+               unshift @$r, \@_;
+       }
        if ($main::dbh) {
                $main::dbh->begin_work;
                $main::dbh->spot_insert(\@_);
@@ -224,7 +300,13 @@ sub add
 #   $f5 = spotted dxcc country
 #   $f6 = spotter dxcc country
 #   $f7 = origin
-#
+#   $f8 = spotted itu
+#   $f9 = spotted cq zone
+#   $f10 = spotter itu
+#   $f11 = spotter cq zone
+#   $f12 = spotted us state
+#   $f13 = spotter us state
+#   $f14 = ip address
 #
 # In addition you can specify a range of days, this means that it will start searching
 # from <n> days less than today to <m> days less than today
@@ -237,8 +319,7 @@ sub add
 
 sub search
 {
-       my ($expr, $dayfrom, $dayto, $from, $to, $hint, $dxchan) = @_;
-       my $eval;
+       my ($expr, $dayfrom, $dayto, $from, $to, $hint, $dofilter, $dxchan) = @_;
        my @out;
        my $ref;
        my $i;
@@ -260,70 +341,91 @@ sub search
        $to = $from + $maxspots if $to - $from > $maxspots || $to - $from <= 0;
 
        if ($main::dbh && $use_db_for_search) {
-               return $main::dbh->spot_search($expr, $dayfrom, $dayto, $to-$from, $dxchan);
+               return $main::dbh->spot_search($expr, $dayfrom, $dayto, $from, $to, $hint, $dofilter, $dxchan);
        }
 
-       $expr =~ s/\$f(\d\d?)/\$ref->[$1]/g; # swap the letter n for the correct field name
+       #       $expr =~ s/\$f(\d\d?)/\$ref->[$1]/g; # swap the letter n for the correct field name
        #  $expr =~ s/\$f(\d)/\$spots[$1]/g;               # swap the letter n for the correct field name
   
-       my $checkfilter;
-       $checkfilter = qq (
-                      if (\@s < 9) {
-                          my \@a = (Prefix::cty_data(\$s[1]))[1..3];
-                          my \@b = (Prefix::cty_data(\$s[4]))[1..3];
-                          push \@s, \@a[0,1], \@b[0,1], \$a[2], \$a[2];  
-                      } else {
-                          \$s[12] ||= ' ';
-                          \$s[13] ||= ' ';
-                      }
-                         my (\$filter, \$hops) = \$dxchan->{spotsfilter}->it(\@s);
-                         next unless (\$filter);
-                      ) if $dxchan;
-       $checkfilter ||= ' ';
-       
-       dbg("hint='$hint', expr='$expr', spotno=$from-$to, day=$dayfrom-$dayto\n") if isdbg('search');
+
+       dbg("Spot::search hint='$hint', expr='$expr', spotno=$from-$to, day=$dayfrom-$dayto\n") if isdbg('search');
   
        # build up eval to execute
-       $eval = qq(
-                          while (<\$fh>) {
-                                  $hint;
-                                  chomp;
-                                  my \@s = split /\\^/;
-                   $checkfilter;
-                   push \@spots, \\\@s;
-                          }
-                          my \$c;
-                          my \$ref;
-                          for (\$c = \$#spots; \$c >= 0; \$c--) {
-                                       \$ref = \$spots[\$c];
-                                       if ($expr) {
-                                               \$count++;
-                                               next if \$count < \$from; # wait until from 
-                                               push(\@out, \$ref);
-                                               last if \$count >= \$to; # stop after to
-                                       }
-                               }
-                         );
-    
-       dbg("Spot eval: $eval") if isdbg('searcheval');
+
+       dbg("Spot::search Spot eval: $expr") if isdbg('searcheval');
+       $expr =~ s/\$r/\$_[0]/g;
+       my $eval = qq{ sub { return $expr; } };
+       dbg("Spot::search Spot eval: $eval") if isdbg('searcheval');
+       my $ecode = eval $eval;
+       return ("Spot search error", $@) if $@;
+       
+       my $fh;
+       my $now = $fromdate;
+       my $today = Julian::Day->new($main::systime);
        
+       for ($i = $count = 0; $count < $to && $i < $maxdays; ++$i) { # look thru $maxdays worth of files only
+               last if $now->cmp($todate) <= 0;
 
-       $fp->close;                                     # close any open files
 
-       for ($i = $count = 0; $i < $maxdays; ++$i) {    # look thru $maxdays worth of files only
-               my $now = $fromdate->sub($i); # but you can pick which $maxdays worth
-               last if $now->cmp($todate) <= 0;         
-       
-               my @spots = ();
-               my $fh = $fp->open($now); # get the next file
-               if ($fh) {
-                       my $in;
-                       eval $eval;                     # do the search on this file
-                       last if $count >= $to; # stop after to
-                       return ("Spot search error", $@) if $@;
+               my $this = $now->sub($i);
+               my $fn = $fp->fn($this);
+               my $cachekey = _cachek($this); 
+               my $rec = 0;
+
+               if ($spotcachedays > 0 && $spotcache{$cachekey}) {
+                       foreach my $r (@{$spotcache{$cachekey}}) {
+                               ++$rec;
+                               if ($dofilter && $dxchan && $dxchan->{spotsfilter}) {
+                                       my ($gotone, undef) = $dxchan->{spotsfilter}->it(@$r);
+                                       next unless $gotone;
+                               }
+                               if (&$ecode($r)) {
+                                       ++$count;
+                                       next if $count < $from;
+                                       push @out, $r;
+                                       last if $count >= $to;
+                               }
+                       }
+                       dbg("Spot::search cache recs read: $rec") if isdbg('search');
+               } else {
+                       if ($readback) {
+                               dbg("Spot::search search using tac fn: $fn $i") if isdbg('search');
+                               $fh = IO::File->new("$readback $fn |");
+                       }
+                       else {
+                               dbg("Spot::search search fn: $fp->{fn} $i") if isdbg('search');
+                               $fh = $fp->open($now->sub($i)); # get the next file
+                       }
+                       if ($fh) {
+                               my $in;
+                               while (<$fh>) {
+                                       chomp;
+                                       my @r = split /\^/;
+                                       ++$rec;
+                                       if ($dofilter && $dxchan && $dxchan->{spotsfilter}) {
+                                               my ($gotone, undef) = $dxchan->{spotsfilter}->it(@r);
+                                               next unless $gotone;
+                                       }
+                                       if (&$ecode(\@r)) {
+                                               ++$count;
+                                               next if $count < $from;
+                                               if ($readback) {
+                                                       push @out, \@r;
+                                                       last if $count >= $to;
+                                               } else {
+                                                       push @out, \@r;
+                                                       shift @out if $count >= $to;
+                                               }
+                                       }
+                               }
+                               dbg("Spot::search file recs read: $rec") if isdbg('search');
+                               last if $count >= $to; # stop after to
+                       }
                }
        }
+       return ("Spot search error", $@) if $@;
 
+       @out = sort {$b->[2] <=> $a->[2]} @out if @out;
        return @out;
 }
 
@@ -359,76 +461,80 @@ sub ftor
 # format a spot for user output in list mode
 sub formatl
 {
-       my $t = ztime($_[2]);
-       my $d = cldate($_[2]);
-       return sprintf "%8.1f  %-11s %s %s  %-28.28s%7s>", $_[0], $_[1], $d, $t, ($_[3]||''), "<$_[4]" ;
-}
-
-#
-# return all the spots from a day's file as an array of references
-# the parameter passed is a julian day
-sub readfile($)
-{
-       my @spots;
+       my $t = ztime($_[3]);
+       my $d = cldate($_[3]);
+       my $spotter = "<$_[5]>";
+       my $comment = $_[4] || '';
+       $comment =~ s/\t+/ /g;
+       my $cl = length $comment;
+       my $s = sprintf "%9.1f %-11s %s %s", $_[1], $_[2], $d, $t;
+       my $width = ($_[0] ? $_[0] : 80) - length($spotter) - length($s) - 4;
        
-       my $fh = $fp->open(shift); 
-       if ($fh) {
-               my $in;
-               while (<$fh>) {
-                       chomp;
-                       push @spots, [ split '\^' ];
-               }
-       }
-       return @spots;
+       $comment = substr $comment, 0, $width if $cl > $width;
+       $comment .= ' ' x ($width-$cl) if $cl < $width;
+
+#      return sprintf "%8.1f  %-11s %s %s  %-28.28s%7s>", $_[0], $_[1], $d, $t, ($_[3]||''), "<$_[4]" ;
+       return "$s $comment$spotter";
 }
 
 # enter the spot for dup checking and return true if it is already a dup
 sub dup
 {
-       my ($freq, $call, $d, $text, $by, $cty) = @_; 
+       my ($freq, $call, $d, $text, $by, $node, $just_find) = @_;     # $just_find true => only look the keys up, never store them
+
+       dbg("Spot::dup: freq=$freq call=$call d=$d text='$text' by=$by node=$node" . ($just_find ? " jf=$just_find" : "")) if isdbg('spotdup');
 
        # dump if too old
        return 2 if $d < $main::systime - $dupage;
-       
+
        # turn the time into minutes (should be already but...)
        $d = int ($d / 60);
        $d *= 60;
 
+       my $nd = nearest($timegranularity, $d);        # time bucket: $d rounded to the nearest multiple of $timegranularity
+
        # remove SSID or area
        $by =~ s|[-/]\d+$||;
        
 #      $freq = sprintf "%.1f", $freq;       # normalise frequency
        $freq = int $freq;       # normalise frequency
+
+       my $qrg = nearest($qrggranularity, $freq); # frequency bucket: $freq rounded to the nearest multiple of $qrggranularity
+       
        $call = substr($call, 0, $maxcalllth) if length $call > $maxcalllth;
 
+       
        chomp $text;
        $text =~ s/\%([0-9A-F][0-9A-F])/chr(hex($1))/eg;
        $text = uc unpad($text);
-       if ($cty && $text && length $text <= 4) {
-               unless ($text =~ /^C?Q/ || $text =~ /^[\d\W]+$/) {
-                       my @try = Prefix::cty_data($text);
-                       $text = "" if $cty == $try[0];
-               }
-       }
        my $otext = $text;
 #      $text = Encode::encode("iso-8859-1", $text) if $main::can_encode && Encode::is_utf8($text, 1);
        $text =~ s/^\+\w+\s*//;                 # remove leading LoTW callsign
        $text =~ s/\s{2,}[\dA-Z]?[A-Z]\d?$// if length $text > 24;
        $text =~ s/[\W\x00-\x2F\x7B-\xFF]//g; # tautology, just to make quite sure!
        $text = substr($text, 0, $duplth) if length $text > $duplth; 
-       my $ldupkey = "X$freq|$call|$by|$text";
+       my $ldupkey = $oldstyle ? "X|$call|$by|$node|$freq|$d|$text" : "X|$call|$by|$node|$qrg|$nd|$text";     # primary key; $oldstyle selects the raw freq/time over the rounded $qrg/$nd
+
+       dbg("Spot::dup ldupkey $ldupkey") if isdbg('spotdup');
+       
        my $t = DXDupe::find($ldupkey);
        return 1 if $t && $t - $main::systime > 0;
-       DXDupe::add($ldupkey, $main::systime+$dupage);
+       
+       DXDupe::add($ldupkey, $main::systime+$dupage) unless $just_find;       # remember the key for $dupage seconds unless this is a pure look-up
        $otext = substr($otext, 0, $duplth) if length $otext > $duplth; 
        $otext =~ s/\s+$//;
        if (length $otext && $otext ne $text) {
-               $ldupkey = "X$freq|$call|$by|$otext";
+               $ldupkey = $oldstyle ? "X|$freq|$call|$by|$otext" : "X|$qrg|$call|$by|$otext"; # NOTE(review): unlike the primary key this one has no $node and no time ($d/$nd) component and a different field order - confirm that is intended
                $t = DXDupe::find($ldupkey);
                return 1 if $t && $t - $main::systime > 0;
-               DXDupe::add($ldupkey, $main::systime+$dupage);
+               DXDupe::add($ldupkey, $main::systime+$dupage) unless $just_find;       # same rule as for the primary key
        }
-       return 0;
+       return undef;  # not a dupe; NOTE(review): was 'return 0' - undef is still false in scalar context but becomes a one-element list in list context, confirm callers
+}
+
+sub dup_find   # look-up only front end to dup(): returns whatever dup() returns
+{
+       return dup(@_, 1);      # the appended 1 lands in dup()'s $just_find slot only when exactly six arguments were passed in
 }
 
 sub listdups
@@ -436,11 +542,11 @@ sub listdups
        return DXDupe::listdups('X', $dupage, @_);
 }
 
-sub genstats($)
+sub genstats
 {
        my $date = shift;
-       my $in = $fp->open($date);
-       my $out = $statp->open($date, 'w');
+       my $in = $fp->open($date) or dbg("Spot::genstats: Cannot open " . $fp->fn($date) . " $!");
+       my $out = $statp->open($date, 'w') or dbg("Spot::genstats: Cannot open " . $statp->fn($date) . " $!");
        my @freq;
        my %list;
        my @tot;
@@ -483,7 +589,7 @@ sub genstats($)
 }
 
 # return true if the stat file is newer than than the spot file
-sub checkstats($)
+sub checkstats
 {
        my $date = shift;
        my $in = $fp->mtime($date);
@@ -496,6 +602,26 @@ sub daily
 {
        my $date = Julian::Day->new($main::systime)->sub(1);
        genstats($date) unless checkstats($date);
+       clean_cache();
+}
+
+sub _cachek    # build the %spotcache key for a day from the array ref passed in (callers here pass Julian::Day objects)
+{
+       return "$_[0]->[0]|$_[0]->[1]"; # first two elements joined by '|'; presumably year and day number - TODO confirm against Julian::Day
+}
+
+sub clean_cache        # drop days from %spotcache that have aged out of the $spotcachedays window; called from daily()
+{
+       if ($spotcachedays > 0) {       # nothing to do when the cache is disabled
+               my $now = Julian::Day->new($main::systime);
+               for (my $i = $spotcachedays; $i < $spotcachedays + 5; ++$i ) {  # only the five days just beyond the window are checked, anything older is left alone
+                       my $k = _cachek($now->sub($i)); # cache key of the day $i days before today
+                       if (exists $spotcache{$k}) {
+                               dbg("Spot::spotcache deleting day $k, more than $spotcachedays days old");
+                               delete $spotcache{$k};
+                       }
+               }
+       }
+}
 1;