experimental branch to improve spot query
[spider.git] / perl / Spot.pm
1 #
2 # the dx spot handler
3 #
4 # Copyright (c) - 1998 Dirk Koopman G1TLH
5 #
6 #
7 #
8
9 package Spot;
10
11 use IO::File;
12 use DXVars;
13 use DXDebug;
14 use DXUtil;
15 use DXLog;
16 use Julian;
17 use Prefix;
18 use DXDupe;
19 use Data::Dumper;
20 use QSL;
21 use DXSql;
22 use Julian;
23
24 use strict;
25
# package globals shared by the routines below
use vars qw($fp $statp $maxspots $defaultspots $maxdays $dirprefix $duplth $dupage $filterdef
			$totalspots $hfspots $vhfspots $maxcalllth $can_encode $use_db_for_search);

# the spot data log and the daily stats log, both set up by init()
($fp, $statp) = (undef, undef);

# limits used by search()
$maxspots = 100;				# maximum spots to return
$defaultspots = 10;				# normal number of spots to return
$maxdays = 100;					# normal maximum no of days to go back

# directory (under the data area) that holds the spot files
$dirprefix = "spots";

# dupe checking parameters, see dup()
$duplth = 20;					# the length of text to use in the deduping
$dupage = 1*3600;				# the length of time to hold spot dups
$maxcalllth = 12;				# the max length of call to take into account for dupes

# the filter definition table, one row per filter keyword:
#   tag, sort, field, priv, special parser
my @filter_rows = (
	['freq',       'r',  0, 0, \&decodefreq],
	['on',         'r',  0, 0, \&decodefreq],
	['call',       'c',  1],
	['info',       't',  3],
	['spotter',    'c',  4],
	['by',         'c',  4],
	['dxcc',       'nc', 5],
	['call_dxcc',  'nc', 5],
	['by_dxcc',    'nc', 6],
	['origin',     'c',  7, 9],
	['call_itu',   'ni', 8],
	['itu',        'ni', 8],
	['call_zone',  'nz', 9],
	['cq',         'nz', 9],
	['zone',       'nz', 9],
	['by_itu',     'ni', 10],
	['byitu',      'ni', 10],
	['by_zone',    'nz', 11],
	['byzone',     'nz', 11],
	['bycq',       'nz', 11],
	['call_state', 'ns', 12],
	['state',      'ns', 12],
	['by_state',   'ns', 13],
	['bystate',    'ns', 13],
	['ip',         'c',  14],
#	['channel',    'c',  15],
#	['rbn',        'a',  4, 0, \&filterrbnspot],
);
$filterdef = bless [ @filter_rows ], 'Filter::Cmd';

# running totals, bumped by add()
$totalspots = 0;
$hfspots = 0;
$vhfspots = 0;

# when true (and a database handle exists) search() is delegated to the database
$use_db_for_search = 0;

our %spotcache;					# the cache of data within the last $spotcachedays 0 or 2+ days
our $spotcachedays = 0;

# when enabled this becomes the path of tac(1) as reported by which(1), and
# search() then reads each spot file through it; it ends up empty (false)
# if which prints nothing
our $readback = 1;
chomp($readback = `which tac`) if $readback;
81
# create a Spot Object
#
# The object is a blessed array holding a copy of the field values
# in the order in which they were passed.
sub new
{
	my ($class, @fields) = @_;
	return bless \@fields, $class;
}
89
# special parser for the 'freq' / 'on' filter keywords
#
# Takes the channel (used only to produce error messages) and a comma
# separated list whose items are either an explicit "low/high" numeric
# range or a band name with an optional "/sub-band" looked up via
# Bands::get_freq.
#
# Returns (0, "lo/hi,lo/hi,...") on success, or ('dfreq', message) for the
# first item that cannot be decoded.
sub decodefreq
{
	my ($dxchan, $spec) = @_;
	my @ranges;

	for my $item (split /,/, $spec) {

		# explicit numeric ranges go through untouched
		if ($item =~ m{^\d+/\d+$}) {
			push @ranges, $item;
			next;
		}

		# anything else has to look like <band>[/<sub-band>]
		unless ($item =~ m{^(\w+)(?:/(\w+))?$}) {
			return ('dfreq', $dxchan->msg('e20', $item));
		}
		my ($band, $subband) = ($1, $2);
		$subband = lc $subband if $subband;

		my @limits = Bands::get_freq(lc $band, $subband);
		return ('dfreq', $dxchan->msg('dfreq1', $item)) unless @limits;

		# the limits come back as a flat list of pairs, add them as ranges
		while (my ($low, $high) = splice @limits, 0, 2) {
			push @ranges, "$low/$high";
		}
	}
	return (0, join(',', @ranges));
}
120
# filter setup for rbn spots: hands back the regex (as a string) that detects one
sub filterrbnspot
{
	my ($dxchan) = @_;			# accepted but not used here
	my $rbn_regex = '-#$';
	return $rbn_regex;
}
127
# Bring an old, short spot record (passed by reference) up to the full
# 14 field layout, deriving the missing values from the two callsigns.
#
# The field positions are those documented in prepare(): Prefix::cty_data
# is used there as (dxcc, itu, cq zone, state).
sub _fill_missing_fields
{
	my $s = shift;
	return if @$s >= 14;

	my @spd = Prefix::cty_data($s->[1]);		# the spotted station
	my @spt = Prefix::cty_data($s->[4]);		# the spotter

	# field 6 is the spotter's dxcc country (this used to be filled
	# with the spotter's cq zone by mistake)
	push @$s, $spt[0] if @$s < 7;
	push @$s, '' if @$s < 8;					# origin node not known
	push @$s, @spd[1,2], @spt[1,2] if @$s < 12;	# itu and cq zone of each
	push @$s, $spd[3], $spt[3] if @$s < 14;		# us state of each
	return;
}

# Set up the spot system: create the spot directory if necessary, open the
# spot and statistics logs, do a one-off import of all the spot files
# into the database (if there is one and it has no 'spot' table yet) and
# finally prime the in-memory spot cache if $spotcachedays is set.
sub init
{
	mkdir "$dirprefix", 0777 if !-e "$dirprefix";
	$fp = DXLog::new($dirprefix, "dat", 'd');
	$statp = DXLog::new($dirprefix, "dys", 'd');
	my $today = Julian::Day->new(time);

	# load up any old spots
	if ($main::dbh) {
		unless (grep $_ eq 'spot', $main::dbh->show_tables) {
			dbg('initialising spot tables');
			my $t = time;
			my $total = 0;
			$main::dbh->spot_create_table;

			my $now = Julian::Day->alloc(1995, 0);
			my $sth = $main::dbh->spot_insert_prepare;
			while ($now->cmp($today) <= 0) {
				my $fh = $fp->open($now);
				if ($fh) {
#					$main::dbh->{RaiseError} = 0;
					# one transaction per day file
					$main::dbh->begin_work;
					my $count = 0;
					while (my $line = <$fh>) {
						chomp $line;
						my @s = split /\^/, $line;
						_fill_missing_fields(\@s);
						$main::dbh->spot_insert(\@s, $sth);
						$count++;
					}
					$main::dbh->commit;
					dbg("inserted $count spots from $now->[0] $now->[1]");
					$fh->close;
					$total += $count;
				}
				$now = $now->add(1);
			}
			$main::dbh->begin_work;
			$main::dbh->spot_add_indexes;
			$main::dbh->commit;
#			$main::dbh->{RaiseError} = 1;
			$t = time - $t;
			my $min = int($t / 60);
			my $sec = $t % 60;
			dbg("$total spots converted in $min:$sec");
		}
		unless ($main::dbh->has_ipaddr) {
			$main::dbh->add_ipaddr;
			dbg("added ipaddr field to spot table");
		}
	}

	# initialise the cache if required
	if ($spotcachedays) {
		$spotcachedays = 2 if $spotcachedays < 2;

		# walk forward from the oldest cached day up to and including today
		my $now = $today->sub($spotcachedays);
		while ($now->cmp($today) <= 0) {
			my $fh = $fp->open($now);
			if ($fh) {
				my @in;
				while (my $line = <$fh>) {
					chomp $line;
					my @s = split /\^/, $line;
					_fill_missing_fields(\@s);
					push @in, \@s;
				}
				$fh->close;
				$spotcache{"$now->[0]|$now->[1]"} = \@in;
			}
			# add() hands back the new date (as in the import loop above)
			$now = $now->add(1);
		}
	}
}
215
# return the 'prefix' entry held in the spot log object
sub prefix
{
	my $spotlog = $fp;
	return $spotlog->{prefix};
}
220
# fix up the full spot data from the basic spot data
# input is
# freq, call, time, comment, spotter, origin[, ip_address]
#
# output is the list:
# freq, call, time, comment, spotter, call country code, spotter country code,
# origin, call itu, call cqzone, spotter itu, spotter cqzone, call state,
# spotter state[, spotter ip address]
sub prepare
{
	my ($freq, $call, $t, $comment, $spotter, $origin, $ip) = @_;

	# normalise frequency
	$freq = sprintf "%.1f", $freq;

	# remove any ssid from the spotter
	$spotter =~ s/-\d+$//o;

	# remove leading and trailing spaces
	$comment = unpad($comment);

	# look up the country data for both the spotted station and the spotter
	my ($call_dxcc, $call_itu, $call_cq, $call_state) = Prefix::cty_data($call);
	my ($by_dxcc, $by_itu, $by_cq, $by_state) = Prefix::cty_data($spotter);

	my @out = (
			   $freq, $call, $t, $comment, $spotter,
			   $call_dxcc, $by_dxcc,
			   $origin,
			   $call_itu, $call_cq, $by_itu, $by_cq,
			   $call_state, $by_state,
			  );

	# the ip address is only tacked on if it looks like one
	push @out, $ip if $ip && is_ipaddr($ip);

	return @out;
}
252
# Store one complete spot (the list produced by prepare()):
#
#  - append it to the spot log for its day
#  - keep a copy in the in-memory cache if $spotcachedays is set
#  - insert it into the database if there is one
#  - update the running HF/VHF totals
#  - update the QSL information if the comment mentions QSL or VIA
sub add
{
	my $buf = join('^', @_);
	$fp->writeunix($_[2], $buf);
	if ($spotcachedays) {
		my $now = Julian::Day->new($_[2]);
		my $day = "$now->[0]|$now->[1]";
		my $r = $spotcache{$day} ||= [];

		# cache the spot as one record (an array ref), which is the shape
		# init() loads the cache with.
		# NOTE(review): init() appends records in file order whereas new
		# spots are put on the front here - confirm which order the
		# cache readers expect.
		unshift @$r, [ @_ ];

		# remove old days: the keys are "year|dayno" so they must be
		# compared numerically, piece by piece, to find the oldest
		my @days = sort {
			my ($ya, $da) = split /\|/, $a;
			my ($yb, $db) = split /\|/, $b;
			$ya <=> $yb || $da <=> $db;
		} keys %spotcache;
		while (@days > $spotcachedays + 1) {
			delete $spotcache{shift @days};
		}
	}
	if ($main::dbh) {
		$main::dbh->begin_work;
		$main::dbh->spot_insert(\@_);
		$main::dbh->commit;
	}
	$totalspots++;
	if ($_[0] <= 30000) {
		$hfspots++;
	} else {
		$vhfspots++;
	}
	if ($_[3] =~ /(?:QSL|VIA)/i) {
		my $q = QSL::get($_[1]) || QSL->new($_[1]);
		$q->update($_[3], $_[2], $_[4]);
	}
}
286
# search the spot files (or the database) for records matching an expression
# this returns a set of references to the spots, newest first
#
# the expression is a legal perl expression (default "1", ie everything) in
# which $r stands for the spot record being tested, so that field <n> is
# written $r->[<n>] where :-
#
#   0 = frequency
#   1 = call
#   2 = date in unix format
#   3 = comment
#   4 = spotter
#   5 = spotted dxcc country
#   6 = spotter dxcc country
#   7 = origin
#   8 = spotted itu
#   9 = spotted cq zone
#   10 = spotter itu
#   11 = spotter cq zone
#   12 = spotted us state
#   13 = spotter us state
#   14 = ip address
#
# In addition you can specify a range of days: the search starts at the file
# for <dayfrom> days before today and works backwards through at most <dayto>
# (default and hard limit $maxdays) daily files.
#
# Also you can select a range of entries so normally you would get the 0th (latest) entry
# back to the $defaultspots'th latest, you can specify a range from the <from>th to the
# <to>th instead (no more than $maxspots apart).
#
# If $dofilter is set and $dxchan has a spots filter then only the spots that
# pass that filter are considered. $hint is only used by the database search.
#
# On a problem with the expression the list ("Spot search error", <message>)
# is returned instead.
#
# NOTE: the expression is compiled with a string eval, so it must only ever be
# built by trusted code (never passed in raw from a user).
#
# This routine is designed to be called as Spot::search(..)
#

sub search
{
	my ($expr, $dayfrom, $dayto, $from, $to, $hint, $dofilter, $dxchan) = @_;
	my @out;
	my $today = Julian::Day->new(time());

	$dayfrom = 0 if !$dayfrom;
	$dayto = $maxdays unless $dayto;
	$dayto = $dayfrom + $maxdays if $dayto < $dayfrom;
	my $fromdate = $today->sub($dayfrom);
	my $todate = $fromdate->sub($dayto);
	$from = 0 unless $from;
	$to = $defaultspots unless $to;
	$hint = $hint ? "next unless $hint" : "";
	$expr = "1" unless $expr;

	$to = $from + $maxspots if $to - $from > $maxspots || $to - $from <= 0;

	if ($main::dbh && $use_db_for_search) {
		return $main::dbh->spot_search($expr, $dayfrom, $dayto, $from, $to, $hint, $dofilter, $dxchan);
	}

	dbg("Spot::search hint='$hint', expr='$expr', spotno=$from-$to, day=$dayfrom-$dayto\n") if isdbg('search');

	# build up eval to execute: $r in the expression becomes the first
	# argument of the generated sub
	dbg("Spot::search Spot eval: $expr") if isdbg('searcheval');
	$expr =~ s/\$r/\$_[0]/g;
	my $eval = qq{ sub { return $expr; } };
	dbg("Spot::search Spot eval: $eval") if isdbg('searcheval');
	my $ecode = eval $eval;
	return ("Spot search error", $@) if $@;

	my $count = 0;
	for (my $i = 0; $count < $to && $i < $maxdays; ++$i) { # look thru $maxdays worth of files only

		# the day being looked at must move back with $i, otherwise
		# the $todate limit below can never be reached
		my $day = $fromdate->sub($i);
		last if $day->cmp($todate) <= 0;

		# TODO: look in %spotcache first when $spotcachedays is set

		my $fn = $fp->fn($day);
		my $fh;
		if ($readback) {
			dbg("Spot::search search using tac fn: $fn $i") if isdbg('search');

			# read the file newest first. The list form of open runs the
			# program directly (no shell) and nothing at all is run for
			# days that have no file.
			if (-e $fn) {
				open($fh, '-|', $readback, $fn) or $fh = undef;
			}
		}
		else {
			dbg("Spot::search search fn: $fp->{fn} $i") if isdbg('search');
			$fh = $fp->open($day); # get the next file
		}
		next unless $fh;

		my $rec = 0;
		while (my $line = <$fh>) {
			chomp $line;
			my @r = split /\^/, $line;
			++$rec;
			if ($dofilter && $dxchan && $dxchan->{spotsfilter}) {
				my ($gotone, undef) = $dxchan->{spotsfilter}->it(@r);
				next unless $gotone;
			}

			# a run time error in the expression is reported, not fatal
			my $matched = eval { $ecode->(\@r) };
			if ($@) {
				my $err = $@;
				close $fh;
				return ("Spot search error", $err);
			}
			next unless $matched;

			++$count;
			next if $count < $from;
			push @out, \@r;
			if ($readback) {
				# newest first, so we can stop as soon as we have enough
				last if $count >= $to;
			} else {
				# oldest first, so keep only the most recent ones seen
				shift @out if $count >= $to;
			}
		}
		close $fh;
		dbg("Spot::search recs read: $rec") if isdbg('search');
		last if $count >= $to; # stop after to
	}
	@out = sort {$b->[2] <=> $a->[2]} @out if @out;
	return @out;
}
411
# change a freq range->regular expression
#
# Takes the lower and upper limits and returns a regex fragment (a string)
# built digit by digit from the lower limit and the upper limit less one.
# Returns undef unless the lower limit is below the upper one.
sub ftor
{
	my ($low, $high) = @_;
	return undef unless $low < $high;

	$high--;
	my $spanlth = length($high - $low);		# no of digits in the difference
	my @lowdigits = split //, $low;
	my @highdigits = split //, $high;
	my $regex;

	# any extra leading digits of the upper limit are taken as they are
	my $extra = @highdigits - @lowdigits;
	$regex .= join('', splice(@highdigits, 0, $extra)) if $extra > 0;

	for my $i (0 .. $#highdigits) {
		my $lowd = $lowdigits[$i];
		my $highd = $highdigits[$i];
		my $left = $#highdigits - $i;		# digits still to come after this one

		$regex .= $left < $spanlth ? '\\d'				# inside the span: any digit
				: $lowd eq $highd  ? $lowd
				: $lowd < $highd   ? "[$lowd-$highd]"
				:                    "[0-$highd$lowd-9]";
	}
	return $regex;
}
440
# format a spot for user output in list mode
#
# takes the spot fields (freq, call, time, comment, spotter, ...) as a list
# and returns the formatted line
sub formatl
{
	my ($freq, $call, $when, $comment, $spotter) = @_;
	my $time = ztime($when);
	my $date = cldate($when);
	$comment ||= '';
	return sprintf "%8.1f  %-11s %s %s  %-28.28s%7s>", $freq, $call, $date, $time, $comment, "<$spotter";
}
448
# enter the spot for dup checking and return true if it is already a dup
#
# input is: freq, call, time, comment text, spotter, spotted country code
#
# returns 0 if the spot is new (it is then remembered for $dupage seconds),
#         1 if it has been seen before,
#         2 if it is older than $dupage and so not even considered
sub dup
{
	my ($freq, $call, $when, $comment, $spotter, $cty) = @_;

	# dump if too old
	return 2 if $when < $main::systime - $dupage;

	# turn the time into minutes (should be already but...)
	$when = 60 * int($when / 60);

	# remove SSID or area
	$spotter =~ s|[-/]\d+$||;

	# normalise frequency and call
	$freq = int $freq;
	$call = substr($call, 0, $maxcalllth) if length $call > $maxcalllth;

	# decode any %XX escapes and tidy up the comment
	chomp $comment;
	$comment =~ s/\%([0-9A-F][0-9A-F])/chr(hex($1))/eg;
	$comment = uc unpad($comment);

	# a short comment that is just the prefix of the spotted country says nothing
	if ($cty && $comment && length $comment <= 4
		&& $comment !~ /^C?Q/ && $comment !~ /^[\d\W]+$/) {
		my @try = Prefix::cty_data($comment);
		$comment = "" if $cty == $try[0];
	}

	# check a key against the dupe table and remember it if it is new
	my $seen = sub {
		my $key = shift;
		my $expires = DXDupe::find($key);
		return 1 if $expires && $expires - $main::systime > 0;
		DXDupe::add($key, $main::systime+$dupage);
		return 0;
	};

	# first try the comment boiled right down
	my $plain = $comment;
	my $boiled = $comment;
	$boiled =~ s/^\+\w+\s*//;			# remove leading LoTW callsign
	$boiled =~ s/\s{2,}[\dA-Z]?[A-Z]\d?$// if length $boiled > 24;
	$boiled =~ s/[\W\x00-\x2F\x7B-\xFF]//g; # tautology, just to make quite sure!
	$boiled = substr($boiled, 0, $duplth) if length $boiled > $duplth;
	return 1 if $seen->("X$freq|$call|$spotter|$boiled");

	# and then the comment (more or less) as it came, if that is different
	$plain = substr($plain, 0, $duplth) if length $plain > $duplth;
	$plain =~ s/\s+$//;
	if (length $plain && $plain ne $boiled) {
		return 1 if $seen->("X$freq|$call|$spotter|$plain");
	}
	return 0;
}
497
# list the dupe table entries kept under the 'X' key prefix that dup() uses;
# any arguments are handed on to DXDupe::listdups after the prefix and $dupage
sub listdups
{
	my @args = @_;
	return DXDupe::listdups('X', $dupage, @args);
}
502
# generate the statistics file for the given day from that day's spot file
#
# The output has one line of '^' separated fields per spotter: the call,
# the spotter's total, the dxcc field of the first spot seen and then a
# count for each band in the list below. These are preceded by a 'TOTALS'
# line laid out the same way. Spots whose frequency is in none of the
# bands are not counted. Nothing is done unless both files can be opened.
sub genstats($)
{
	my $date = shift;
	my $in = $fp->open($date);
	my $out = $statp->open($date, 'w');

	if ($in && $out) {
		# each band is [ column no, limits from Bands::get_freq... ]
		my $slot = 0;
		my @bands = map { [ $slot++, Bands::get_freq($_) ] }
			qw(136khz 160m 80m 60m 40m 30m 20m 17m 15m 12m 10m 6m 4m 2m 220 70cm 23cm 13cm 9cm 6cm 3cm 12mm 6mm);
		my $lastcol = @bands + 1;		# columns 0 and 1 come before the bands
		my %spotter;
		my @totals;

		while (my $line = <$in>) {
			chomp $line;
			my ($freq, $by, $dxcc) = (split /\^/, $line)[0,4,6];
			my $row = $spotter{$by} || [0, $dxcc];

			# count the spot against the first band that it falls in
			for my $band (@bands) {
				my ($bandno, $low, $high) = @$band;
				next unless $freq >= $low && $freq <= $high;
				my $col = $bandno + 2;
				$row->[$col]++;
				$totals[$col]++;
				$row->[0]++;
				$totals[0]++;
				$spotter{$by} = $row;
				last;
			}
		}

		# the totals line first, with any gaps filled in with 0
		$totals[$_] ||= 0 for 0 .. $lastcol;
		$statp->write($date, join('^', 'TOTALS', @totals));

		# then the spotters, busiest first
		for my $call (sort {$spotter{$b}->[0] <=> $spotter{$a}->[0]} keys %spotter) {
			my $row = $spotter{$call};
			$row->[$_] ||= 0 for 0 .. $lastcol;
			$statp->write($date, join('^', $call, @$row));
		}
		$statp->close;
	}
}
548
# return true if the stat file is at least as new as the spot file for
# the given day (and both of them have a modification time)
sub checkstats($)
{
	my $date = shift;
	my $spots_mtime = $fp->mtime($date);
	my $stats_mtime = $statp->mtime($date);
	my $both_there = defined($stats_mtime) && defined($spots_mtime);
	return $both_there && $stats_mtime >= $spots_mtime;
}
557
# daily processing
#
# (re)generate yesterday's statistics unless they are already up to date
sub daily
{
	my $now = Julian::Day->new($main::systime);
	my $yesterday = $now->sub(1);
	checkstats($yesterday) or genstats($yesterday);
}
564 1;
565
566
567
568