From: Dirk Koopman Date: Sat, 20 Apr 2024 12:48:14 +0000 (+0100) Subject: WIP spot deduping X-Git-Url: http://dxcluster.net/gitweb/gitweb.cgi?a=commitdiff_plain;h=37a9324302cb4de2c25ce0005d697fd9895ea8cd;p=spider.git WIP spot deduping --- diff --git a/data/bands.pl b/data/bands.pl index 1bd60f67..958c3bae 100644 --- a/data/bands.pl +++ b/data/bands.pl @@ -12,6 +12,14 @@ # It is up to YOU to make sure that it makes sense! # # ALL the labels MUST BE in lower case. +# +# Band names change (or I got them wrong in the first place), DO NOT CHANGE THE BAND NAMES +# THAT I USED. If you do then expect to get complaints. Instead just alias then just alias +# them to what you think they should be. +# +# WARNING: if aliasing, the band alias must be declared AFTER the original. +# +# See '550khz' and '630m' as an exemplar. # @@ -22,11 +30,11 @@ '136khz' => bless ( { band => [135, 138], }, 'Bands'), - '500khz' => bless ( { band => [493, 525], + '500khz' => bless ( { band => [472, 479], }, 'Bands'), '160m' => bless( { band => [ 1800, 2000 ], - cw => [ 1800, 1838 ], + cw => [ 1800, 1840 ], rtty => [ 1838, 1841 ], data => [ 1838, 1843], ssb => [ 1831, 2000] @@ -34,7 +42,7 @@ '80m' => bless( { band => [ 3500, 4000 ], cw => [ 3500, 3600 ], - data => [ 3580, 3619 ], + data => [ 3570, 3619 ], rtty => [ 3580, 3619 ], sstv => [ 3730, 3740 ], ssb => [ 3601, 4000 ] @@ -44,11 +52,11 @@ ssb => [5300, 5410], }, 'Bands' ), - '40m' => bless( { band => [ 7000, 7400 ], + '40m' => bless( { band => [ 7000, 7300 ], cw => [ 7000, 7040 ], data => [ 7040, 7100], rtty => [ 7040, 7060], - ssb => [ 7050, 7400 ] + ssb => [ 7050, 7300 ] }, 'Bands'), '30m' => bless( { band => [ 10100, 10150 ], @@ -154,9 +162,20 @@ cw => [47087000, 47089000], ssb => [47087000, 47089000], }, 'Bands'), + '4mm' => bless( { band => [75500000, 81000000], }, 'Bands'), + '122g' => bless( { band => [122250000, 123000000], + }, 'Bands'), + + '134g' => bless( { band => [134000000, 141000000], + }, 'Bands'), + + '248g' => bless( { band => [241000000, 250000000], + }, 'Bands'), + + 'band1' => bless ( { band => [47000, 49999, 52000, 68000], }, 'Bands'), @@ -193,6 +212,17 @@ vhf => bless ( { band => [30000, 299999], }, 'Bands'), ); +# +# fix up some aliases +# + +$bands{'630m'} => $bands{'500khz'}; +$bands{'24g'} => $bands{'12mm'}; +$bands{'47g'} => $bands{'6mm'}; +$bands{'76g'} => $bands{'4mm'}; + + + # # the list of regions # diff --git a/perl/DXDupe.pm b/perl/DXDupe.pm index 012039ee..795346db 100644 --- a/perl/DXDupe.pm +++ b/perl/DXDupe.pm @@ -47,7 +47,7 @@ sub check sub find { return 0 unless $_[0]; - return $d{$_[0]}; + return exists $d{$_[0]} ? $d{$_[0]} : 0; } sub add diff --git a/perl/Filter.pm b/perl/Filter.pm index f4089fcf..1c6b8589 100644 --- a/perl/Filter.pm +++ b/perl/Filter.pm @@ -358,7 +358,8 @@ sub load_dxchan { my $dxchan = shift; my $sort = lc shift; - my $in = shift ? 'in' : ''; + my $in = shift; + $in = $in ? 'in' : ''; # to cope with older perls that did not like a ternary expression with 'shift' as a conditional my $nossid = $dxchan->call; $nossid =~ s/-\d+$//; my $n = "$in$sort" . "filter"; diff --git a/perl/Spot.pm b/perl/Spot.pm index 64db9a72..3012c9e9 100644 --- a/perl/Spot.pm +++ b/perl/Spot.pm @@ -33,7 +33,7 @@ $maxspots = 100; # maximum spots to return $defaultspots = 10; # normal number of spots to return $maxdays = 100; # normal maximum no of days to go back $dirprefix = "spots"; -$duplth = 20; # the length of text to use in the deduping +$duplth = 15; # the length of text to use in the deduping $dupage = 1*3600; # the length of time to hold spot dups $maxcalllth = 12; # the max length of call to take into account for dupes $filterdef = bless ([ @@ -502,29 +502,57 @@ sub dup $call = substr($call, 0, $maxcalllth) if length $call > $maxcalllth; + my $dtext ; + my $l = length $text; + $dtext = qq{original:'$text'($l)} if isdbg('spottext'); + chomp $text; + $text =~ s/\%([0-9A-F][0-9A-F])/chr(hex($1))/eg; $text = uc unpad($text); + + $l = length $text; + $dtext .= qq{->afterhex: '$text'($l)} if isdbg('spottext'); + my @dubious; + if (isdbg('spottext')) { + (@dubious) = $text =~ /([?\x00-\x08\x0a-\x1F\x7B-\xFF]+)+/; + $dtext .= sprintf q{DUBIOUS '%s'}, join '', @dubious if @dubious; + } + my $otext = $text; # $text = Encode::encode("iso-8859-1", $text) if $main::can_encode && Encode::is_utf8($text, 1); $text =~ s/^\+\w+\s*//; # remove leading LoTW callsign - $text =~ s/\s{2,}[\dA-Z]?[A-Z]\d?$// if length $text > 24; + $text =~ s/\s{2,}[\dA-Z]?[A-Z]\d?$//g if length $text > 24; + $text =~ s/\x09+//g; $text =~ s/[\W\x00-\x2F\x7B-\xFF]//g; # tautology, just to make quite sure! - $text = substr($text, 0, $duplth) if length $text > $duplth; + $text = substr($text, 0, $duplth) if length $text > $duplth; + + $l = length $text; + $dtext .= qq{->final:'$text'($l)} if isdbg('spottext'); + my $ldupkey = $oldstyle ? "X|$call|$by|$node|$freq|$d|$text" : "X|$call|$by|$node|$qrg|$nd|$text"; - my $t = DXDupe::find($ldupkey); + my $t = 0; + $t = DXDupe::find($ldupkey); dbg("Spot::dup ldupkey $ldupkey t '$t'") if isdbg('spotdup'); - return 1 if $t > 0; + $dtext .= ' DUPE' if $t; + dbg("text transforms: $dtext") if length $text && isdbg('spottext'); + return 1 if $t > 0; DXDupe::add($ldupkey, $main::systime+$dupage) unless $just_find; + $otext = substr($otext, 0, $duplth) if length $otext > $duplth; $otext =~ s/\s+$//; if (length $otext && $otext ne $text) { $ldupkey = $oldstyle ? "X|$freq|$call|$by|$otext" : "X|$qrg|$call|$by|$otext"; $t = DXDupe::find($ldupkey); - dbg("Spot::dup ldupkey $ldupkey t '$t'") if isdbg('spotdup'); + dbg("Spot::dup (OTEXT) ldupkey $ldupkey t '$t'") if isdbg('spotdup'); + if (isdbg('spottext')) { + $dtext .= sprintf q{DUBIOUS '%s'}, join '', @dubious if @dubious; + $dtext .= ' DUPE (OTEXT)' if $t; + dbg("text transforms: $dtext") if length $text; + } return 1 if $t > 0; DXDupe::add($ldupkey, $main::systime+$dupage) unless $just_find; }