From: minima
Date: Mon, 29 Aug 2005 17:31:02 +0000 (+0000)
Subject: add a literal dupe check for the comments in spots
X-Git-Tag: 1.53~128
X-Git-Url: http://dxcluster.net/gitweb/gitweb.cgi?a=commitdiff_plain;h=61edf37f1528fe84f1150c0b31c98d8587424fac;p=spider.git

add a literal dupe check for the comments in spots
---

diff --git a/Changes b/Changes
index 73e31c12..770879b7 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+29Aug05=======================================================================
+1. try something else to resolve/prevent more of these spot dupes. It appears
+that there is software out there chopping off variable amounts from the ends
+of comment fields. This makes the task of detecting dupes that much harder.
 10Aug05=======================================================================
 1. trim dx spot duplicate length after normalisation
 2. Do the same for announces
diff --git a/perl/Spot.pm b/perl/Spot.pm
index d52575af..f331d105 100644
--- a/perl/Spot.pm
+++ b/perl/Spot.pm
@@ -343,6 +343,7 @@ sub dup
 			$text = "" if $cty == $try[0];
 		}
 	}
+	my $otext = $text;
 	$text = pack("C*", map {$_ & 127} unpack("C*", $text));
 	$text =~ s/[^\w]//g;
 	$text = substr($text, 0, $duplth) if length $text > $duplth;
@@ -350,6 +351,11 @@ sub dup
 	my $t = DXDupe::find($ldupkey);
 	return 1 if $t && $t - $main::systime > 0;
 	DXDupe::add($ldupkey, $main::systime+$dupage);
+	$otext = substr($otext, 0, $duplth) if length $otext > $duplth;
+	$ldupkey = "X$freq|$call|$by|$otext";
+	$t = DXDupe::find($ldupkey);
+	return 1 if $t && $t - $main::systime > 0;
+	DXDupe::add($ldupkey, $main::systime+$dupage);
 # my $sdupkey = "X$freq|$call|$by";
 # $t = DXDupe::find($sdupkey);
 # return 1 if $t && $t - $main::systime > 0;