X-Git-Url: http://dxcluster.net/gitweb/gitweb.cgi?a=blobdiff_plain;f=perl%2FBadWords.pm;h=05a41bcc01aa15f1740e9a6545ace2e65d172156;hb=refs%2Fheads%2Fnewpc92timings;hp=b598c385d4e940f45716a4f2f35c303ad9b46e1e;hpb=c77ea09e91a5f7c3052e3e30dfb48fcaad1e0dcd;p=spider.git diff --git a/perl/BadWords.pm b/perl/BadWords.pm index b598c385..05a41bcc 100644 --- a/perl/BadWords.pm +++ b/perl/BadWords.pm @@ -3,7 +3,7 @@ # # Copyright (c) 2000 Dirk Koopman # -# $Id$ +# # package BadWords; @@ -24,17 +24,11 @@ my $regex = "$main::data/badw_regex"; my $bwfn = "$main::data/badword"; # copy issue ones across -filecopy("$regex.issue", $regex) unless -e $regex; +filecopy("$regex.gb.issue", $regex) unless -e $regex; filecopy("$bwfn.issue", $bwfn) unless -e $bwfn; $badword = new DXHash "badword"; -use vars qw($VERSION $BRANCH); -$VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ ); -$BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ ) || 0; -$main::build += $VERSION; -$main::branch += $BRANCH; - # load the badwords file sub load { @@ -75,7 +69,7 @@ sub create_regex my $w = uc $_; my @l = split //, $w; my $e = join '+[\s\W]*', @l; - $s .= "push \@out, \$1 if \$str =~ /($e)/;\n"; + $s .= "push \@out, \$1 if \$str =~ /\\b($e)/;\n"; } } $s .= "return \@out;\n}"; @@ -102,13 +96,11 @@ sub check my $s = uc shift; my @out; - dbg($s) if isdbg('badword'); push @out, &$regexcode($s) if $regexcode; return @out if @out; - for (split(/\s+/, $s)) { - s/\'?S$//; + for (split(/\b/, $s)) { push @out, $_ if $badword->in($_); }