X-Git-Url: http://dxcluster.net/gitweb/gitweb.cgi?a=blobdiff_plain;f=perl%2Fgen_usdb_data.pl;h=71101bec228df8acc42a1082f514022404bf9009;hb=9c415b19aaaf0497e9cc074f3df56f16fe414e82;hp=153a560153b5ab025e503d2a92e845f707372b5c;hpb=08912ec52dee25bbe00aef10387e1822dcd574bc;p=spider.git

diff --git a/perl/gen_usdb_data.pl b/perl/gen_usdb_data.pl
index 153a5601..71101bec 100755
--- a/perl/gen_usdb_data.pl
+++ b/perl/gen_usdb_data.pl
@@ -55,12 +55,12 @@ my $blksize = 1024 * 1024;
 
 STDOUT->autoflush(1);
 
-my $dbrawfn = "$main::data/usdbraw";
+my $dbrawfn = "$main::data/usdbraw.gz";
 
 rename "$dbrawfn.oo", "$dbrawfn.ooo";
 rename "$dbrawfn.o", "$dbrawfn.oo";
 rename "$dbrawfn", "$dbrawfn.o";
-my $gzfh = gzopen($dbrawfn, "wb9") or die "Cannot open $dbrawfn $!";
+my $gzfh = gzopen($dbrawfn, "wb") or die "Cannot open $dbrawfn $!";
 
 my $ctycount;
 
@@ -73,7 +73,6 @@ foreach my $argv (@ARGV) {
 }
 
 $gzfh->gzclose;
-print "$ctycount Cities found\n";
 exit(0);
 
 
@@ -96,19 +95,20 @@ sub handleEN
 		$l =~ s/[\r\n]+$//;
 		my ($rt,$usi,$ulsfn,$ebfno,$call,$type,$lid,$name,$first,$middle,$last,$suffix,
 			$phone,$fax,$email,$street,$city,$state,$zip,$pobox,$attl,$sgin,$frn) = split /\|/, $l;
-
-		my $rec = uc join '|', $call,$city,$state if $city && $state;
-		$buf .= "$rec\n";
-		if (length $buf > $blksize) {
-			$gzfh->gzwrite($buf);
-			undef $buf;
+
+#		print "ERR: $l\n" unless $call && $city && $state;
+
+		if ($call && $city && $state) {
+			my $rec = uc join '|', $call,$city,$state if $city && $state;
+			$buf .= "$rec\n";
+			if (length $buf > $blksize) {
+				$gzfh->gzwrite($buf);
+				undef $buf;
+			}
+			$count++;
 		}
-		my $c = uc "$city|$state";
-		$count++;
-	}
-	if (length $buf > $blksize) {
-		$gzfh->gzwrite($buf);
 	}
+	$gzfh->gzwrite($buf) if length $buf;
 	print ", $count records\n";
 	$fh->close;
 }