From 72346862008f655ed4d70067d67a3759bc81de61 Mon Sep 17 00:00:00 2001 From: minima Date: Sat, 14 May 2005 21:48:19 +0000 Subject: [PATCH 1/1] added a query routine --- perl/ARRL/DX.pm | 183 ++++++++++++++++++++++++++++++++++++++++++------ perl/dbquery.pl | 41 +++++++++++ perl/dbtest.pl | 4 ++ 3 files changed, 207 insertions(+), 21 deletions(-) create mode 100755 perl/dbquery.pl diff --git a/perl/ARRL/DX.pm b/perl/ARRL/DX.pm index 924ed672..b2e52a86 100644 --- a/perl/ARRL/DX.pm +++ b/perl/ARRL/DX.pm @@ -10,7 +10,7 @@ use strict; package ARRL::DX; -use vars qw($VERSION $BRANCH $dbh $dbname %tabledefs $error); +use vars qw($VERSION $BRANCH $dbh $dbname %tabledefs $error %stop $limit); main::mkver($VERSION = q$Revision$) if main->can('mkver'); @@ -30,14 +30,65 @@ $dbname = "$main::root/data/arrldx.db"; refer_t_idx => 'CREATE INDEX refer_t_idx ON refer(t DESC)', ); -sub new +%stop = ( + A => 1, + ACTIVITY => 1, + AND => 1, + ARE => 1, + AS => 1, + AT => 1, + BE => 1, + BUT => 1, + FOR => 1, + FROM => 1, + HAS => 1, + HAVE => 1, + HE => 1, + I => 1, + IF => 1, + IN => 1, + IS => 1, + IT => 1, + LOOK => 1, + LOOKS => 1, + NOT => 1, + OF => 1, + ON => 1, + OR => 1, + OUT => 1, + SHE => 1, + SO => 1, + THAT => 1, + THE => 1, + THEM => 1, + THEY => 1, + THIS => 1, + THIS => 1, + TO => 1, + WAS => 1, + WHERE => 1, + WILL => 1, + WITH => 1, + YOU => 1, + + JANUARY => 1, + FEBRUARY => 1, + MARCH => 1, + APRIL => 1, + MAY => 1, + JUNE => 1, + JULY => 1, + AUGUST => 1, + SEPTEMBER => 1, + OCTOBER => 1, + NOVEMBER => 1, + DECEMBER => 1, + ); + +$limit = 10; + +sub do_connect { - my $pkg = shift; - my $class = ref $pkg || $pkg; - my %args = @_; - - $error = undef; - unless ($dbh) { $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", ""); unless ($dbh) { @@ -46,18 +97,49 @@ sub new $error = $DBI::errstr; return; } - - # check that all the tables are present and correct - my $sth = $dbh->prepare("select name,type from sqlite_master") or $error = $DBI::errstr, return; - $sth->execute or $error = $DBI::errstr, return; - my %f; - while (my @row = $sth->fetchrow_array) { - $f{$row[0]} = $row[1]; - } - foreach my $t (sort keys %tabledefs) { - $dbh->do($tabledefs{$t}) unless exists $f{$t}; - } - $sth->finish; + } + return $dbh; +} + +sub drop +{ + return unless do_connect(); + my $sth = $dbh->prepare("select name,type from sqlite_master where type = 'table'") or $error = $DBI::errstr, return; + $sth->execute or $error = $DBI::errstr, return; + while (my @row = $sth->fetchrow_array) { + $dbh->do("drop table $row[0]"); + } + $sth->finish; +} + +sub create +{ + return unless $dbh; + + # check that all the tables are present and correct + my $sth = $dbh->prepare("select name,type from sqlite_master") or $error = $DBI::errstr, return; + $sth->execute or $error = $DBI::errstr, return; + my %f; + while (my @row = $sth->fetchrow_array) { + $f{$row[0]} = $row[1]; + } + foreach my $t (sort keys %tabledefs) { + $dbh->do($tabledefs{$t}) unless exists $f{$t}; + } + $sth->finish; +} + +sub new +{ + my $pkg = shift; + my $class = ref $pkg || $pkg; + my %args = @_; + + $error = undef; + + unless ($dbh) { + return unless do_connect(); + create(); } my $self = {}; @@ -96,6 +178,7 @@ sub process } elsif ($state eq 'id') { if (/^Newington\s+CT\s+(\w+)\s+(\d+),\s+(\d+)/i) { $state = 'date' ; + $self->{year} = $3; $self->{date} = str2time("$1 $2 $3") if $state eq 'date'; } } elsif ($state eq 'date') { @@ -139,8 +222,66 @@ sub insert { my $self = shift; my $sth = $dbh->prepare("insert into paragraph values(?,?,?)"); - $sth->execute($self->{para}, $self->{date}, $self->{id}); + $sth->execute($self->{para}, $self->{date}, "$self->{year}-$self->{id}"); my $lastrow = $dbh->func('last_insert_rowid'); + $sth->finish; + + +# my @w = split /[.,;:\s"'\$\%!£^&\*\(\)\[\]\{\}\#\<\>+=]+/, $self->{para}; + my @w = split m|[\b\s]+|, $self->{para}; +# print join(' ', @w), "\n"; + $sth = $dbh->prepare("insert into refer values(?,?,?,?)"); + + my $i = 0; + for (@w) { + + # starts with a capital letter that isn't Q + if (/^[A-PR-Z]/ || m|\d+[A-Z][-/A-Z0-9]*$|) { + # not all digits + next if /^\d+$/; + + # isn't a stop word + my $w = uc; + $w =~ s/\W+$//; + unless ($stop{$w}) { + # add it into the word list + $sth->execute($w, $lastrow, $self->{date}, $i); +# print " $w"; + } + } + $i++; + } + $sth->finish; +} + +sub query +{ + my $self = shift; + my %args = @_; + my @out; + + if ($args{'q'}) { + my @w = map { s|[^-/\w]||g; uc $_ } split /\s+/, $args{'q'}; + if (@w) { + my $s = qq{select distinct p, t, bullid from (select distinct rowid from refer where }; + while (@w) { + my $w = shift @w; + $s .= qq{r like '$w\%'}; + $s .= ' or ' if @w; + } + my $l = $args{l}; + $l =~ s/[^\d]//g; + $l ||= $limit; + $s .= qq{ order by t desc limit $l), paragraph where paragraph.ROWID=rowid}; + my $sth = $dbh->prepare($s) or $error = $DBI::errstr, return @out; + $sth->execute; + while (my @row = $sth->fetchrow_array) { + push @out, \@row; + } + $sth->finish; + } + } + return @out; } sub close diff --git a/perl/dbquery.pl b/perl/dbquery.pl new file mode 100755 index 00000000..2ac16d1c --- /dev/null +++ b/perl/dbquery.pl @@ -0,0 +1,41 @@ +#!/usr/bin/perl +# test for independent sql servers +# search local then perl directories + +use vars qw($root); + +BEGIN { + # root of directory tree for this system + $root = "/spider"; + $root = $ENV{'DXSPIDER_ROOT'} if $ENV{'DXSPIDER_ROOT'}; + + unshift @INC, "$root/perl"; # this IS the right way round! + unshift @INC, "$root/local"; +} + +use DXUtil; +use DXDebug; +use ARRL::DX; + +print "usage:\tdbquery.pl \n\teg: dbquery.pl rtty lebanon\n\n" unless @ARGV; + +my $width = $ENV{'COLUMNS'} || $ENV{'COLS'} || 80; +my $dx = ARRL::DX->new(); +my @out = $dx->query(q=>join(' ', @ARGV)); + +foreach my $ref (@out) { + my $s = cldate($ref->[1]); + for (split /\s+/, "$ref->[0] [$ref->[2]]") { + if (length($s) + length($_) + 1 < $width ) { + $s .= ' ' if length $s; + $s .= $_; + } else { + print "$s\n"; + $s = $_; + } + } + print "$s\n" if length $s; + print "\n"; +} + +exit 0; diff --git a/perl/dbtest.pl b/perl/dbtest.pl index c4f5ca8e..6951c76d 100755 --- a/perl/dbtest.pl +++ b/perl/dbtest.pl @@ -20,6 +20,10 @@ use ARRL::DX; while (@ARGV) { my $fn = shift; + if ($fn eq '-d') { + ARRL::DX::drop(); + next; + } print "Processing $fn "; my $dx = ARRL::DX->new(file=>$fn); my $c = $dx->process; -- 2.43.0