]> dxcluster.net Git - spider.git/commitdiff
get it up and running basically on a test file
authorminima <minima>
Fri, 13 May 2005 18:42:44 +0000 (18:42 +0000)
committerminima <minima>
Fri, 13 May 2005 18:42:44 +0000 (18:42 +0000)
perl/ARRL/DX.pm
perl/dbtest.pl

index 7eaf3eb716e19a9d5fd7cbfc2b433c324bf8d25f..924ed6723b29fcffc1edb6195e4c828bceb5c928 100644 (file)
@@ -12,20 +12,21 @@ package ARRL::DX;
 
 use vars qw($VERSION $BRANCH $dbh $dbname %tabledefs $error);
 
-#main::mkver($VERSION = q$Revision$);
+main::mkver($VERSION = q$Revision$) if main->can('mkver');
 
 use DXLog;
 use DXDebug;
 use DXUtil;
 use DBI;
 use IO::File;
+use Date::Parse;
 
 $dbname = "$main::root/data/arrldx.db";
 %tabledefs = (
-                         paragraph => 'CREATE TABLE paragraph(p text, t int)',
+                         paragraph => 'CREATE TABLE paragraph(p text, t int, bullid text)',
                          paragraph_t_idx => 'CREATE INDEX paragraph_t_idx ON paragraph(t DESC)',
-                         refer => 'CREATE TABLE refer(r text, id int, t int, pos int)',
-                         refer_id_idx => 'CREATE INDEX refer_id_idx ON refer(id)',
+                         refer => 'CREATE TABLE refer(r text, rowid int, t int, pos int)',
+                         refer_id_idx => 'CREATE INDEX refer_id_idx ON refer(rowid)',
                          refer_t_idx => 'CREATE INDEX refer_t_idx ON refer(t DESC)',
                         );
 
@@ -33,7 +34,7 @@ sub new
 {
        my $pkg = shift;
        my $class = ref $pkg || $pkg;
-       my %args = $@;
+       my %args = @_;
        
        $error = undef;
        
@@ -75,12 +76,76 @@ sub new
 sub process
 {
        my $self = shift;
+
+       return unless $self->{f};
+       
+       my $state;
+       my $count;
        
+       $dbh->begin_work;
+       my $f = $self->{f};
+       while (<$f>) {
+#              print;
+               unless ($state) {
+                       $state = 'ZC' if /^ZCZC/; 
+               } elsif ($state eq 'ZC') {
+                       if (/\b(ARLD\d+)\b/) {
+                               $self->{id} = $1;
+                               $state = 'id';
+                       }
+               } elsif ($state eq 'id') {
+                       if (/^Newington\s+CT\s+(\w+)\s+(\d+),\s+(\d+)/i) {
+                               $state = 'date' ;
+                               $self->{date} = str2time("$1 $2 $3") if $state eq 'date';
+                       }
+               } elsif ($state eq 'date') {
+                       if (/^$self->{id}/) {
+                               last unless /DX\s+[Nn]ews\s*$/;
+                               $state = 'week'; 
+                       }
+               } elsif ($state eq 'week') {
+                       $state = 'weekro' if /^This\s+week/;
+               } elsif ($state eq 'weekro') {
+                       if (/^\s*$/) {
+                               $state = 'para';
+                               $self->{para} = "";
+                       }
+               } elsif ($state eq 'para') {
+                       if (/^\s*$/) {
+                               if ($self->{para}) {
+                                       $self->{para} =~ s/^\s+//;
+                                       $self->{para} =~ s/\s+$//;
+                                       $self->{para} =~ s/\s+/ /g;
+                                       $self->insert;
+                                       $self->{para} = "";
+                                       $count++;
+                               }
+                       } elsif (/^THIS\s+WEEKEND/) {
+                               last;
+                       }
+                       chomp;
+                       s/^\s+//;
+                       s/\s+$//;
+                       $self->{para} .= $_ . ' ';
+               }
+       }
+       $dbh->commit;
+       $self->{f}->close;
+       delete $self->{f};
+       return $count;
 }
 
 sub insert
 {
        my $self = shift;
-       
+       my $sth = $dbh->prepare("insert into paragraph values(?,?,?)");
+       $sth->execute($self->{para}, $self->{date}, $self->{id});
+       my $lastrow = $dbh->func('last_insert_rowid');
+}
+
+sub close
+{
+       $dbh->disconnect;
+       undef $dbh;
 }
 1;
index 51a5c4d02f61d17ecec799830d8a00e9dc7db31e..c4f5ca8e9d259a08cd016ce1530dc3b246654306 100755 (executable)
@@ -18,6 +18,14 @@ use DXDebug;
 use ARRL::DX;
 
 
-my $dx = ARRL::DX->new;
+while (@ARGV) {
+       my $fn = shift;
+       print "Processing $fn ";
+       my $dx = ARRL::DX->new(file=>$fn);
+       my $c = $dx->process;
+       print "$c paragraphs\n";
+}
+
+ARRL::DX::close();
 
 exit 0;