]> dxcluster.net Git - spider.git/blob - perl/Buck.pm
fix obadw check on cmd line, new words
[spider.git] / perl / Buck.pm
1 #!/usr/bin/perl -w
2
3 package Buck;
4
5 use HTML::Parser;
6 use Data::Dumper;
7 use DXUtil;
8
9 @ISA = qw( HTML::Parser );
10
11 use strict;
12
13 sub new
14 {
15     my $pkg = shift;
16         my $self = SUPER::new $pkg;
17         $self->{list} = [];
18         $self->{state} = 'pre';
19     $self->{sort} = undef;
20         $self->{debug} = 0;
21     $self->{call} = uc shift;
22         return $self;
23 }
24
25 sub start
26 {
27         my ($self, $tag, $attr, $attrseq, $origtext) = @_;
28         if ($self->{debug}) {
29                 print "$self->{state} $tag";
30         if ($attr) {
31                         my $dd = new Data::Dumper([$attr], [qw(attr)]);
32                         $dd->Terse(1);
33                         $dd->Indent(0);
34                         $dd->Quotekeys(0);
35                         print " ", $dd->Dumpxs;
36                 }
37                 print "\n";
38         }
39         if ($self->{state} eq 'pre' && $tag eq 'table') {
40                 $self->state('t1');
41         } elsif ($self->{state} eq 't1' && $tag eq 'table') {
42                 $self->state('t2');
43         } elsif ($self->{state} eq 't2' && $tag =~ /^h/) {
44                 $self->{addr} = "";
45                 $self->{laddr} = 0;
46                 $self->state('addr');
47         } elsif ($self->{state} eq 'addr') {
48                 if ($tag eq 'br') {
49                         $self->{addr} .= ", " if length $self->{addr} > $self->{laddr};
50                         $self->{laddr} = length $self->{addr};
51                 } elsif ($tag eq 'p') {
52             push @{$self->{list}}, $self->{addr} ? "$self->{call}|addr|$self->{addr}" : "$self->{call}|addr|unknown";
53                         $self->state('semail');
54                 }
55         } elsif ($self->{state} eq 'email') {
56                 if ($tag eq 'a') {
57                         my $email = $attr->{href};
58                         if ($email && $email =~ /mailto/i) {
59                                 $email =~ s/mailto://i;
60                                 push @{$self->{list}}, "$self->{call}|email|$email";
61                         }
62                 } elsif ($tag eq 'br' || $tag eq 'p') {
63                         $self->state('post');
64                 }
65         } elsif ($self->{state} eq 'post' && $tag eq 'form') {
66                 if (exists $self->{pos} && length $self->{pos}) {
67                         push @{$self->{list}}, "$self->{call}|location|$self->{pos}";
68                         $self->state('last');
69                 }
70         }
71 }
72
73 sub text
74 {
75         my ($self, $text) = @_;
76         $text =~ s/^[\s\r\n]+//g;
77         $text =~ s/[\s\r\n]+$//g;
78     print "$self->{state} text $text\n" if $self->{debug};      
79         if (length $text) {
80                 if ($self->{state} eq 'addr') {
81                         $text =~ s/\ //gi;
82                         $self->{addr} .= $text;
83                 } elsif ($self->{state} eq 'semail' && $text =~ /Email/i ) {
84                         $self->state('email');
85                 } elsif ($self->{state} eq 'post') {
86                         if ($text =~ /Latitude/i) {
87                                 $self->state('lat');
88                                 $self->{pos} = "" unless $self->{pos};
89                         } elsif ($text =~ /Longitude/i) {
90                                 $self->state('long');
91                                 $self->{pos} = "" unless $self->{pos};
92                         } elsif ($text =~ /Grid/i) {
93                                 $self->state('grid');
94                                 $self->{pos} = "" unless $self->{pos};
95                         }
96                 } elsif ($self->{state} eq 'lat') {
97                         my ($n, $l) = $text =~ /(\b[\d\.]+\b)\s+([NSns])/;
98                         $n = -$n if $l eq 'S' || $l eq 's';
99                         $self->{pos} = slat($n);
100                         $self->state('post');
101                 } elsif ($self->{state} eq 'long') {
102                         my ($n, $l) = $text =~ /(\b[\d\.]+\b)\s+([EWew])/;
103                         $n = -$n if $l eq 'W' || $l eq 'w';
104                         $self->{pos} .= "|" . slong($n);
105                         $self->state('post');
106                 } elsif ($self->{state} eq 'grid') {
107                         my ($qra) = $text =~ /(\b\w\w\d\d\w\w\b)/;
108                         $self->{pos} .= "|" . uc $qra;
109                         push @{$self->{list}}, "$self->{call}|location|$self->{pos}";
110                         $self->state('last');
111                 } elsif (($self->{state} eq 'pre' || $self->{state} =~ /^t/) && $text =~ /not\s+found/) {
112             push @{$self->{list}}, "$self->{call}|addr|unknown";
113                         $self->state('last');
114                 } elsif ($self->{state} eq 'email' && $text =~ /unknown/i) {
115                         $self->state('post');
116                 }
117         }
118 }
119
120 sub state
121 {
122         my $self = shift;
123         $self->{state} = shift if @_;
124         return $self->{state};
125 }
126
127 sub end
128 {
129         my ($self, $tag, $origtext) = @_;
130     print "$self->{state} /$tag\n" if $self->{debug};
131 }
132
133 sub debug
134 {
135         my ($self, $val) = @_;
136         $self->{debug} = $val;
137 }
138
139 sub answer
140 {
141         my $self = shift;
142         return @{$self->{list}};
143 }
144
145 1;
146