87731185b22fc27b1b4696cd914235454b000b1f
[spider.git] / perl / QRZ.pm
1 #!/usr/bin/perl -w
2
3 package QRZ;
4
5 use HTML::Parser;
6 use Data::Dumper;
7
8 @ISA = qw( HTML::Parser );
9
10 use strict;
11
12
13 use vars qw($VERSION $BRANCH);
14 $VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ );
15 $BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/  || (0,0));
16 $main::build += $VERSION;
17 $main::branch += $BRANCH;
18
19 sub new
20 {
21     my $pkg = shift;
22         my $self = SUPER::new $pkg;
23         $self->{list} = [];
24         $self->{state} = 'pre';
25     $self->{sort} = undef;
26         $self->{debug} = 0;
27     $self->{call} = uc shift;
28         return $self;
29 }
30
31 sub start
32 {
33         my ($self, $tag, $attr, $attrseq, $origtext) = @_;
34         if ($self->{debug}) {
35                 print "$self->{state} $tag";
36         if ($attr) {
37                         my $dd = new Data::Dumper([$attr], [qw(attr)]);
38                         $dd->Terse(1);
39                         $dd->Indent(0);
40                         $dd->Quotekeys(0);
41                         print " ", $dd->Dumpxs;
42                 }
43                 print "\n";
44         }
45         if ($self->{state} eq 'addr') {
46                 if ($tag eq 'br') {
47                         $self->{addr} .= ", " if length $self->{addr} > $self->{laddr};
48                         $self->{laddr} = length $self->{addr};
49                 } elsif ($tag eq 'p') {
50             push @{$self->{list}}, $self->{addr} ? "$self->{call}|addr|$self->{addr}" : "$self->{call}|addr|unknown";
51                         $self->state('semail');
52                 }
53         } elsif ($self->{state} eq 'email') {
54                 if ($tag eq 'a') {
55                         my $email = $attr->{href};
56                         if ($email) {
57                                 return if $email =~ m{/uedit.html};
58                                 $email =~ s/mailto://i;
59                                 push @{$self->{list}}, "$self->{call}|email|$email";
60                         }
61                 } elsif ($tag eq 'br' || $tag eq 'p') {
62                         $self->state('post');
63                 }
64         }
65 }
66
67 sub text
68 {
69         my ($self, $text) = @_;
70         $text =~ s/^[\s\r\n]+//g;
71         $text =~ s/[\s\r\n]+$//g;
72     print "$self->{state} text $text\n" if $self->{debug};      
73         if (length $text) {
74                 if ($self->{state} eq 'pre' && $text =~ /$self->{call}/i ) {
75                         $self->state('addr');
76                         $self->{addr} = "";
77                         $self->{laddr} = 0;
78                 } elsif ($self->{state} eq 'addr') {
79                         $text =~ s/\ //gi;
80                         $self->{addr} .= $text;
81                 } elsif ($self->{state} eq 'semail' && $text =~ /Email/i ) {
82                         $self->state('email');
83                 }
84         }
85 }
86
87 sub state
88 {
89         my $self = shift;
90         $self->{state} = shift if @_;
91         return $self->{state};
92 }
93
94 sub end
95 {
96         my ($self, $tag, $origtext) = @_;
97     print "$self->{state} /$tag\n" if $self->{debug};
98 }
99
100 sub debug
101 {
102         my ($self, $val) = @_;
103         $self->{debug} = $val;
104 }
105
106 sub answer
107 {
108         my $self = shift;
109         return @{$self->{list}};
110 }
111
112 1;
113