]> dxcluster.net Git - spider.git/blob - perl/QRZ.pm
add CTY-2705 prefixes
[spider.git] / perl / QRZ.pm
1 #!/usr/bin/perl -w
2
3 package QRZ;
4
5 use HTML::Parser;
6 use Data::Dumper;
7
8 @ISA = qw( HTML::Parser );
9
10 use strict;
11
12
13 sub new
14 {
15     my $pkg = shift;
16         my $self = SUPER::new $pkg;
17         $self->{list} = [];
18         $self->{state} = 'pre';
19     $self->{sort} = undef;
20         $self->{debug} = 0;
21     $self->{call} = uc shift;
22         return $self;
23 }
24
25 sub start
26 {
27         my ($self, $tag, $attr, $attrseq, $origtext) = @_;
28         if ($self->{debug}) {
29                 print "$self->{state} $tag";
30         if ($attr) {
31                         my $dd = new Data::Dumper([$attr], [qw(attr)]);
32                         $dd->Terse(1);
33                         $dd->Indent(0);
34                         $dd->Quotekeys(0);
35                         print " ", $dd->Dumpxs;
36                 }
37                 print "\n";
38         }
39         if ($self->{state} eq 'addr') {
40                 if ($tag eq 'br') {
41                         $self->{addr} .= ", " if length $self->{addr} > $self->{laddr};
42                         $self->{laddr} = length $self->{addr};
43                 } elsif ($tag eq 'p') {
44             push @{$self->{list}}, $self->{addr} ? "$self->{call}|addr|$self->{addr}" : "$self->{call}|addr|unknown";
45                         $self->state('semail');
46                 }
47         } elsif ($self->{state} eq 'email') {
48                 if ($tag eq 'a') {
49                         my $email = $attr->{href};
50                         if ($email) {
51                                 return if $email =~ m{/uedit.html};
52                                 $email =~ s/mailto://i;
53                                 push @{$self->{list}}, "$self->{call}|email|$email";
54                         }
55                 } elsif ($tag eq 'br' || $tag eq 'p') {
56                         $self->state('post');
57                 }
58         }
59 }
60
61 sub text
62 {
63         my ($self, $text) = @_;
64         $text =~ s/^[\s\r\n]+//g;
65         $text =~ s/[\s\r\n]+$//g;
66     print "$self->{state} text $text\n" if $self->{debug};      
67         if (length $text) {
68                 if ($self->{state} eq 'pre' && $text =~ /$self->{call}/i ) {
69                         $self->state('addr');
70                         $self->{addr} = "";
71                         $self->{laddr} = 0;
72                 } elsif ($self->{state} eq 'addr') {
73                         $text =~ s/\ //gi;
74                         $self->{addr} .= $text;
75                 } elsif ($self->{state} eq 'semail' && $text =~ /Email/i ) {
76                         $self->state('email');
77                 }
78         }
79 }
80
81 sub state
82 {
83         my $self = shift;
84         $self->{state} = shift if @_;
85         return $self->{state};
86 }
87
88 sub end
89 {
90         my ($self, $tag, $origtext) = @_;
91     print "$self->{state} /$tag\n" if $self->{debug};
92 }
93
94 sub debug
95 {
96         my ($self, $val) = @_;
97         $self->{debug} = $val;
98 }
99
100 sub answer
101 {
102         my $self = shift;
103         return @{$self->{list}};
104 }
105
106 1;
107