]> dxcluster.net Git - spider.git/blob - perl/K4UTE.pm
add log2csv.pl
[spider.git] / perl / K4UTE.pm
1 #!/usr/bin/perl -w
2
3 package K4UTE;
4
5 use HTML::Parser;
6 use Data::Dumper;
7
8 @ISA = qw( HTML::Parser );
9
10 use strict;
11
12 use vars qw($VERSION $BRANCH);
13 $VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ );
14 $BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/  || (0,0));
15 $main::build += $VERSION;
16 $main::branch += $BRANCH;
17
18 sub new
19 {
20     my $pkg = shift;
21         my $self = SUPER::new $pkg;
22         $self->{list} = [];
23         $self->{state} = 'pre';
24     $self->{sort} = undef;
25         $self->{debug} = 0;
26     $self->{call} = uc shift;
27         return $self;
28 }
29
30 sub start
31 {
32         my ($self, $tag, $attr, $attrseq, $origtext) = @_;
33         if ($self->{debug}) {
34                 print "$self->{state} $tag";
35         if ($attr) {
36                         my $dd = new Data::Dumper([$attr], [qw(attr)]);
37                         $dd->Terse(1);
38                         $dd->Indent(0);
39                         $dd->Quotekeys(0);
40                         print " ", $dd->Dumpxs;
41                 }
42                 print "\n";
43         }
44         if ($tag eq 'tr' ) {
45                 if ($self->{state} eq 't1') {
46                         $self->state('t1r');
47                 } elsif ($self->{state} eq 't1r') {
48                         $self->state('t1d1');
49                 } elsif ($self->{state} eq 't2') {
50                         $self->state('t2r');
51                 } elsif ($self->{state} eq 't2r') {
52                         $self->state('t2d1');
53                 }
54         } 
55 }
56
57 sub text
58 {
59         my ($self, $text) = @_;
60         $text =~ s/^[\s\r\n]+//g;
61         $text =~ s/[\s\r\n]+$//g;
62     print "$self->{state} text $text\n" if $self->{debug};      
63         if (length $text) {
64                 if ($self->{state} eq 'pre' && $text =~ /$self->{call}/i ) {
65                         $self->state('t1');
66                         $self->{addr} = "";
67                         $self->{laddr} = 0;
68                 } elsif ($self->{state} eq 't1d1') {
69                         $self->{dxcall} = $text;
70                         $self->state('t1d2');
71                 } elsif ($self->{state} eq 't1d2') {
72                         $self->{dxmgr} = $text;
73                         $self->state('t1d3');
74                 } elsif ($self->{state} eq 't1d3') {
75                         $self->{dxdate} = amdate($text);
76                         $self->state('t1d4');
77                 } elsif ($self->{state} eq 't1d4') {
78                         push @{$self->{list}}, "$self->{dxcall}|mgr|$self->{dxmgr}|$self->{dxdate}|$text";
79                         $self->state('t1e');
80                 } elsif ($self->{state} eq 't2d1') {
81                         $self->{dxcall} = $text;
82                         $self->state('t2d2');
83                 } elsif ($self->{state} eq 't2d2') {
84                         $self->{dxaddr} = $text;
85                         $self->state('t2d3');
86                 } elsif ($self->{state} eq 't2d3') {
87                         $self->{dxdate} = amdate($text);
88                         $self->state('t2d4');
89                 } elsif ($self->{state} eq 't2d4') {
90                         push @{$self->{list}}, "$self->{dxcall}|addr|$self->{dxaddr}|$self->{dxdate}|$text";
91                         $self->state('t2e');
92                 } elsif ($self->{state} eq 't2' && $text =~ /did\s+not\s+return/i) {
93                         $self->state('last');
94                 }
95         }
96 }
97
98 sub end
99 {
100         my ($self, $tag, $origtext) = @_;
101     print "$self->{state} /$tag\n" if $self->{debug};
102         if ($self->{state} =~ /^t1/ && $tag eq 'table') {
103                 $self->state('t2');
104         } elsif ($self->{state} =~ /^t2/ && $tag eq 'table') {
105                 $self->state('last');
106         }
107 }
108
109 sub amdate
110 {
111         my $text = shift;
112         my ($m, $d, $y) = split m{/}, $text;
113         $y += 1900;
114         $y += 100 if $y < 1990;
115         return sprintf "%02d-%s-%d", $d, (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$m-1], $y;
116 }
117
118 sub state
119 {
120         my $self = shift;
121         $self->{state} = shift if @_;
122         return $self->{state};
123 }
124
125 sub debug
126 {
127         my ($self, $val) = @_;
128         $self->{debug} = $val;
129 }
130
131 sub answer
132 {
133         my $self = shift;
134         return @{$self->{list}};
135 }
136
137 1;
138