3 # an attempt at producing a general purpose 'bot' for going and getting
4 # things orf the web and presenting them to user in a form they want
6 # This program uses LWP::Parallel::UserAgent to do its business
8 # each sub bot has the same structure and calling interface, but the actual
9 # input and output data formats are completely arbitrary
11 # Copyright (c) 1999 - Dirk Koopman, Tobit Computer Co Ltd
21 # root of directory tree for this system
23 $root = $ENV{'DXSPIDER_ROOT'} if $ENV{'DXSPIDER_ROOT'};
25 unshift @INC, "$root/perl"; # this IS the right way round!
26 unshift @INC, "$root/local";
31 require LWP::Parallel::UserAgent;
41 use vars qw($version);
47 $SIG{INT} = $SIG{TERM} = 'IGNORE';
55 return 'Continue' if /100/;
56 return 'Switching protocols' if /101/;
59 return 'Created' if /201/;
60 return 'Accepted' if /202/;
61 return 'Non Authoritive' if /203/;
62 return 'No Content' if /204/;
63 return 'Reset Content' if /205/;
64 return 'Partial Content' if /206/;
66 return 'Multiple Choices' if /300/;
67 return 'Moved Permanently' if /301/;
68 return 'Found, redirect' if /302/;
69 return 'See Other' if /303/;
70 return 'Not modified' if /304/;
71 return 'Use proxy' if /305/;
73 return 'Bad request' if /400/;
74 return 'Unauthorized' if /401/;
75 return 'Payment required' if /402/;
76 return 'Forbidden' if /403/;
77 return 'Not Found' if /404/;
78 return 'Method not allowed' if /405/;
79 return 'Not acceptable' if /406/;
80 return 'Proxy authentication required' if /407/;
81 return 'Request timeout' if /408/;
82 return 'Conflict' if /409/;
83 return 'Gone' if /410/;
84 return 'Length required' if /411/;
85 return 'Precondition failed' if /412/;
86 return 'Request entity too large' if /413/;
87 return 'Request-URI too long' if /414/;
88 return 'Unsupported media type' if /415/;
89 return 'Requested range not satifiable' if /416/;
90 return 'Expectation failed' if /417/;
92 return 'Internal server error' if /500/;
93 return 'Not implemented' if /501/;
94 return 'Bad gateway' if /502/;
95 return 'Service unavailable' if /503/;
96 return 'Gateway timeout' if /504/;
97 return 'HTTP version not supported' if /505/;
106 $s =~ s/\b(?:THE|\&|A|AND|OR|NOT)\b//gi;
107 $s =~ s/(?:\(|\))//g;
108 return join('|', split(/\s+/, $s));
111 # qrz specific routines
114 my ($ua, $call, $title) = @_;
115 my $sreq = "http://www.qrz.com/callsign.html?callsign=$call";
117 my $req = HTTP::Request->new('GET', $sreq);
118 return $ua->register($req);
123 my ($fh, $call, $title, $code, $content) = @_;
125 print $fh "QRZ|$code|", trancode($code), "\n";
130 my $r = new QRZ $call;
134 my $l = length $content;
135 for ($i = 0; $i < $l && ($chunk = substr($content, $i, 512)); $i += 512) {
140 my @lines = $r->answer;
142 print $fh "QRZ|$code|$_\n" if $_;
144 print "lines: ", scalar @lines, "\n";
147 # k4ute specific routines
150 my ($ua, $call, $title) = @_;
151 my $sreq = "http://no4j.com/nfdxa/qsl/index.asp?dx=$call";
153 my $req = HTTP::Request->new('GET', $sreq);
154 return $ua->register($req);
159 my ($fh, $call, $title, $code, $content) = @_;
161 print $fh "UTE|$code|", trancode($code), "\n";
166 my $r = new K4UTE $call;
170 my $l = length $content;
171 for ($i = 0; $i < $l && ($chunk = substr($content, $i, 512)); $i += 512) {
176 my @lines = $r->answer;
178 print $fh "UTE|$code|$_\n" if $_;
180 print "lines: ", scalar @lines, "\n";
183 # buckmaster specific routines
186 my ($ua, $call, $title) = @_;
187 my $sreq = "http://www.buck.com/cgi-bin/do_hamcallexe";
189 my $req = HTTP::Request->new('POST', $sreq);
190 $req->add_content("entry=$call");
191 return $ua->register($req);
196 my ($fh, $call, $title, $code, $content) = @_;
198 print $fh "BCK|$code|", trancode($code), "\n";
203 my $r = new Buck $call;
207 my $l = length $content;
208 for ($i = 0; $i < $l && ($chunk = substr($content, $i, 512)); $i += 512) {
213 my @lines = $r->answer;
215 print $fh "BCK|$code|$_\n" if $_;
217 print "lines: ", scalar @lines, "\n";
221 # this is what is called when an incoming request is taken
228 if (defined ($line = <$fh>)) {
229 $line =~ s/[\r\n]+$//g;
235 $line =~ s/^[^[A-Za-z0-9\|]]+//g;
237 my ($call, $title) = split /\|/, $line;
238 return if $call eq 'quit' || $call eq 'QUIT';
240 print "{A = '$call'";
241 print $title ? ", T = '$title'}\n" : "}\n";
243 my $ua = LWP::Parallel::UserAgent->new;
245 # set up various UA things
246 $ua->duplicates(0); # ignore duplicates
248 $ua->redirect(1); # follow 302 redirects
249 $ua->agent("DXSpider callbot $version");
252 my $art = uri_escape($call);
253 my $tit = uri_escape($title);
256 if ($res = req_qrz($ua, $art, $tit)) {
257 print $fh "QRZ|500\n";
260 if ($res = req_buck($ua, $art, $tit)) {
261 print $fh "BCK|500\n";
264 if ($res = req_ute($ua, $art, $tit)) {
265 print $fh "UTE|500\n";
268 # wait for all the results to come back
269 my $entries = $ua->wait();
271 for (keys %$entries) {
272 $res = $entries->{$_}->response;
273 my $uri = $res->request->url;
274 my $code = $res->code;
275 print "url: ", $uri, " code: ", $code, "\n";
277 # now parse each result
279 parse_qrz($fh, $call, $title, $code, $res->content), last if /www.qrz.com/i;
280 parse_buck($fh, $call, $title, $code, $res->content), last if /www.buck.com/i;
281 parse_ute($fh, $call, $title, $code, $res->content), last if /no4j.com/i;
288 $SIG{QUIT} = \&cease;
289 $SIG{HUP} = 'IGNORE';
290 STDOUT->autoflush(1);
292 my $server = new ForkingServer \&child;
294 $server->allow('.*');