3 # Get the TOR exit and relay lists from the net, extract the exit and relay
4 # node ip addresses and store them, one per line, in the standard places
5 # in /spider/local_data.
10 # search local then perl directories
14 # root of directory tree for this system
15 our $root = "/spider";
16 $root = $ENV{'DXSPIDER_ROOT'} if $ENV{'DXSPIDER_ROOT'};
18 mkdir "$root/local_data", 02777 unless -d "$root/local_data";
20 unshift @INC, "$root/perl"; # this IS the right way round!
21 unshift @INC, "$root/local";
22 our $data = "$root/data";
40 $ENV{PERL_JSON_BACKEND} = "JSON::XS,JSON::PP";
45 if (@ARGV && $ARGV[0] eq '-x') {
49 my $url = "https://onionoo.torproject.org/details";	# document fetched with get() when relay data is downloaded
50 my $relayfn = localdata('badip.torrelay');	# final name of the relay address list (written via a "$relayfn.$rand" temp file)
51 my $exitfn = localdata('badip.torexit');	# final name of the exit address list (written via a "$exitfn.$rand" temp file)
53 my $last_seen_window = 10800;	# 10800 s = 3 h; relays whose last_seen is older than $now minus this are skipped
59 open IN, '<', $fn or die "$0 cannot open file $fn, $!";	# 3-arg open: $fn is taken literally as a file to read, never parsed for a mode or pipe
63 $content = get($url) or die "$0: connect error on $url, $!\n";
66 die "No TOR content available $!\n" unless $content;
68 my $l = length $content;
69 my $data = decode_json($content);
76 open RELAY, '>', "$relayfn.$rand" or die "$0: cannot open $relayfn $!";	# written under a temporary name; moved over $relayfn later if any relays were counted
77 open EXIT, '>', "$exitfn.$rand" or die "$0: cannot open $exitfn $!";	# report the OS error ($!), not the regex capture $1
79 foreach my $e (@{$data->{relays}}) {
81 my $seen = str2time($e->{last_seen});
82 next unless $seen >= $now - $last_seen_window;
84 my @exit = exists $e->{exit_addresses} ? clean_addr(@{$e->{exit_addresses}}) : ();	# always declared; empty when the key is absent (avoids the undefined 'my ... if' form)
85 my @or = exists $e->{or_addresses} ? clean_addr(@{$e->{or_addresses}}) : ();	# same treatment for the relay (OR) addresses
86 my $ors = join ', ', @or;
87 my $es = join ', ', @exit;
88 dbg "$0: $e->{nickname} $e->{last_seen} relays: [$ors] exits: [$es]" if $debug;
112 dbg("$0: $rcount relays $ecount exits $error error(s) found.");
113 move("$relayfn.$rand", $relayfn) or warn "$0: cannot move $relayfn.$rand to $relayfn, $!\n" if $rcount;	# only replace the live relay list when at least one relay was counted
114 move("$exitfn.$rand", $exitfn) or warn "$0: cannot move $exitfn.$rand to $exitfn, $!\n" if $ecount;	# likewise for the exit list; a failed move is now reported instead of ignored
115 unlink "$relayfn.$rand";	# remove the temp file if it was not moved (harmless if it no longer exists)
116 unlink "$exitfn.$rand";	# same clean-up for the exit temp file
126 my ($ipv4) = /^((?:\d+\.){3}\d+)/;
128 next if exists $addr{$ipv4};
133 my ($ipv6) = /^\[([:a-f\d]+)\]/;
135 next if exists $addr{$ipv6};