#!/usr/bin/env perl
#
# create_master_badip_files.pl
#
# Get the TOR exit and relay lists from the net, extract the exit and relay
# node ip addresses and store them, one per line, in the standard places
# in /spider/local_data.
#
# This creates the (master) set of badip data files to be collected by
# other nodes looking at gb7djk. In other words this should only be used
# in one place, i.e. at GB7DJK (http://www.dxcluster.org).
#
# Provenance: "add create_master_badip_files.pl", Dirk Koopman,
# Wed, 18 Jan 2023 12:07:43 +0000, added as
# perl/create_master_badip_files.pl (new file, mode 100755), commit
# 81404bc642ad4ebddcfbf69aec1da3a90be75f20:
# http://dxcluster.net/gitweb/gitweb.cgi?a=commitdiff_plain;h=81404bc642ad4ebddcfbf69aec1da3a90be75f20;p=spider.git
#
# Usage:
#   create_master_badip_files.pl [-x] [file]
#
#   -x     log one debug line per relay that is processed
#   file   read the JSON document from this file ("-" for STDIN) instead
#          of fetching it from the network
#

use 5.16.1;
use strict;
use warnings;

# These must stay package variables (not lexicals): they are set in BEGIN
# before the DXSpider modules are loaded.
# NOTE(review): presumably those modules read them as $main::root and
# $main::data - confirm before ever turning them into `my` variables.
our ($root, $data);

# search local then perl directories
BEGIN {
    # root of directory tree for this system
    $root = $ENV{'DXSPIDER_ROOT'} || "/spider";

    mkdir "$root/local_data", 02777 unless -d "$root/local_data";

    # this IS the right way round! "local" is unshifted last, so it ends
    # up first in @INC and is searched before "perl".
    unshift @INC, "$root/perl";
    unshift @INC, "$root/local";
    $data = "$root/data";

    # The JSON module chooses its backend while it is being loaded, so the
    # preference has to be in place before the `use JSON` below is compiled.
    $ENV{PERL_JSON_BACKEND} = "JSON::XS,JSON::PP";
}

use DXVars;
use SysVar;

use DXDebug;
use DXUtil;

use LWP::Simple;
use JSON;
use Date::Parse;
use File::Copy;

DXDebug::dbginit();

my $debug;

if (@ARGV && $ARGV[0] eq '-x') {
    shift @ARGV;
    $debug = 1;
}

my $url     = "https://onionoo.torproject.org/details";
my $relayfn = localdata('badip.torrelay');
my $exitfn  = localdata('badip.torexit');

# a relay is only listed if its last_seen time is within this many seconds
my $last_seen_window = 10800;

my $content;

if (@ARGV) {
    my $fn = shift @ARGV;
    my $in;
    if ($fn eq '-') {
        # two-arg open used to treat "-" as STDIN; keep that working
        $in = \*STDIN;
    }
    else {
        # three-arg open: the name comes from the command line and must
        # never be interpreted as a mode or a pipe
        open $in, '<', $fn or die "$0 cannot open file $fn, $!";
    }
    $content = do { local $/; <$in> };    # slurp the whole document
    close $in;
}
else {
    $content = get($url) or die "$0: connect error on $url, $!\n";
}

die "No TOR content available $!\n" unless $content;

my $details = decode_json($content);
my $now     = time;
my $ecount  = 0;
my $rcount  = 0;

# Write to uniquely named temporary files first, so that the live files are
# only ever replaced by complete lists.
my $rand     = rand;
my $relaytmp = "$relayfn.$rand";
my $exittmp  = "$exitfn.$rand";

open my $relay_fh, '>', $relaytmp or die "$0: cannot open $relayfn $!";
open my $exit_fh, '>', $exittmp or do {
    my $err = $!;
    close $relay_fh;
    unlink $relaytmp;                     # don't leave a stray temp file
    die "$0: cannot open $exitfn $err";
};

foreach my $e (@{ $details->{relays} // [] }) {

    # skip relays with a missing/unparseable timestamp or not seen recently
    my $seen = str2time($e->{last_seen} // '');
    next unless defined $seen && $seen >= $now - $last_seen_window;

    # a missing or null address list is simply treated as empty
    my @or   = clean_addr(@{ $e->{or_addresses}   // [] });
    my @exit = clean_addr(@{ $e->{exit_addresses} // [] });

    if ($debug) {
        my $ors = join ', ', @or;
        my $es  = join ', ', @exit;
        dbg("$0: $e->{nickname} $e->{last_seen} relays: [$ors] exits: [$es]");
    }

    print {$relay_fh} "$_\n" for @or;
    print {$exit_fh}  "$_\n" for @exit;
    $rcount += @or;
    $ecount += @exit;
}

# Buffered write errors only surface at close; a file that failed to close
# cleanly must not replace the live one.
my $relay_ok = close $relay_fh or warn "$0: error closing $relaytmp $!\n";
my $exit_ok  = close $exit_fh  or warn "$0: error closing $exittmp $!\n";

dbg("$0: $rcount relays $ecount exits found");

# An empty list never overwrites an existing file.
if ($relay_ok && $rcount) {
    move($relaytmp, $relayfn)
        or warn "$0: cannot move $relaytmp to $relayfn $!\n";
}
if ($exit_ok && $ecount) {
    move($exittmp, $exitfn)
        or warn "$0: cannot move $exittmp to $exitfn $!\n";
}

# remove whatever was not moved into place (harmless after a good move)
unlink $relaytmp, $exittmp;

exit 0;

# Extract the bare IP address from each "address:port" string.
#
# Takes a list of strings of the form "a.b.c.d:port" or "[ipv6]:port" and
# returns the list of addresses only: the dotted quad as is, the IPv6
# address without its square brackets. Only lower case hex digits are
# accepted in an IPv6 address. Undefined entries and strings matching
# neither form are silently dropped.
sub clean_addr
{
    my @out;
    foreach my $addr (@_) {
        next unless defined $addr;
        # /a keeps \d to ASCII digits only
        if (my ($ipv4) = $addr =~ /^((?:\d+\.){3}\d+)/a) {
            push @out, $ipv4;
        }
        elsif (my ($ipv6) = $addr =~ /^\[([:a-f\d]+)\]/a) {
            push @out, $ipv6;
        }
    }
    return @out;
}