First version of HostStats.
Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
parent
73a2d70f16
commit
9ccb915d77
|
@ -20,7 +20,7 @@ BEGIN {
|
||||||
use strict;
|
use strict;
|
||||||
use warnings;
|
use warnings;
|
||||||
|
|
||||||
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList);
|
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList ParseHeaders);
|
||||||
|
|
||||||
use DBI;
|
use DBI;
|
||||||
use Getopt::Long qw(GetOptions);
|
use Getopt::Long qw(GetOptions);
|
||||||
|
@ -31,7 +31,7 @@ Getopt::Long::config ('bundling');
|
||||||
# define types of information that can be gathered
|
# define types of information that can be gathered
|
||||||
# all / groups (/ clients / hosts)
|
# all / groups (/ clients / hosts)
|
||||||
my %LegalStats;
|
my %LegalStats;
|
||||||
@LegalStats{('all','groups')} = ();
|
@LegalStats{('all','groups','hosts')} = ();
|
||||||
|
|
||||||
################################# Main program #################################
|
################################# Main program #################################
|
||||||
|
|
||||||
|
@ -110,6 +110,7 @@ if ($Conf{'TLH'}) {
|
||||||
my $DBHandle = InitDB(\%Conf,1);
|
my $DBHandle = InitDB(\%Conf,1);
|
||||||
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
|
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
|
||||||
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
|
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
|
||||||
|
my $DBHosts = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
|
||||||
|
|
||||||
### get data for each month
|
### get data for each month
|
||||||
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
|
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
|
||||||
|
@ -121,6 +122,15 @@ foreach my $Month (&ListMonth($Period)) {
|
||||||
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
|
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
|
||||||
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptTest,$OptDebug);
|
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptTest,$OptDebug);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
### HostStats
|
||||||
|
if ($OptStatsType eq 'all' or $OptStatsType eq 'hosts') {
|
||||||
|
# define known hosts using subdomains
|
||||||
|
my @KnownHosts = qw(aioe.org arcor-online.net arcor-ip.de news.astraweb.com read.cnntp.org easynews.com eternal-september.org
|
||||||
|
fernuni-hagen.de free.fr newsread.freenet.ag googlegroups.com
|
||||||
|
news.neostrada.pl newsdawg.com newscene.com news-service.com octanews.com xsnews.nl news.xs4all.nl);
|
||||||
|
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTest,$OptDebug,@KnownHosts);
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
### close handles
|
### close handles
|
||||||
|
@ -215,6 +225,126 @@ sub GroupStats {
|
||||||
};
|
};
|
||||||
### ----------------------------------------------------------------------------
|
### ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
sub HostStats {
|
||||||
|
### ----------------------------------------------------------------------------
|
||||||
|
### collect number of postings per server
|
||||||
|
### IN : $DBHandle : database handle
|
||||||
|
### $DBRaw : database table for raw data (to read from)
|
||||||
|
### $DBHosts : database table for hosts data (to write to)
|
||||||
|
### $Month : current month to do
|
||||||
|
### $Test : test mode
|
||||||
|
### $Debug : debug mode
|
||||||
|
### @KnownHosts : list of known hosts with subdomains
|
||||||
|
### OUT: (nothing)
|
||||||
|
my ($DBHandle,$DBRaw,$DBHosts,$Month,$Test,$Debug,@KnownHosts) = @_;
|
||||||
|
|
||||||
|
# define known hosts using subdomains
|
||||||
|
my %Postings;
|
||||||
|
|
||||||
|
# get raw header data from raw table for given month
|
||||||
|
my $DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
|
||||||
|
"WHERE day LIKE ? AND NOT disregard",
|
||||||
|
$DBRaw));
|
||||||
|
$DBQuery->execute($Month.'-%')
|
||||||
|
or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
|
||||||
|
"$DBI::errstr\n",$Month,$DBRaw));
|
||||||
|
|
||||||
|
### ----------------------------------------------
|
||||||
|
print "----- HostStats -----\n" if $Debug;
|
||||||
|
### parse headers
|
||||||
|
while (($_) = $DBQuery->fetchrow_array) {
|
||||||
|
my $Host;
|
||||||
|
my %Header = ParseHeaders(split(/\n/,$_));
|
||||||
|
|
||||||
|
# ([a-z0-9-_]+\.[a-z0-9-_.]+) tries to match a hostname
|
||||||
|
# Injection-Info
|
||||||
|
if($Header{'injection-info'}) {
|
||||||
|
($Host) = $Header{'injection-info'} =~ /^\s*([a-z0-9-_]+\.[a-z0-9-_.]+);/i;
|
||||||
|
# reset if IP address
|
||||||
|
undef($Host) if $Host && $Host !~ /[g-z]/i;
|
||||||
|
}
|
||||||
|
# X-Trace
|
||||||
|
if (!$Host && $Header{'x-trace'}) {
|
||||||
|
(undef, $Host) = $Header{'x-trace'} =~ /^(\s|\d)*([a-z0-9-_]+\.[a-z0-9-_.]+)/i;
|
||||||
|
# reset if IP address
|
||||||
|
undef($Host) if $Host && $Host !~ /[g-z]/i;
|
||||||
|
}
|
||||||
|
# NNTP-Posting-Host
|
||||||
|
if (!$Host && $Header{'nntp-posting-host'}) {
|
||||||
|
($Host) = $Header{'nntp-posting-host'} =~ /^\s*([a-z0-9-_]+\.[a-z0-9-_.]+)/i;
|
||||||
|
# reset if IP address
|
||||||
|
undef($Host) if $Host && $Host !~ /[g-z]/i;
|
||||||
|
}
|
||||||
|
# Path
|
||||||
|
if (!$Host) {
|
||||||
|
if ($Header{'path'} =~ /!([^!]+)!.POSTED!/) {
|
||||||
|
$Host = "$1";
|
||||||
|
} elsif ($Header{'path'} =~ /!.POSTED.([^!]+)!/) {
|
||||||
|
$Host = "$1";
|
||||||
|
} else {
|
||||||
|
# iterate on the Path: header until we have a host name or no more
|
||||||
|
# path elements
|
||||||
|
while (!$Host && $Header{'path'} =~ /!/) {
|
||||||
|
($Host) = $Header{'path'} =~ /!?([a-z0-9-_]+\.[a-z0-9-_.]+)![^!]+$/i;
|
||||||
|
undef($Host) if $Host && $Host =~ /\.MISMATCH/;
|
||||||
|
# remove last path element
|
||||||
|
$Header{'path'} =~ s/![^!]+$//;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# special cases
|
||||||
|
$Host = 'news.highwinds-media.com' if $Host =~ /fx\d\d\.\S{3}\.POSTED/
|
||||||
|
or $Host =~ /newsfe\d+\.(iad|ams2)/;
|
||||||
|
$Host = 'newshosting.com' if $Host =~ /post\d*\.iad/;
|
||||||
|
|
||||||
|
# trailing .POSTED
|
||||||
|
($Host) = $Host =~ /(\S+)\.POSTED$/ if $Host =~ /\.POSTED$/;
|
||||||
|
|
||||||
|
# normalize hosts
|
||||||
|
foreach (@KnownHosts) {
|
||||||
|
if ($Host =~ /\.$_$/) {
|
||||||
|
($Host) = $_ ;
|
||||||
|
last;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# lowercase
|
||||||
|
$Host = lc($Host);
|
||||||
|
|
||||||
|
# count host
|
||||||
|
if ($Host) {
|
||||||
|
$Postings{$Host}++;
|
||||||
|
} else {
|
||||||
|
&Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
|
||||||
|
}
|
||||||
|
|
||||||
|
# !DEBUG! printf("%s: %s\n", $Header{'message-id'}, $Host);
|
||||||
|
};
|
||||||
|
|
||||||
|
# delete old data for that month
|
||||||
|
if (!$Test) {
|
||||||
|
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
|
||||||
|
$DBHosts),undef,$Month)
|
||||||
|
or &Bleat(2,sprintf("Can't delete old hosts data for %s from %s: ".
|
||||||
|
"$DBI::errstr\n",$Month,$DBHosts));
|
||||||
|
};
|
||||||
|
|
||||||
|
foreach my $Host (sort keys %Postings) {
|
||||||
|
print "$Host => $Postings{$Host}\n" if $Debug;
|
||||||
|
if (!$Test) {
|
||||||
|
# write to database
|
||||||
|
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
|
||||||
|
"(month,host,postings) ".
|
||||||
|
"VALUES (?, ?, ?)",$DBHosts));
|
||||||
|
$DBQuery->execute($Month, $Host, $Postings{$Host})
|
||||||
|
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s: ".
|
||||||
|
"$DBI::errstr\n",$Month,$Host,$DBHosts));
|
||||||
|
$DBQuery->finish;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
__END__
|
__END__
|
||||||
|
|
||||||
################################ Documentation #################################
|
################################ Documentation #################################
|
||||||
|
|
Loading…
Reference in a new issue