First version of HostStats.
Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
parent
73a2d70f16
commit
9ccb915d77
|
@ -20,7 +20,7 @@ BEGIN {
|
|||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList);
|
||||
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList ParseHeaders);
|
||||
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
|
@ -31,7 +31,7 @@ Getopt::Long::config ('bundling');
|
|||
# define types of information that can be gathered
|
||||
# all / groups (/ clients / hosts)
|
||||
my %LegalStats;
|
||||
@LegalStats{('all','groups')} = ();
|
||||
@LegalStats{('all','groups','hosts')} = ();
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
|
@ -110,6 +110,7 @@ if ($Conf{'TLH'}) {
|
|||
my $DBHandle = InitDB(\%Conf,1);
|
||||
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
|
||||
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
|
||||
my $DBHosts = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
|
||||
|
||||
### get data for each month
|
||||
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
|
||||
|
@ -121,6 +122,15 @@ foreach my $Month (&ListMonth($Period)) {
|
|||
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
|
||||
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptTest,$OptDebug);
|
||||
};
|
||||
|
||||
### HostStats
|
||||
if ($OptStatsType eq 'all' or $OptStatsType eq 'hosts') {
|
||||
# define known hosts using subdomains
|
||||
my @KnownHosts = qw(aioe.org arcor-online.net arcor-ip.de news.astraweb.com read.cnntp.org easynews.com eternal-september.org
|
||||
fernuni-hagen.de free.fr newsread.freenet.ag googlegroups.com
|
||||
news.neostrada.pl newsdawg.com newscene.com news-service.com octanews.com xsnews.nl news.xs4all.nl);
|
||||
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTest,$OptDebug,@KnownHosts);
|
||||
};
|
||||
};
|
||||
|
||||
### close handles
|
||||
|
@ -215,6 +225,126 @@ sub GroupStats {
|
|||
};
|
||||
### ----------------------------------------------------------------------------
|
||||
|
||||
sub HostStats {
|
||||
### ----------------------------------------------------------------------------
|
||||
### collect number of postings per server
|
||||
### IN : $DBHandle : database handle
|
||||
### $DBRaw : database table for raw data (to read from)
|
||||
### $DBHosts : database table for hosts data (to write to)
|
||||
### $Month : current month to do
|
||||
### $Test : test mode
|
||||
### $Debug : debug mode
|
||||
### @KnownHosts : list of known hosts with subdomains
|
||||
### OUT: (nothing)
|
||||
my ($DBHandle,$DBRaw,$DBHosts,$Month,$Test,$Debug,@KnownHosts) = @_;
|
||||
|
||||
# define known hosts using subdomains
|
||||
my %Postings;
|
||||
|
||||
# get raw header data from raw table for given month
|
||||
my $DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
|
||||
"WHERE day LIKE ? AND NOT disregard",
|
||||
$DBRaw));
|
||||
$DBQuery->execute($Month.'-%')
|
||||
or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
|
||||
"$DBI::errstr\n",$Month,$DBRaw));
|
||||
|
||||
### ----------------------------------------------
|
||||
print "----- HostStats -----\n" if $Debug;
|
||||
### parse headers
|
||||
while (($_) = $DBQuery->fetchrow_array) {
|
||||
my $Host;
|
||||
my %Header = ParseHeaders(split(/\n/,$_));
|
||||
|
||||
# ([a-z0-9-_]+\.[a-z0-9-_.]+) tries to match a hostname
|
||||
# Injection-Info
|
||||
if($Header{'injection-info'}) {
|
||||
($Host) = $Header{'injection-info'} =~ /^\s*([a-z0-9-_]+\.[a-z0-9-_.]+);/i;
|
||||
# reset if IP address
|
||||
undef($Host) if $Host && $Host !~ /[g-z]/i;
|
||||
}
|
||||
# X-Trace
|
||||
if (!$Host && $Header{'x-trace'}) {
|
||||
(undef, $Host) = $Header{'x-trace'} =~ /^(\s|\d)*([a-z0-9-_]+\.[a-z0-9-_.]+)/i;
|
||||
# reset if IP address
|
||||
undef($Host) if $Host && $Host !~ /[g-z]/i;
|
||||
}
|
||||
# NNTP-Posting-Host
|
||||
if (!$Host && $Header{'nntp-posting-host'}) {
|
||||
($Host) = $Header{'nntp-posting-host'} =~ /^\s*([a-z0-9-_]+\.[a-z0-9-_.]+)/i;
|
||||
# reset if IP address
|
||||
undef($Host) if $Host && $Host !~ /[g-z]/i;
|
||||
}
|
||||
# Path
|
||||
if (!$Host) {
|
||||
if ($Header{'path'} =~ /!([^!]+)!.POSTED!/) {
|
||||
$Host = "$1";
|
||||
} elsif ($Header{'path'} =~ /!.POSTED.([^!]+)!/) {
|
||||
$Host = "$1";
|
||||
} else {
|
||||
# iterate on the Path: header until we have a host name or no more
|
||||
# path elements
|
||||
while (!$Host && $Header{'path'} =~ /!/) {
|
||||
($Host) = $Header{'path'} =~ /!?([a-z0-9-_]+\.[a-z0-9-_.]+)![^!]+$/i;
|
||||
undef($Host) if $Host && $Host =~ /\.MISMATCH/;
|
||||
# remove last path element
|
||||
$Header{'path'} =~ s/![^!]+$//;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
# special cases
|
||||
$Host = 'news.highwinds-media.com' if $Host =~ /fx\d\d\.\S{3}\.POSTED/
|
||||
or $Host =~ /newsfe\d+\.(iad|ams2)/;
|
||||
$Host = 'newshosting.com' if $Host =~ /post\d*\.iad/;
|
||||
|
||||
# trailing .POSTED
|
||||
($Host) = $Host =~ /(\S+)\.POSTED$/ if $Host =~ /\.POSTED$/;
|
||||
|
||||
# normalize hosts
|
||||
foreach (@KnownHosts) {
|
||||
if ($Host =~ /\.$_$/) {
|
||||
($Host) = $_ ;
|
||||
last;
|
||||
}
|
||||
}
|
||||
|
||||
# lowercase
|
||||
$Host = lc($Host);
|
||||
|
||||
# count host
|
||||
if ($Host) {
|
||||
$Postings{$Host}++;
|
||||
} else {
|
||||
&Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
|
||||
}
|
||||
|
||||
# !DEBUG! printf("%s: %s\n", $Header{'message-id'}, $Host);
|
||||
};
|
||||
|
||||
# delete old data for that month
|
||||
if (!$Test) {
|
||||
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
|
||||
$DBHosts),undef,$Month)
|
||||
or &Bleat(2,sprintf("Can't delete old hosts data for %s from %s: ".
|
||||
"$DBI::errstr\n",$Month,$DBHosts));
|
||||
};
|
||||
|
||||
foreach my $Host (sort keys %Postings) {
|
||||
print "$Host => $Postings{$Host}\n" if $Debug;
|
||||
if (!$Test) {
|
||||
# write to database
|
||||
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
|
||||
"(month,host,postings) ".
|
||||
"VALUES (?, ?, ?)",$DBHosts));
|
||||
$DBQuery->execute($Month, $Host, $Postings{$Host})
|
||||
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s: ".
|
||||
"$DBI::errstr\n",$Month,$Host,$DBHosts));
|
||||
$DBQuery->finish;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
|
Loading…
Reference in a new issue