gatherstats: implement --hierarchy check.

Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
Thomas Hochstein 2025-05-11 12:11:28 +02:00
parent e40e96a1e2
commit ea493f3da0

View file

@ -263,26 +263,38 @@ sub HostStats {
if (!$MID) {
# get raw header data from raw table for given month
$DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
"$DBI::errstr\n",$MID,$DBRaw));
}
### ----------------------------------------------
print "----- HostStats -----\n" if $Debug;
### parse headers
while (($_) = $DBQuery->fetchrow_array) {
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
### skip postings with wrong TLH
# remove whitespace from contents of Newsgroups:
chomp($Newsgroups);
$Newsgroups =~ s/\s//;
my $GroupCount;
for (split /,/, $Newsgroups) {
# don't count newsgroup/hierarchy in wrong TLH
next if($TLH and !/^$TLH/);
$GroupCount++;
};
next if !$GroupCount;
my $Host;
my %Header = ParseHeaders(split(/\n/,$_));
my %Header = ParseHeaders(split(/\n/,$Headers));
# ([a-z0-9-_]+\.[a-z0-9-_.]+) tries to match a hostname
# Injection-Info
@ -425,9 +437,8 @@ override that default through the B<--groupsdb> option.
=item B<hosts> (postings from host per month)
B<gatherstats> will examine Injection-Info:, X-Trace: and Path:
headers and try to normalize them.
Filtering on I<TLH> is not yet implemented.
headers and try to normalize them. Postings that were not sent to at
least one group in I<TLH> will be ignored.
Data is written to I<DBTableHosts> (see L<doc/INSTALL>); you can
override that default through the B<--hostsdb> option.