diff --git a/bin/gatherstats.pl b/bin/gatherstats.pl index 8359b8a..83f2ee9 100755 --- a/bin/gatherstats.pl +++ b/bin/gatherstats.pl @@ -263,26 +263,38 @@ sub HostStats { if (!$MID) { # get raw header data from raw table for given month - $DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ". + $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ". "WHERE day LIKE ? AND NOT disregard", $DBRaw)); $DBQuery->execute($Month.'-%') - or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ". + or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ". "$DBI::errstr\n",$Month,$DBRaw)); } else { - $DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ". + $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ". "WHERE mid = ?", $DBRaw)); $DBQuery->execute($MID) - or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ". + or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ". "$DBI::errstr\n",$MID,$DBRaw)); } ### ---------------------------------------------- print "----- HostStats -----\n" if $Debug; ### parse headers - while (($_) = $DBQuery->fetchrow_array) { + while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) { + ### skip postings with wrong TLH + # remove whitespace from contents of Newsgroups: + chomp($Newsgroups); + $Newsgroups =~ s/\s//; + my $GroupCount; + for (split /,/, $Newsgroups) { + # don't count newsgroup/hierarchy in wrong TLH + next if($TLH and !/^$TLH/); + $GroupCount++; + }; + next if !$GroupCount; + my $Host; - my %Header = ParseHeaders(split(/\n/,$_)); + my %Header = ParseHeaders(split(/\n/,$Headers)); # ([a-z0-9-_]+\.[a-z0-9-_.]+) tries to match a hostname # Injection-Info @@ -425,9 +437,8 @@ override that default through the B<--groupsdb> option. =item B (postings from host per month) B will examine Injection-Info:, X-Trace: and Path: -headers and try to normalize them. - -Filtering on I is not yet implemented. +headers and try to normalize them. 
Groups not in I<TLH> will be +ignored. Data is written to I (see L); you can override that default through the B<--hostsdb> option.