Refactor and fix TLH check.
Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
parent
3447cdabff
commit
995173456b
|
@ -137,7 +137,7 @@ foreach my $Month (&ListMonth($Period)) {
|
|||
googlegroups.com heirich.name news.neostrada.pl netcologne.de newsdawg.com newscene.com
|
||||
news-service.com octanews.com readnews.com wieslauf.sub.de highway.telekom.at
|
||||
united-newsserver.de xennanews.com xlned.com xsnews.nl news.xs4all.nl);
|
||||
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptMID,$OptTest,$OptDebug,@KnownHosts);
|
||||
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@KnownHosts);
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -153,10 +153,10 @@ sub GroupStats {
|
|||
### $DBRaw : database table for raw data (to read from)
|
||||
### $DBGrps : database table for groups data (to write to)
|
||||
### $Month : current month to do
|
||||
### $MID : specific Message-ID to fetch (testing purposes)
|
||||
### $TLH : TLHs to collect
|
||||
### $Checkgroupsfile : filename template for checkgroups file
|
||||
### (expanded to $Checkgroupsfile-$Month)
|
||||
### $MID : specific Message-ID to fetch (testing purposes)
|
||||
### $Test : test mode
|
||||
### $Debug : debug mode
|
||||
### OUT: (nothing)
|
||||
|
@ -251,12 +251,13 @@ sub HostStats {
|
|||
### $DBRaw : database table for raw data (to read from)
|
||||
### $DBHosts : database table for hosts data (to write to)
|
||||
### $Month : current month to do
|
||||
### $TLH : TLHs to collect
|
||||
### $MID : specific Message-ID to fetch (testing purposes)
|
||||
### $Test : test mode
|
||||
### $Debug : debug mode
|
||||
### @KnownHosts : list of known hosts with subdomains
|
||||
### OUT: (nothing)
|
||||
my ($DBHandle,$DBRaw,$DBHosts,$Month,$MID,$Test,$Debug,@KnownHosts) = @_;
|
||||
my ($DBHandle,$DBRaw,$DBHosts,$Month,$TLH,$MID,$Test,$Debug,@KnownHosts) = @_;
|
||||
|
||||
my (%Postings,$DBQuery);
|
||||
|
||||
|
@ -281,16 +282,7 @@ sub HostStats {
|
|||
### parse headers
|
||||
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
|
||||
### skip postings with wrong TLH
|
||||
# remove whitespace from contents of Newsgroups:
|
||||
chomp($Newsgroups);
|
||||
$Newsgroups =~ s/\s//;
|
||||
my $GroupCount;
|
||||
for (split /,/, $Newsgroups) {
|
||||
# don't count newsgroup/hierarchy in wrong TLH
|
||||
next if($TLH and !/^$TLH/);
|
||||
$GroupCount++;
|
||||
};
|
||||
next if !$GroupCount;
|
||||
next if ($TLH && !CheckTLH($Newsgroups,$TLH));
|
||||
|
||||
my $Host;
|
||||
my %Header = ParseHeaders(split(/\n/,$Headers));
|
||||
|
@ -380,6 +372,37 @@ sub HostStats {
|
|||
};
|
||||
};
|
||||
|
||||
sub CheckTLH {
|
||||
### ----------------------------------------------------------------------------
|
||||
### count newsgroups from legal TLH(s)
|
||||
### IN : $Newsgroups : comma separated list of newsgroups
|
||||
### $TLH : (reference to an array of) legal TLH(s)
|
||||
### OUT: number of newsgroups from legal TLH(s)
|
||||
my ($Newsgroups,$TLH) = @_;
|
||||
|
||||
my (@TLH,$GroupCount);
|
||||
|
||||
# fill @TLH from $TLH, which can be an array reference or a scalar value
|
||||
if (ref($TLH) eq 'ARRAY') {
|
||||
@TLH = @{$TLH};
|
||||
} else {
|
||||
push @TLH, $TLH;
|
||||
}
|
||||
|
||||
# remove whitespace from contents of Newsgroups:
|
||||
chomp($Newsgroups);
|
||||
$Newsgroups =~ s/\s//;
|
||||
for (split /,/, $Newsgroups) {
|
||||
my $Newsgroup = $_;
|
||||
foreach (@TLH) {
|
||||
# increment $GroupCount if $Newsgroup starts with $TLH
|
||||
$GroupCount++ if $Newsgroup =~ /^$_/;
|
||||
}
|
||||
};
|
||||
|
||||
return $GroupCount;
|
||||
}
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
NewsStats 0.4.0 (unreleased)
|
||||
* Reformat $Conf{TLH} for GroupStats only.
|
||||
* Extract TLH check from HostStats to subroutine, fix no-op check.
|
||||
|
||||
NewsStats 0.3.0 (2025-05-18)
|
||||
* Extract GroupStats (in gatherstats) to subroutine.
|
||||
|
|
Loading…
Reference in a new issue