Refactor and fix TLH check.
Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
parent
3447cdabff
commit
995173456b
|
@ -137,7 +137,7 @@ foreach my $Month (&ListMonth($Period)) {
|
||||||
googlegroups.com heirich.name news.neostrada.pl netcologne.de newsdawg.com newscene.com
|
googlegroups.com heirich.name news.neostrada.pl netcologne.de newsdawg.com newscene.com
|
||||||
news-service.com octanews.com readnews.com wieslauf.sub.de highway.telekom.at
|
news-service.com octanews.com readnews.com wieslauf.sub.de highway.telekom.at
|
||||||
united-newsserver.de xennanews.com xlned.com xsnews.nl news.xs4all.nl);
|
united-newsserver.de xennanews.com xlned.com xsnews.nl news.xs4all.nl);
|
||||||
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptMID,$OptTest,$OptDebug,@KnownHosts);
|
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@KnownHosts);
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -153,10 +153,10 @@ sub GroupStats {
|
||||||
### $DBRaw : database table for raw data (to read from)
|
### $DBRaw : database table for raw data (to read from)
|
||||||
### $DBGrps : database table for groups data (to write to)
|
### $DBGrps : database table for groups data (to write to)
|
||||||
### $Month : current month to do
|
### $Month : current month to do
|
||||||
### $MID : specific Message-ID to fetch (testing purposes)
|
|
||||||
### $TLH : TLHs to collect
|
### $TLH : TLHs to collect
|
||||||
### $Checkgroupsfile : filename template for checkgroups file
|
### $Checkgroupsfile : filename template for checkgroups file
|
||||||
### (expanded to $Checkgroupsfile-$Month)
|
### (expanded to $Checkgroupsfile-$Month)
|
||||||
|
### $MID : specific Message-ID to fetch (testing purposes)
|
||||||
### $Test : test mode
|
### $Test : test mode
|
||||||
### $Debug : debug mode
|
### $Debug : debug mode
|
||||||
### OUT: (nothing)
|
### OUT: (nothing)
|
||||||
|
@ -251,12 +251,13 @@ sub HostStats {
|
||||||
### $DBRaw : database table for raw data (to read from)
|
### $DBRaw : database table for raw data (to read from)
|
||||||
### $DBHosts : database table for hosts data (to write to)
|
### $DBHosts : database table for hosts data (to write to)
|
||||||
### $Month : current month to do
|
### $Month : current month to do
|
||||||
|
### $TLH : TLHs to collect
|
||||||
### $MID : specific Message-ID to fetch (testing purposes)
|
### $MID : specific Message-ID to fetch (testing purposes)
|
||||||
### $Test : test mode
|
### $Test : test mode
|
||||||
### $Debug : debug mode
|
### $Debug : debug mode
|
||||||
### @KnownHosts : list of known hosts with subdomains
|
### @KnownHosts : list of known hosts with subdomains
|
||||||
### OUT: (nothing)
|
### OUT: (nothing)
|
||||||
my ($DBHandle,$DBRaw,$DBHosts,$Month,$MID,$Test,$Debug,@KnownHosts) = @_;
|
my ($DBHandle,$DBRaw,$DBHosts,$Month,$TLH,$MID,$Test,$Debug,@KnownHosts) = @_;
|
||||||
|
|
||||||
my (%Postings,$DBQuery);
|
my (%Postings,$DBQuery);
|
||||||
|
|
||||||
|
@ -281,16 +282,7 @@ sub HostStats {
|
||||||
### parse headers
|
### parse headers
|
||||||
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
|
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
|
||||||
### skip postings with wrong TLH
|
### skip postings with wrong TLH
|
||||||
# remove whitespace from contents of Newsgroups:
|
next if ($TLH && !CheckTLH($Newsgroups,$TLH));
|
||||||
chomp($Newsgroups);
|
|
||||||
$Newsgroups =~ s/\s//;
|
|
||||||
my $GroupCount;
|
|
||||||
for (split /,/, $Newsgroups) {
|
|
||||||
# don't count newsgroup/hierarchy in wrong TLH
|
|
||||||
next if($TLH and !/^$TLH/);
|
|
||||||
$GroupCount++;
|
|
||||||
};
|
|
||||||
next if !$GroupCount;
|
|
||||||
|
|
||||||
my $Host;
|
my $Host;
|
||||||
my %Header = ParseHeaders(split(/\n/,$Headers));
|
my %Header = ParseHeaders(split(/\n/,$Headers));
|
||||||
|
@ -380,6 +372,37 @@ sub HostStats {
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
sub CheckTLH {
|
||||||
|
### ----------------------------------------------------------------------------
|
||||||
|
### count newsgroups from legal TLH(s)
|
||||||
|
### IN : $Newsgroups : comma separated list of newsgroups
|
||||||
|
### $TLH : (reference to an array of) legal TLH(s)
|
||||||
|
### OUT: number of newsgroups from legal TLH(s)
|
||||||
|
my ($Newsgroups,$TLH) = @_;
|
||||||
|
|
||||||
|
my (@TLH,$GroupCount);
|
||||||
|
|
||||||
|
# fill @TLH from $TLH, which can be an array reference or a scalar value
|
||||||
|
if (ref($TLH) eq 'ARRAY') {
|
||||||
|
@TLH = @{$TLH};
|
||||||
|
} else {
|
||||||
|
push @TLH, $TLH;
|
||||||
|
}
|
||||||
|
|
||||||
|
# remove whitespace from contents of Newsgroups:
|
||||||
|
chomp($Newsgroups);
|
||||||
|
$Newsgroups =~ s/\s//;
|
||||||
|
for (split /,/, $Newsgroups) {
|
||||||
|
my $Newsgroup = $_;
|
||||||
|
foreach (@TLH) {
|
||||||
|
# increment $GroupCount if $Newsgroup starts with $TLH
|
||||||
|
$GroupCount++ if $Newsgroup =~ /^$_/;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return $GroupCount;
|
||||||
|
}
|
||||||
|
|
||||||
__END__
|
__END__
|
||||||
|
|
||||||
################################ Documentation #################################
|
################################ Documentation #################################
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
NewsStats 0.4.0 (unreleased)
|
NewsStats 0.4.0 (unreleased)
|
||||||
* Reformat $Conf{TLH} for GroupStats only.
|
* Reformat $Conf{TLH} for GroupStats only.
|
||||||
|
* Extract TLH check from HostStats to subroutine, fix no-op check.
|
||||||
|
|
||||||
NewsStats 0.3.0 (2025-05-18)
|
NewsStats 0.3.0 (2025-05-18)
|
||||||
* Extract GroupStats (in gatherstats) to subroutine.
|
* Extract GroupStats (in gatherstats) to subroutine.
|
||||||
|
|
Loading…
Reference in a new issue