Factor Groupstats() out from gatherstats main.

Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
Thomas Hochstein 2025-05-10 21:27:40 +02:00
parent a31e86444a
commit 0a0e615ede

View file

@ -7,7 +7,7 @@
#
# It is part of the NewsStats package.
#
# Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
# Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
@ -108,6 +108,8 @@ if ($Conf{'TLH'}) {
### init database
my $DBHandle = InitDB(\%Conf,1);
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
### get data for each month
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
@ -115,90 +117,104 @@ foreach my $Month (&ListMonth($Period)) {
print "---------- $Month ----------\n" if $OptDebug;
### GroupStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
# read list of newsgroups from --checkgroups
# into a hash
my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))}
if $OptCheckgroupsFile;
### ----------------------------------------------
### get groups data (number of postings per group)
# get groups data from raw table for given month
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
"WHERE day LIKE ? AND NOT disregard",
$Conf{'DBDatabase'},
$Conf{'DBTableRaw'}));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,
$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
# count postings per group
my %Postings;
while (($_) = $DBQuery->fetchrow_array) {
# get list of newsgroups and hierarchies from Newsgroups:
my %Newsgroups = ListNewsgroups($_,$TLH,
$OptCheckgroupsFile ? \%ValidGroups : '');
# count each newsgroup and hierarchy once
foreach (sort keys %Newsgroups) {
$Postings{$_}++;
};
};
# add valid but empty groups if --checkgroups is set
if (%ValidGroups) {
foreach (sort keys %ValidGroups) {
if (!defined($Postings{$_})) {
# add current newsgroup as empty group
$Postings{$_} = 0;
warn (sprintf("ADDED: %s as empty group\n",$_));
# add empty hierarchies for current newsgroup as needed
foreach (ParseHierarchies($_)) {
my $Hierarchy = $_ . '.ALL';
if (!defined($Postings{$Hierarchy})) {
$Postings{$Hierarchy} = 0;
warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
};
};
}
};
};
# delete old data for that month
if (!$OptTest) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}),
undef,$Month)
or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
};
print "----- GroupStats -----\n" if $OptDebug;
foreach my $Newsgroup (sort keys %Postings) {
print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug;
if (!$OptTest) {
# write to database
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ".
"(month,newsgroup,postings) ".
"VALUES (?, ?, ?)",
$Conf{'DBDatabase'},
$Conf{'DBTableGrps'}));
$DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ".
"$DBI::errstr\n",$Month,$Newsgroup,
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
$DBQuery->finish;
};
};
} else {
# other types of information go here - later on
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptTest,$OptDebug);
};
};
### close handles
$DBHandle->disconnect;
################################# Subroutines ##################################
sub GroupStats {
### ----------------------------------------------------------------------------
### collect number of postings per group
### IN : $DBHandle : database handle
### $DBRaw : database table for raw data (to read from)
### $DBGrps : database table for groups data (to write to)
### $Month : current month to do
### $TLH : TLHs to collect
### $Checkgroupsfile : filename template for checkgroups file
### (expanded to $Checkgroupsfile-$Month)
### $Test : test mode
### $Debug : debug mode
### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$CheckgroupsFile,$Test,$Debug) = @_;
# read list of newsgroups from --checkgroups
# into a hash
my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$CheckgroupsFile,$Month))}
if $CheckgroupsFile;
### ----------------------------------------------
### get groups data (number of postings per group)
# get groups data from raw table for given month
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get groups data for %s from %s: ".
"$DBI::errstr\n",$Month,
$DBRaw));
# count postings per group
my %Postings;
while (($_) = $DBQuery->fetchrow_array) {
# get list of newsgroups and hierarchies from Newsgroups:
my %Newsgroups = ListNewsgroups($_,$TLH,
$CheckgroupsFile ? \%ValidGroups : '');
# count each newsgroup and hierarchy once
foreach (sort keys %Newsgroups) {
$Postings{$_}++;
};
};
# add valid but empty groups if --checkgroups is set
if (%ValidGroups) {
foreach (sort keys %ValidGroups) {
if (!defined($Postings{$_})) {
# add current newsgroup as empty group
$Postings{$_} = 0;
warn (sprintf("ADDED: %s as empty group\n",$_));
# add empty hierarchies for current newsgroup as needed
foreach (ParseHierarchies($_)) {
my $Hierarchy = $_ . '.ALL';
if (!defined($Postings{$Hierarchy})) {
$Postings{$Hierarchy} = 0;
warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
};
};
}
};
};
# delete old data for that month
if (!$Test) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
$DBGrps), undef,$Month)
or &Bleat(2,sprintf("Can't delete old groups data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBGrps));
};
print "----- GroupStats -----\n" if $Debug;
foreach my $Newsgroup (sort keys %Postings) {
print "$Newsgroup => $Postings{$Newsgroup}\n" if $Debug;
if (!$Test) {
# write to database
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
"(month,newsgroup,postings) ".
"VALUES (?, ?, ?)",$DBGrps));
$DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s: ".
"$DBI::errstr\n",$Month,$Newsgroup,$DBGrps));
$DBQuery->finish;
};
};
};
### ----------------------------------------------------------------------------
__END__
################################ Documentation #################################