diff --git a/bin/gatherstats.pl b/bin/gatherstats.pl index 60043d1..d360541 100755 --- a/bin/gatherstats.pl +++ b/bin/gatherstats.pl @@ -7,7 +7,7 @@ # # It is part of the NewsStats package. # -# Copyright (c) 2010-2013 Thomas Hochstein +# Copyright (c) 2010-2013, 2025 Thomas Hochstein # # It can be redistributed and/or modified under the same terms under # which Perl itself is published. @@ -108,6 +108,8 @@ if ($Conf{'TLH'}) { ### init database my $DBHandle = InitDB(\%Conf,1); +my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'}); +my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); ### get data for each month &Bleat(1,'Test mode. Database is not updated.') if $OptTest; @@ -115,90 +117,104 @@ foreach my $Month (&ListMonth($Period)) { print "---------- $Month ----------\n" if $OptDebug; + ### GroupStats if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') { - # read list of newsgroups from --checkgroups - # into a hash - my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))} - if $OptCheckgroupsFile; - - ### ---------------------------------------------- - ### get groups data (number of postings per group) - # get groups data from raw table for given month - my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ". - "WHERE day LIKE ? AND NOT disregard", - $Conf{'DBDatabase'}, - $Conf{'DBTableRaw'})); - $DBQuery->execute($Month.'-%') - or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ". - "$DBI::errstr\n",$Month, - $Conf{'DBDatabase'},$Conf{'DBTableRaw'})); - - # count postings per group - my %Postings; - while (($_) = $DBQuery->fetchrow_array) { - # get list of newsgroups and hierarchies from Newsgroups: - my %Newsgroups = ListNewsgroups($_,$TLH, - $OptCheckgroupsFile ? \%ValidGroups : ''); - # count each newsgroup and hierarchy once - foreach (sort keys %Newsgroups) { - $Postings{$_}++; - }; - }; - - # add valid but empty groups if --checkgroups is set - if (%ValidGroups) { - foreach (sort keys %ValidGroups) { - if (!defined($Postings{$_})) { - # add current newsgroup as empty group - $Postings{$_} = 0; - warn (sprintf("ADDED: %s as empty group\n",$_)); - # add empty hierarchies for current newsgroup as needed - foreach (ParseHierarchies($_)) { - my $Hierarchy = $_ . '.ALL'; - if (!defined($Postings{$Hierarchy})) { - $Postings{$Hierarchy} = 0; - warn (sprintf("ADDED: %s as empty group\n",$Hierarchy)); - }; - }; - } - }; - }; - - # delete old data for that month - if (!$OptTest) { - $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?", - $Conf{'DBDatabase'},$Conf{'DBTableGrps'}), - undef,$Month) - or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ". - "$DBI::errstr\n",$Month, - $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - }; - - print "----- GroupStats -----\n" if $OptDebug; - foreach my $Newsgroup (sort keys %Postings) { - print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug; - if (!$OptTest) { - # write to database - $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ". - "(month,newsgroup,postings) ". - "VALUES (?, ?, ?)", - $Conf{'DBDatabase'}, - $Conf{'DBTableGrps'})); - $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) - or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ". - "$DBI::errstr\n",$Month,$Newsgroup, - $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - $DBQuery->finish; - }; - }; - } else { - # other types of information go here - later on + &GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptTest,$OptDebug); }; }; ### close handles $DBHandle->disconnect; +################################# Subroutines ################################## + +sub GroupStats { +### ---------------------------------------------------------------------------- +### collect number of postings per group +### IN : $DBHandle : database handle +### $DBRaw : database table for raw data (to read from) +### $DBGrps : database table for groups data (to write to) +### $Month : current month to do +### $TLH : TLHs to collect +### $Checkgroupsfile : filename template for checkgroups file +### (expanded to $Checkgroupsfile-$Month) +### $Test : test mode +### $Debug : debug mode +### OUT: (nothing) + my ($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$CheckgroupsFile,$Test,$Debug) = @_; + + # read list of newsgroups from --checkgroups + # into a hash + my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$CheckgroupsFile,$Month))} + if $CheckgroupsFile; + + ### ---------------------------------------------- + ### get groups data (number of postings per group) + # get groups data from raw table for given month + my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ". + "WHERE day LIKE ? AND NOT disregard", + $DBRaw)); + $DBQuery->execute($Month.'-%') + or &Bleat(2,sprintf("Can't get groups data for %s from %s: ". + "$DBI::errstr\n",$Month, + $DBRaw)); + + # count postings per group + my %Postings; + while (($_) = $DBQuery->fetchrow_array) { + # get list of newsgroups and hierarchies from Newsgroups: + my %Newsgroups = ListNewsgroups($_,$TLH, + $CheckgroupsFile ? \%ValidGroups : ''); + # count each newsgroup and hierarchy once + foreach (sort keys %Newsgroups) { + $Postings{$_}++; + }; + }; + + # add valid but empty groups if --checkgroups is set + if (%ValidGroups) { + foreach (sort keys %ValidGroups) { + if (!defined($Postings{$_})) { + # add current newsgroup as empty group + $Postings{$_} = 0; + warn (sprintf("ADDED: %s as empty group\n",$_)); + # add empty hierarchies for current newsgroup as needed + foreach (ParseHierarchies($_)) { + my $Hierarchy = $_ . '.ALL'; + if (!defined($Postings{$Hierarchy})) { + $Postings{$Hierarchy} = 0; + warn (sprintf("ADDED: %s as empty group\n",$Hierarchy)); + }; + }; + } + }; + }; + + # delete old data for that month + if (!$Test) { + $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?", + $DBGrps), undef,$Month) + or &Bleat(2,sprintf("Can't delete old groups data for %s from %s: ". + "$DBI::errstr\n",$Month,$DBGrps)); + }; + + print "----- GroupStats -----\n" if $Debug; + foreach my $Newsgroup (sort keys %Postings) { + print "$Newsgroup => $Postings{$Newsgroup}\n" if $Debug; + if (!$Test) { + # write to database + $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ". + "(month,newsgroup,postings) ". + "VALUES (?, ?, ?)",$DBGrps)); + $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) + or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s: ". + "$DBI::errstr\n",$Month,$Newsgroup,$DBGrps)); + $DBQuery->finish; + }; + }; +}; +### ---------------------------------------------------------------------------- + __END__ ################################ Documentation #################################