Factor Groupstats() out from gatherstats main.
Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
		
							parent
							
								
									a31e86444a
								
							
						
					
					
						commit
						0a0e615ede
					
				
					 1 changed files with 94 additions and 78 deletions
				
			
		| 
						 | 
				
			
			@ -7,7 +7,7 @@
 | 
			
		|||
#
 | 
			
		||||
# It is part of the NewsStats package.
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
 | 
			
		||||
# Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
 | 
			
		||||
#
 | 
			
		||||
# It can be redistributed and/or modified under the same terms under
 | 
			
		||||
# which Perl itself is published.
 | 
			
		||||
| 
						 | 
				
			
			@ -108,6 +108,8 @@ if ($Conf{'TLH'}) {
 | 
			
		|||
 | 
			
		||||
### init database
 | 
			
		||||
my $DBHandle = InitDB(\%Conf,1);
 | 
			
		||||
my $DBRaw    = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
 | 
			
		||||
my $DBGrps   = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
 | 
			
		||||
 | 
			
		||||
### get data for each month
 | 
			
		||||
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
 | 
			
		||||
| 
						 | 
				
			
			@ -115,90 +117,104 @@ foreach my $Month (&ListMonth($Period)) {
 | 
			
		|||
 | 
			
		||||
  print "---------- $Month ----------\n" if $OptDebug;
 | 
			
		||||
 | 
			
		||||
  ### GroupStats
 | 
			
		||||
  if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
 | 
			
		||||
    # read list of newsgroups from --checkgroups
 | 
			
		||||
    # into a hash
 | 
			
		||||
    my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))}
 | 
			
		||||
      if $OptCheckgroupsFile;
 | 
			
		||||
 | 
			
		||||
    ### ----------------------------------------------
 | 
			
		||||
    ### get groups data (number of postings per group)
 | 
			
		||||
    # get groups data from raw table for given month
 | 
			
		||||
    my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
 | 
			
		||||
                                             "WHERE day LIKE ? AND NOT disregard",
 | 
			
		||||
                                             $Conf{'DBDatabase'},
 | 
			
		||||
                                             $Conf{'DBTableRaw'}));
 | 
			
		||||
    $DBQuery->execute($Month.'-%')
 | 
			
		||||
      or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
 | 
			
		||||
                          "$DBI::errstr\n",$Month,
 | 
			
		||||
                          $Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
 | 
			
		||||
 | 
			
		||||
    # count postings per group
 | 
			
		||||
    my %Postings;
 | 
			
		||||
    while (($_) = $DBQuery->fetchrow_array) {
 | 
			
		||||
      # get list of newsgroups and hierarchies from Newsgroups:
 | 
			
		||||
      my %Newsgroups = ListNewsgroups($_,$TLH,
 | 
			
		||||
                                      $OptCheckgroupsFile ? \%ValidGroups : '');
 | 
			
		||||
      # count each newsgroup and hierarchy once
 | 
			
		||||
      foreach (sort keys %Newsgroups) {
 | 
			
		||||
        $Postings{$_}++;
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    # add valid but empty groups if --checkgroups is set
 | 
			
		||||
    if (%ValidGroups) {
 | 
			
		||||
      foreach (sort keys %ValidGroups) {
 | 
			
		||||
        if (!defined($Postings{$_})) {
 | 
			
		||||
          # add current newsgroup as empty group
 | 
			
		||||
          $Postings{$_} = 0;
 | 
			
		||||
          warn (sprintf("ADDED: %s as empty group\n",$_));
 | 
			
		||||
          # add empty hierarchies for current newsgroup as needed
 | 
			
		||||
          foreach (ParseHierarchies($_)) {
 | 
			
		||||
            my $Hierarchy = $_ . '.ALL';
 | 
			
		||||
            if (!defined($Postings{$Hierarchy})) {
 | 
			
		||||
              $Postings{$Hierarchy} = 0;
 | 
			
		||||
              warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
 | 
			
		||||
            };
 | 
			
		||||
          };
 | 
			
		||||
        }
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    # delete old data for that month
 | 
			
		||||
    if (!$OptTest) {
 | 
			
		||||
      $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
 | 
			
		||||
                                       $Conf{'DBDatabase'},$Conf{'DBTableGrps'}),
 | 
			
		||||
                                       undef,$Month)
 | 
			
		||||
        or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ".
 | 
			
		||||
                            "$DBI::errstr\n",$Month,
 | 
			
		||||
                            $Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    print "----- GroupStats -----\n" if $OptDebug;
 | 
			
		||||
    foreach my $Newsgroup (sort keys %Postings) {
 | 
			
		||||
      print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug;
 | 
			
		||||
      if (!$OptTest) {
 | 
			
		||||
        # write to database
 | 
			
		||||
        $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ".
 | 
			
		||||
                                              "(month,newsgroup,postings) ".
 | 
			
		||||
                                              "VALUES (?, ?, ?)",
 | 
			
		||||
                                              $Conf{'DBDatabase'},
 | 
			
		||||
                                              $Conf{'DBTableGrps'}));
 | 
			
		||||
        $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
 | 
			
		||||
          or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ".
 | 
			
		||||
                              "$DBI::errstr\n",$Month,$Newsgroup,
 | 
			
		||||
                              $Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
 | 
			
		||||
        $DBQuery->finish;
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
  } else {
 | 
			
		||||
    # other types of information go here - later on
 | 
			
		||||
    &GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptTest,$OptDebug);
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
### close handles
 | 
			
		||||
$DBHandle->disconnect;
 | 
			
		||||
 | 
			
		||||
################################# Subroutines ##################################
 | 
			
		||||
 | 
			
		||||
sub GroupStats {
 | 
			
		||||
### ----------------------------------------------------------------------------
 | 
			
		||||
### collect number of postings per group
 | 
			
		||||
### IN : $DBHandle         : database handle
 | 
			
		||||
###      $DBRaw            : database table for raw data (to read from)
 | 
			
		||||
###      $DBGrps           : database table for groups data (to write to)
 | 
			
		||||
###      $Month            : current month to do
 | 
			
		||||
###      $TLH              : TLHs to collect
 | 
			
		||||
###      $Checkgroupsfile  : filename template for checkgroups file
 | 
			
		||||
###                          (expanded to $Checkgroupsfile-$Month)
 | 
			
		||||
###      $Test             : test mode
 | 
			
		||||
###      $Debug            : debug mode
 | 
			
		||||
### OUT: (nothing)
 | 
			
		||||
  my ($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$CheckgroupsFile,$Test,$Debug) = @_;
 | 
			
		||||
 | 
			
		||||
  # read list of newsgroups from --checkgroups
 | 
			
		||||
  # into a hash
 | 
			
		||||
  my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$CheckgroupsFile,$Month))}
 | 
			
		||||
    if $CheckgroupsFile;
 | 
			
		||||
 | 
			
		||||
  ### ----------------------------------------------
 | 
			
		||||
  ### get groups data (number of postings per group)
 | 
			
		||||
  # get groups data from raw table for given month
 | 
			
		||||
  my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
 | 
			
		||||
                                           "WHERE day LIKE ? AND NOT disregard",
 | 
			
		||||
                                           $DBRaw));
 | 
			
		||||
  $DBQuery->execute($Month.'-%')
 | 
			
		||||
    or &Bleat(2,sprintf("Can't get groups data for %s from %s: ".
 | 
			
		||||
                        "$DBI::errstr\n",$Month,
 | 
			
		||||
                        $DBRaw));
 | 
			
		||||
 | 
			
		||||
  # count postings per group
 | 
			
		||||
  my %Postings;
 | 
			
		||||
  while (($_) = $DBQuery->fetchrow_array) {
 | 
			
		||||
    # get list of newsgroups and hierarchies from Newsgroups:
 | 
			
		||||
    my %Newsgroups = ListNewsgroups($_,$TLH,
 | 
			
		||||
                                    $CheckgroupsFile ? \%ValidGroups : '');
 | 
			
		||||
    # count each newsgroup and hierarchy once
 | 
			
		||||
    foreach (sort keys %Newsgroups) {
 | 
			
		||||
      $Postings{$_}++;
 | 
			
		||||
    };
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  # add valid but empty groups if --checkgroups is set
 | 
			
		||||
  if (%ValidGroups) {
 | 
			
		||||
    foreach (sort keys %ValidGroups) {
 | 
			
		||||
      if (!defined($Postings{$_})) {
 | 
			
		||||
        # add current newsgroup as empty group
 | 
			
		||||
        $Postings{$_} = 0;
 | 
			
		||||
        warn (sprintf("ADDED: %s as empty group\n",$_));
 | 
			
		||||
        # add empty hierarchies for current newsgroup as needed
 | 
			
		||||
        foreach (ParseHierarchies($_)) {
 | 
			
		||||
          my $Hierarchy = $_ . '.ALL';
 | 
			
		||||
          if (!defined($Postings{$Hierarchy})) {
 | 
			
		||||
            $Postings{$Hierarchy} = 0;
 | 
			
		||||
            warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
 | 
			
		||||
          };
 | 
			
		||||
        };
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  # delete old data for that month
 | 
			
		||||
  if (!$Test) {
 | 
			
		||||
    $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
 | 
			
		||||
                                     $DBGrps), undef,$Month)
 | 
			
		||||
      or &Bleat(2,sprintf("Can't delete old groups data for %s from %s: ".
 | 
			
		||||
                          "$DBI::errstr\n",$Month,$DBGrps));
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  print "----- GroupStats -----\n" if $Debug;
 | 
			
		||||
  foreach my $Newsgroup (sort keys %Postings) {
 | 
			
		||||
    print "$Newsgroup => $Postings{$Newsgroup}\n" if $Debug;
 | 
			
		||||
    if (!$Test) {
 | 
			
		||||
      # write to database
 | 
			
		||||
      $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
 | 
			
		||||
                                            "(month,newsgroup,postings) ".
 | 
			
		||||
                                            "VALUES (?, ?, ?)",$DBGrps));
 | 
			
		||||
      $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
 | 
			
		||||
        or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s: ".
 | 
			
		||||
                            "$DBI::errstr\n",$Month,$Newsgroup,$DBGrps));
 | 
			
		||||
      $DBQuery->finish;
 | 
			
		||||
    };
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
### ----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
__END__
 | 
			
		||||
 | 
			
		||||
################################ Documentation #################################
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue