diff --git a/NewsStats.pm b/NewsStats.pm
index a04ac0f..bdfdcf5 100644
--- a/NewsStats.pm
+++ b/NewsStats.pm
@@ -343,13 +343,13 @@ sub SplitPeriod {
sub ListMonth {
################################################################################
### return a list of months (YYYY-MM) between start and end month
-### IN : $StartMonth, $EndMonth
-### OUT: @Months: array containing all months from $StartMonth to $EndMonth
- my ($StartMonth, $EndMonth) = @_;
- return (undef,undef)
- if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/);
- # return if $StartMonth = $EndMonth
- return ($StartMonth) if ($StartMonth eq $EndMonth);
+### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM')
+### OUT: @Months: array containing all months from $MonthExpression enumerated
+ my ($MonthExpression )= @_;
+ # return if single month
+ return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/);
+ # parse $MonthExpression
+ my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression);
# set $Year, $Month from $StartMonth
my ($Year, $Month) = split /-/, $StartMonth;
# define @Months
diff --git a/doc/TODO b/doc/TODO
index ce6f795..a30cdcd 100644
--- a/doc/TODO
+++ b/doc/TODO
@@ -42,6 +42,7 @@ Bug numbers refer to the Mantis issue tracker at .
names - would be nice.
+ install/install.pl
+ - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm)
- Add / enhance / test error handling
- General tests and optimisations
@@ -54,7 +55,6 @@ Bug numbers refer to the Mantis issue tracker at .
- General tests and optimisations
+ gatherstats.pl
- - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm)
- Use hierarchy information (see GroupInfo above)
- Add gathering of other stats (clients, hosts, ...)
- better modularisation (code reuse for other reports!)
diff --git a/gatherstats.pl b/gatherstats.pl
index e9ae0f8..b570cd8 100755
--- a/gatherstats.pl
+++ b/gatherstats.pl
@@ -7,7 +7,7 @@
#
# It is part of the NewsStats package.
#
-# Copyright (c) 2010 Thomas Hochstein
+# Copyright (c) 2010-2012 Thomas Hochstein
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
@@ -22,37 +22,56 @@ use strict;
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList);
use DBI;
+use Getopt::Long qw(GetOptions);
+Getopt::Long::config ('bundling');
################################# Definitions ##################################
# define types of information that can be gathered
# all / groups (/ clients / hosts)
-my %LegalTypes;
-@LegalTypes{('all','groups')} = ();
+my %LegalStats;
+@LegalStats{('all','groups')} = ();
################################# Main program #################################
### read commandline options
-my %Options = &ReadOptions('dom:p:t:l:n:r:g:c:s:');
+my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
+ $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest);
+GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
+ 'clientsdb=s' => \$OptClientsDB,
+ 'd|debug!' => \$OptDebug,
+ 'groupsdb=s' => \$OptGroupsDB,
+ 'hierarchy=s' => \$OptTLH,
+ 'hostsdb=s' => \$OptHostsDB,
+ 'm|month=s' => \$OptMonth,
+ 'rawdb=s' => \$OptRawDB,
+ 's|stats=s' => \$OptStatsType,
+ 't|test!' => \$OptTest,
+ 'h|help' => \&ShowPOD,
+ 'V|version' => \&ShowVersion) or exit 1;
### read configuration
-my %Conf = %{ReadConfig('newsstats.conf')};
+my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
### override configuration via commandline options
my %ConfOverride;
-$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'};
-$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
-$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'};
-$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'};
-$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'};
+$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
+$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
+$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
+$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
+$ConfOverride{'TLH'} = $OptTLH if $OptTLH;
&OverrideConfig(\%Conf,\%ConfOverride);
### get type of information to gather, defaulting to 'all'
-$Options{'t'} = 'all' if !$Options{'t'};
-die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}});
+$OptStatsType = 'all' if !$OptStatsType;
+&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType))
+ if !exists($LegalStats{$OptStatsType});
-### get time period (-m or -p)
-my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
+### get time period from --month
+# get verbal description of time period, drop SQL code
+my ($Period) = &GetTimePeriod($OptMonth);
+&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
+ "'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
### reformat $Conf{'TLH'}
my $TLH;
@@ -67,45 +86,55 @@ if ($Conf{'TLH'}) {
# strip whitespace
$TLH =~ s/\s//g;
# check for illegal characters
- die "$MySelf: E: Config error - illegal characters in TLH definition\n" if ($TLH !~ /^[a-zA-Z0-9:]+$/);
+ &Bleat(2,'Config error - illegal characters in TLH definition!')
+ if ($TLH !~ /^[a-zA-Z0-9:]+$/);
if ($TLH =~ /:/) {
- # reformat $TLH form a:b to (a)|(b)
+ # reformat $TLH from a:b to (a)|(b),
+ # e.g. replace '.' by '|'
$TLH =~ s/:/)|(/g;
$TLH = '(' . $TLH . ')';
};
};
-### read newsgroups list from -l
-my %ValidGroups = %{&ReadGroupList($Options{'l'})} if $Options{'l'};
+# read list of newsgroups from --checkgroups
+# into a hash
+my %ValidGroups = %{ReadGroupList($OptCheckgroupsFile)} if $OptCheckgroupsFile;
### init database
my $DBHandle = InitDB(\%Conf,1);
### get data for each month
-warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};
-foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {
+&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
+foreach my $Month (&ListMonth($Period)) {
- print "---------- $Month ----------\n" if $Options{'d'};
+ print "---------- $Month ----------\n" if $OptDebug;
- if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') {
+ if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
### ----------------------------------------------
### get groups data (number of postings per group)
# get groups data from raw table for given month
- my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
- $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
+ my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
+ "WHERE day LIKE ? AND NOT disregard",
+ $Conf{'DBDatabase'},
+ $Conf{'DBTableRaw'}));
+ $DBQuery->execute($Month.'-%')
+ or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
+ "$DBI::errstr\n",$Month,
+ $Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
# count postings per group
my %Postings;
while (($_) = $DBQuery->fetchrow_array) {
# get list oft newsgroups and hierarchies from Newsgroups:
- my %Newsgroups = ListNewsgroups($_,$TLH,$Options{'l'} ? \%ValidGroups : '');
+ my %Newsgroups = ListNewsgroups($_,$TLH,
+ $OptCheckgroupsFile ? \%ValidGroups : '');
# count each newsgroup and hierarchy once
foreach (sort keys %Newsgroups) {
$Postings{$_}++;
};
};
- # add valid but empty groups if -l is set
+ # add valid but empty groups if --checkgroups is set
if (%ValidGroups) {
foreach (sort keys %ValidGroups) {
if (!defined($Postings{$_})) {
@@ -116,19 +145,29 @@ foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {
};
# delete old data for that month
- if (!$Options{'o'}) {
- $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}),undef,$Month)
- or warn sprintf("$MySelf: E: Can't delete old groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
+ if (!$OptTest) {
+ $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
+ $Conf{'DBDatabase'},$Conf{'DBTableGrps'}),
+ undef,$Month)
+ or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ".
+ "$DBI::errstr\n",$Month,
+ $Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
};
- print "----- GroupStats -----\n" if $Options{'d'};
+ print "----- GroupStats -----\n" if $OptDebug;
foreach my $Newsgroup (sort keys %Postings) {
- print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};
- if (!$Options{'o'}) {
+ print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug;
+ if (!$OptTest) {
# write to database
- $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
- # $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
- $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
+ $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ".
+ "(month,newsgroup,postings) ".
+ "VALUES (?, ?, ?)",
+ $Conf{'DBDatabase'},
+ $Conf{'DBTableGrps'}));
+ $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
+ or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ".
+ "$DBI::errstr\n",$Month,$Newsgroup,
+ $Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
$DBQuery->finish;
};
};
@@ -150,43 +189,31 @@ gatherstats - process statistical data from a raw source
=head1 SYNOPSIS
-B [B<-Vhdo>] [B<-m> I] [B<-p> I] [B<-t> I] [B<-l> I] [B<-n> I] [B<-r> I] [B<-g> I] [B<-c> I] [B<-s> I]
+B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I]
=head1 REQUIREMENTS
-See doc/README: Perl 5.8.x itself and the following modules from CPAN:
-
-=over 2
-
-=item -
-
-Config::Auto
-
-=item -
-
-DBI
-
-=back
+See L.
=head1 DESCRIPTION
This script will extract and process statistical information from a
database table which is fed from F for a given time period
and write its results to (an)other database table(s). Entries marked
-with I<'disregard'> in the database will be ignored; currently, you have
-to set this flag yourself, using your database management tools. You
-can exclude erroneous entries that way (e.g. automatic reposts (think
-of cancels flood and resurrectors); spam; ...).
+with I<'disregard'> in the database will be ignored; currently, you
+have to set this flag yourself, using your database management tools.
+You can exclude erroneous entries that way (e.g. automatic reposts
+(think of cancels flood and resurrectors); spam; ...).
The time period to act on defaults to last month; you can assign
-another month via the B<-m> switch or a time period via the B<-p>
-switch; the latter takes preference.
+another time period or a single month via the B<--month> option (see
+below).
By default B will process all types of information; you
-can change that using the B<-t> switch and assigning the type of
-information to process. Currently only processing of the number of
-postings per group per month is implemented anyway, so that doesn't
-matter yet.
+can change that using the B<--stats> option and assigning the type of
+information to process. Currently that doesn't matter yet as only
+processing of the number of postings per group per month is
+implemented anyway.
Possible information types include:
@@ -205,59 +232,58 @@ respectively. A crossposting to de.alt.test and de.alt.admin, on the
other hand, will be counted for de.alt.test and de.alt.admin each, but
only once for de.alt.ALL and de.ALL.
-Data is written to I (see doc/INSTALL).
+Data is written to I (see L); you can
+override that default through the B<--groupsdb> option.
=back
=head2 Configuration
-F will read its configuration from F
+B will read its configuration from F
which should be present in the same directory via Config::Auto.
-See doc/INSTALL for an overview of possible configuration options.
+See L for an overview of possible configuration options.
-You can override configuration options via the B<-n>, B<-r>, B<-g>,
-B<-c> and B<-s> switches, respectively.
+You can override configuration options via the B<--hierarchy>,
+B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
+respectively.
=head1 OPTIONS
=over 3
-=item B<-V> (version)
+=item B<-V>, B<--version>
-Print out version and copyright information on B and exit.
+Print out version and copyright information and exit.
-=item B<-h> (help)
+=item B<-h>, B<--help>
Print this man page and exit.
-=item B<-d> (debug)
+=item B<-d>, B<--debug>
Output debugging information to STDOUT while processing (number of
postings per group).
-=item B<-o> (output only)
+=item B<-t>, B<--test>
-Do not write results to database. You should use B<-d> in conjunction
-with B<-o> ... everything else seems a bit pointless.
+Do not write results to database. You should use B<--debug> in
+conjunction with B<--test> ... everything else seems a bit pointless.
-=item B<-m> I (month)
+=item B<-m>, B<--month> I
-Set processing period to a month in YYYY-MM format. Ignored if B<-p>
-is set.
+Set processing period to a single month in YYYY-MM format or to a time
+period between two month in YYYY-MM:YYYY-MM format (two month, separated
+by a colon).
-=item B<-p> I (period)
-Set processing period to a time period between two month, each in
-YYYY-MM format, separated by a colon. Overrides B<-m>.
-
-=item B<-t> I (type)
+=item B<-s>, B<--stats> I
Set processing type to one of I and I. Defaults to all
(and is currently rather pointless as only I has been
implemented).
-=item B<-l> I (check against list)
+=item B<-c>, B<--checkgroups> I
Check each group against a list of valid newsgroups read from
I, one group on each line and ignoring everything after the
@@ -268,23 +294,23 @@ Newsgroups not found in I will be dropped (and logged to
STDERR), and newsgroups found in I but having no postings
will be added with a count of 0 (and logged to STDERR).
-=item B<-n> I (newsgroup hierarchy)
+=item B<--hierarchy> I (newsgroup hierarchy)
Override I from F.
-=item B<-r> I (raw data table)
+=item B<--rawdb> I (raw data table)
Override I from F.
-=item B<-g> I (postings per group table)
+=item B<--groupsdb> I (postings per group table)
Override I from F.
-=item B<-c> I (client data table)
+=item B<--clientsdb> I (client data table)
Override I from F.
-=item B<-s> I (server/host data table)
+=item B<--hostsdb> I (host data table)
Override I from F.
@@ -292,7 +318,7 @@ Override I from F.
=head1 INSTALLATION
-See doc/INSTALL.
+See L.
=head1 EXAMPLES
@@ -302,16 +328,16 @@ Process all types of information for lasth month:
Do a dry run, showing results of processing:
- gatherstats -do
+ gatherstats --debug --test
Process all types of information for January of 2010:
- gatherstats -m 2010-01
+ gatherstats --month 2010-01
Process only number of postings for the year of 2010,
checking against checkgroups-2010.txt:
- gatherstats -p 2010-01:2010-12 -t groups -l checkgroups-2010.txt
+ gatherstats -m 2010-01:2010-12 -s groups -c checkgroups-2010.txt
=head1 FILES
@@ -327,7 +353,7 @@ Library functions for the NewsStats package.
=item F
-Runtime configuration file for B.
+Runtime configuration file.
=back
@@ -342,11 +368,11 @@ bug tracker at L!
=item -
-doc/README
+L
=item -
-doc/INSTALL
+L
=back
@@ -358,7 +384,7 @@ Thomas Hochstein
=head1 COPYRIGHT AND LICENSE
-Copyright (c) 2010 Thomas Hochstein
+Copyright (c) 2010-2012 Thomas Hochstein
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.