Redo directory structure.
* Move all scripts to /bin * Move configuration to /etc * Move NewsStats.pm to /lib * Add new path to NewsStats.pm to all scripts * Set $HomePath to top level directory * Move setting of config file name to ReadConf() Signed-off-by: Thomas Hochstein <thh@inter.net>
This commit is contained in:
parent
07c0b2589a
commit
2ad99c20bc
7 changed files with 33 additions and 24 deletions
268
bin/feedlog.pl
Executable file
268
bin/feedlog.pl
Executable file
|
|
@ -0,0 +1,268 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# feedlog.pl
|
||||
#
|
||||
# This script will log headers and other data to a database
|
||||
# for further analysis by parsing a feed from INN.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
our $VERSION = "0.01";
|
||||
use File::Basename;
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats;
|
||||
|
||||
use Sys::Syslog qw(:standard :macros);
|
||||
|
||||
use Date::Format;
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
################################# Subroutines ##################################
|
||||
|
||||
sub PrepareDB {
|
||||
### initialise database connection, prepare statement
|
||||
### and catch errors
|
||||
### IN : \%Conf : reference to configuration hash
|
||||
### OUT: $DBHandle: database handle
|
||||
### $DBQuery : prepared statement
|
||||
our ($DBHandle, $DBQuery, $OptQuiet);
|
||||
my ($ConfigR) = @_;
|
||||
my %Conf = %$ConfigR;
|
||||
# drop current database connection - hard, if necessary
|
||||
if ($DBHandle) {
|
||||
$DBHandle->disconnect;
|
||||
undef $DBHandle;
|
||||
};
|
||||
# connect to database; try again every 5 seconds
|
||||
while (!$DBHandle) {
|
||||
$DBHandle = InitDB($ConfigR,0);
|
||||
if (!$DBHandle) {
|
||||
syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);
|
||||
sleep(5);
|
||||
} else {;
|
||||
syslog(LOG_NOTICE, "Database connection (re-)established successfully.") if !$OptQuiet;
|
||||
}
|
||||
};
|
||||
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,
|
||||
timestamp,token,size,peer,path,
|
||||
newsgroups,headers)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?)",
|
||||
$Conf{'DBDatabase'},
|
||||
$Conf{'DBTableRaw'}));
|
||||
return ($DBHandle,$DBQuery);
|
||||
}
|
||||
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my ($OptDebug,$OptQuiet);
|
||||
GetOptions ('d|debug!' => \$OptDebug,
|
||||
'q|test!' => \$OptQuiet,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig('')};
|
||||
|
||||
### init syslog
|
||||
openlog($0, 'nofatal,pid', LOG_NEWS);
|
||||
syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet;
|
||||
|
||||
### init database
|
||||
my ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
|
||||
|
||||
### main loop
|
||||
while (<>) {
|
||||
chomp;
|
||||
# catch empty lines trailing or leading
|
||||
if ($_ eq '') {
|
||||
next;
|
||||
}
|
||||
# first line contains: mid, timestamp, token, size, peer, Path, Newsgroups
|
||||
my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;
|
||||
# remaining lines contain headers
|
||||
my $Headers = "";
|
||||
while (<>) {
|
||||
chomp;
|
||||
# empty line terminates this article
|
||||
if ($_ eq '') {
|
||||
last;
|
||||
}
|
||||
# collect headers
|
||||
$Headers .= $_."\n" ;
|
||||
}
|
||||
|
||||
# parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp
|
||||
my $Day = time2str("%Y-%m-%d", $Timestamp);
|
||||
my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);
|
||||
|
||||
# write to database
|
||||
if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
|
||||
$Path, $Newsgroups, $Headers)) {
|
||||
syslog(LOG_ERR, 'Database error %s while processing %s: %s',
|
||||
$DBI::err, $Mid, $DBI::errstr);
|
||||
# if "MySQL server has gone away", try to recover
|
||||
if ($DBI::err == 2006) {
|
||||
# try to reconnect to database
|
||||
($DBHandle,$DBQuery) = PrepareDB(\%Conf);
|
||||
# try to repeat the write attempt as before
|
||||
if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
|
||||
$Path, $Newsgroups, $Headers)) {
|
||||
syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
|
||||
};
|
||||
# otherwise log missing posting
|
||||
} else {
|
||||
syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
|
||||
};
|
||||
};
|
||||
$DBQuery->finish;
|
||||
|
||||
warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n".
|
||||
"Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",
|
||||
$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path,
|
||||
$Newsgroups, $Headers) if $OptDebug;
|
||||
}
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet;
|
||||
closelog();
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
feedlog - log data from an INN feed to a database
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<feedlog> [B<-Vhdq>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script will log overview data and complete headers to a database
|
||||
table for further examination by parsing a feed from INN. It will
|
||||
parse that information and write it to a mysql database table in real
|
||||
time.
|
||||
|
||||
All reporting is done to I<syslog> via I<news> facility. If B<feedlog>
|
||||
fails to initiate a database connection at startup, it will log to
|
||||
I<syslog> with I<CRIT> priority and go in an endless loop, as
|
||||
terminating would only result in a rapid respawn.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<feedlog> will read its configuration from F<newsstats.conf> which
|
||||
should be present in the same directory via Config::Auto.
|
||||
|
||||
See L<doc/INSTALL> for an overview of possible configuration options.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Print out version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-d>, B<--debug>
|
||||
|
||||
Output debugging information to STDERR while parsing STDIN. You'll
|
||||
find that information most probably in your B<INN> F<errlog> file.
|
||||
|
||||
=item B<-q>, B<--quiet>
|
||||
|
||||
Suppress logging to syslog.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Set up a feed like that in your B<INN> F<newsfeeds> file:
|
||||
|
||||
## gather statistics for NewsStats
|
||||
newsstats!
|
||||
:!*,de.*
|
||||
:Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
|
||||
|
||||
See L<doc/INSTALL> for further information.
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<bin/feedlog.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/INSTALL>
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
418
bin/gatherstats.pl
Executable file
418
bin/gatherstats.pl
Executable file
|
|
@ -0,0 +1,418 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# gatherstats.pl
|
||||
#
|
||||
# This script will gather statistical information from a database
|
||||
# containing headers and other information from a INN feed.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
our $VERSION = "0.01";
|
||||
use File::Basename;
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList);
|
||||
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
################################# Definitions ##################################
|
||||
|
||||
# define types of information that can be gathered
|
||||
# all / groups (/ clients / hosts)
|
||||
my %LegalStats;
|
||||
@LegalStats{('all','groups')} = ();
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
|
||||
$OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest);
|
||||
GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
|
||||
'clientsdb=s' => \$OptClientsDB,
|
||||
'd|debug!' => \$OptDebug,
|
||||
'groupsdb=s' => \$OptGroupsDB,
|
||||
'hierarchy=s' => \$OptTLH,
|
||||
'hostsdb=s' => \$OptHostsDB,
|
||||
'm|month=s' => \$OptMonth,
|
||||
'rawdb=s' => \$OptRawDB,
|
||||
's|stats=s' => \$OptStatsType,
|
||||
't|test!' => \$OptTest,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig('')};
|
||||
|
||||
### override configuration via commandline options
|
||||
my %ConfOverride;
|
||||
$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
|
||||
$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
|
||||
$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
|
||||
$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
|
||||
$ConfOverride{'TLH'} = $OptTLH if $OptTLH;
|
||||
&OverrideConfig(\%Conf,\%ConfOverride);
|
||||
|
||||
### get type of information to gather, defaulting to 'all'
|
||||
$OptStatsType = 'all' if !$OptStatsType;
|
||||
&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType))
|
||||
if !exists($LegalStats{$OptStatsType});
|
||||
|
||||
### get time period from --month
|
||||
# get verbal description of time period, drop SQL code
|
||||
my ($Period) = &GetTimePeriod($OptMonth);
|
||||
&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
|
||||
"'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
|
||||
|
||||
### reformat $Conf{'TLH'}
|
||||
my $TLH;
|
||||
if ($Conf{'TLH'}) {
|
||||
# $Conf{'TLH'} is parsed as an array by Config::Auto;
|
||||
# make a flat list again, separated by :
|
||||
if (ref($Conf{'TLH'}) eq 'ARRAY') {
|
||||
$TLH = join(':',@{$Conf{'TLH'}});
|
||||
} else {
|
||||
$TLH = $Conf{'TLH'};
|
||||
}
|
||||
# strip whitespace
|
||||
$TLH =~ s/\s//g;
|
||||
# add trailing dots if none are present yet
|
||||
# (using negative look-behind assertions)
|
||||
$TLH =~ s/(?<!\.):/.:/g;
|
||||
$TLH =~ s/(?<!\.)$/./;
|
||||
# check for illegal characters
|
||||
&Bleat(2,'Config error - illegal characters in TLH definition!')
|
||||
if ($TLH !~ /^[a-zA-Z0-9:+.-]+$/);
|
||||
# escape dots
|
||||
$TLH =~ s/\./\\./g;
|
||||
if ($TLH =~ /:/) {
|
||||
# reformat $TLH from a:b to (a)|(b),
|
||||
# e.g. replace ':' by ')|('
|
||||
$TLH =~ s/:/)|(/g;
|
||||
$TLH = '(' . $TLH . ')';
|
||||
};
|
||||
};
|
||||
|
||||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
|
||||
### get data for each month
|
||||
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
|
||||
foreach my $Month (&ListMonth($Period)) {
|
||||
|
||||
print "---------- $Month ----------\n" if $OptDebug;
|
||||
|
||||
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
|
||||
# read list of newsgroups from --checkgroups
|
||||
# into a hash
|
||||
my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))}
|
||||
if $OptCheckgroupsFile;
|
||||
|
||||
### ----------------------------------------------
|
||||
### get groups data (number of postings per group)
|
||||
# get groups data from raw table for given month
|
||||
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
|
||||
"WHERE day LIKE ? AND NOT disregard",
|
||||
$Conf{'DBDatabase'},
|
||||
$Conf{'DBTableRaw'}));
|
||||
$DBQuery->execute($Month.'-%')
|
||||
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
|
||||
"$DBI::errstr\n",$Month,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
|
||||
|
||||
# count postings per group
|
||||
my %Postings;
|
||||
while (($_) = $DBQuery->fetchrow_array) {
|
||||
# get list of newsgroups and hierarchies from Newsgroups:
|
||||
my %Newsgroups = ListNewsgroups($_,$TLH,
|
||||
$OptCheckgroupsFile ? \%ValidGroups : '');
|
||||
# count each newsgroup and hierarchy once
|
||||
foreach (sort keys %Newsgroups) {
|
||||
$Postings{$_}++;
|
||||
};
|
||||
};
|
||||
|
||||
# add valid but empty groups if --checkgroups is set
|
||||
if (%ValidGroups) {
|
||||
foreach (sort keys %ValidGroups) {
|
||||
if (!defined($Postings{$_})) {
|
||||
# add current newsgroup as empty group
|
||||
$Postings{$_} = 0;
|
||||
warn (sprintf("ADDED: %s as empty group\n",$_));
|
||||
# add empty hierarchies for current newsgroup as needed
|
||||
foreach (ParseHierarchies($_)) {
|
||||
my $Hierarchy = $_ . '.ALL';
|
||||
if (!defined($Postings{$Hierarchy})) {
|
||||
$Postings{$Hierarchy} = 0;
|
||||
warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
|
||||
};
|
||||
};
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
# delete old data for that month
|
||||
if (!$OptTest) {
|
||||
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}),
|
||||
undef,$Month)
|
||||
or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ".
|
||||
"$DBI::errstr\n",$Month,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
|
||||
};
|
||||
|
||||
print "----- GroupStats -----\n" if $OptDebug;
|
||||
foreach my $Newsgroup (sort keys %Postings) {
|
||||
print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug;
|
||||
if (!$OptTest) {
|
||||
# write to database
|
||||
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ".
|
||||
"(month,newsgroup,postings) ".
|
||||
"VALUES (?, ?, ?)",
|
||||
$Conf{'DBDatabase'},
|
||||
$Conf{'DBTableGrps'}));
|
||||
$DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
|
||||
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ".
|
||||
"$DBI::errstr\n",$Month,$Newsgroup,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
|
||||
$DBQuery->finish;
|
||||
};
|
||||
};
|
||||
} else {
|
||||
# other types of information go here - later on
|
||||
};
|
||||
};
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
gatherstats - process statistical data from a raw source
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script will extract and process statistical information from a
|
||||
database table which is fed from F<feedlog.pl> for a given time period
|
||||
and write its results to (an)other database table(s). Entries marked
|
||||
with I<'disregard'> in the database will be ignored; currently, you
|
||||
have to set this flag yourself, using your database management tools.
|
||||
You can exclude erroneous entries that way (e.g. automatic reposts
|
||||
(think of cancels flood and resurrectors); spam; ...).
|
||||
|
||||
The time period to act on defaults to last month; you can assign
|
||||
another time period or a single month via the B<--month> option (see
|
||||
below).
|
||||
|
||||
By default B<gatherstats> will process all types of information; you
|
||||
can change that using the B<--stats> option and assigning the type of
|
||||
information to process. Currently that doesn't matter yet as only
|
||||
processing of the number of postings per group per month is
|
||||
implemented anyway.
|
||||
|
||||
Possible information types include:
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<groups> (postings per group per month)
|
||||
|
||||
B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
|
||||
counted for each single group they appear in. Groups not in I<TLH>
|
||||
will be ignored.
|
||||
|
||||
B<gatherstats> will also add up the number of postings for each
|
||||
hierarchy level, but only count each posting once. A posting to
|
||||
de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
|
||||
respectively. A crossposting to de.alt.test and de.alt.admin, on the
|
||||
other hand, will be counted for de.alt.test and de.alt.admin each, but
|
||||
only once for de.alt.ALL and de.ALL.
|
||||
|
||||
Data is written to I<DBTableGrps> (see L<doc/INSTALL>); you can
|
||||
override that default through the B<--groupsdb> option.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<gatherstats> will read its configuration from F<newsstats.conf>
|
||||
which should be present in the same directory via Config::Auto.
|
||||
|
||||
See L<doc/INSTALL> for an overview of possible configuration options.
|
||||
|
||||
You can override configuration options via the B<--hierarchy>,
|
||||
B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
|
||||
respectively.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Print out version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-d>, B<--debug>
|
||||
|
||||
Output debugging information to STDOUT while processing (number of
|
||||
postings per group).
|
||||
|
||||
=item B<-t>, B<--test>
|
||||
|
||||
Do not write results to database. You should use B<--debug> in
|
||||
conjunction with B<--test> ... everything else seems a bit pointless.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]>
|
||||
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
period between two month in YYYY-MM:YYYY-MM format (two month, separated
|
||||
by a colon).
|
||||
|
||||
=item B<-s>, B<--stats> I<type>
|
||||
|
||||
Set processing type to one of I<all> and I<groups>. Defaults to all
|
||||
(and is currently rather pointless as only I<groups> has been
|
||||
implemented).
|
||||
|
||||
=item B<-c>, B<--checkgroups> I<filename template>
|
||||
|
||||
Check each group against a list of valid newsgroups read from a file,
|
||||
one group on each line and ignoring everything after the first
|
||||
whitespace (so you can use a file in checkgroups format or (part of)
|
||||
your INN active file).
|
||||
|
||||
The filename is taken from I<filename template>, amended by each
|
||||
B<--month> B<gatherstats> is processing in the form of I<template-YYYY-MM>,
|
||||
so that
|
||||
|
||||
gatherstats -m 2010-01:2010-12 -c checkgroups
|
||||
|
||||
will check against F<checkgroups-2010-01> for January 2010, against
|
||||
F<checkgroups-2010-02> for February 2010 and so on.
|
||||
|
||||
Newsgroups not found in the checkgroups file will be dropped (and
|
||||
logged to STDERR), and newsgroups found there but having no postings
|
||||
will be added with a count of 0 (and logged to STDERR).
|
||||
|
||||
=item B<--hierarchy> I<TLH> (newsgroup hierarchy)
|
||||
|
||||
Override I<TLH> from F<newsstats.conf>.
|
||||
|
||||
=item B<--rawdb> I<table> (raw data table)
|
||||
|
||||
Override I<DBTableRaw> from F<newsstats.conf>.
|
||||
|
||||
=item B<--groupsdb> I<table> (postings per group table)
|
||||
|
||||
Override I<DBTableGrps> from F<newsstats.conf>.
|
||||
|
||||
=item B<--clientsdb> I<table> (client data table)
|
||||
|
||||
Override I<DBTableClnts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--hostsdb> I<table> (host data table)
|
||||
|
||||
Override I<DBTableHosts> from F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Process all types of information for lasth month:
|
||||
|
||||
gatherstats
|
||||
|
||||
Do a dry run, showing results of processing:
|
||||
|
||||
gatherstats --debug --test
|
||||
|
||||
Process all types of information for January of 2010:
|
||||
|
||||
gatherstats --month 2010-01
|
||||
|
||||
Process only number of postings for the year of 2010,
|
||||
checking against checkgroups-*:
|
||||
|
||||
gatherstats -m 2010-01:2010-12 -s groups -c checkgroups
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<bin/gatherstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/INSTALL>
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
689
bin/groupstats.pl
Executable file
689
bin/groupstats.pl
Executable file
|
|
@ -0,0 +1,689 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# groupstats.pl
|
||||
#
|
||||
# This script will get statistical data on newgroup usage
|
||||
# from a database.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
our $VERSION = "0.01";
|
||||
use File::Basename;
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
|
||||
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments,
|
||||
$OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth,
|
||||
$OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound);
|
||||
GetOptions ('b|boundary=s' => \$OptBoundType,
|
||||
'c|captions!' => \$OptCaptions,
|
||||
'checkgroups=s' => \$OptCheckgroupsFile,
|
||||
'comments!' => \$OptComments,
|
||||
'filetemplate=s' => \$OptFileTemplate,
|
||||
'f|format=s' => \$OptFormat,
|
||||
'g|group-by=s' => \$OptGroupBy,
|
||||
'groupsdb=s' => \$OptGroupsDB,
|
||||
'l|lower=i' => \$LowBound,
|
||||
'm|month=s' => \$OptMonth,
|
||||
'n|newsgroups=s' => \$OptNewsgroups,
|
||||
'o|order-by=s' => \$OptOrderBy,
|
||||
'r|report=s' => \$OptReportType,
|
||||
's|sums!' => \$OptSums,
|
||||
'u|upper=i' => \$UppBound,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
# parse parameters
|
||||
# $OptComments defaults to TRUE
|
||||
$OptComments = 1 if (!defined($OptComments));
|
||||
# force --nocomments when --filetemplate is used
|
||||
$OptComments = 0 if ($OptFileTemplate);
|
||||
# parse $OptBoundType
|
||||
if ($OptBoundType) {
|
||||
if ($OptBoundType =~ /level/i) {
|
||||
$OptBoundType = 'level';
|
||||
} elsif ($OptBoundType =~ /av(era)?ge?/i) {
|
||||
$OptBoundType = 'average';
|
||||
} elsif ($OptBoundType =~ /sums?/i) {
|
||||
$OptBoundType = 'sum';
|
||||
} else {
|
||||
$OptBoundType = 'default';
|
||||
}
|
||||
}
|
||||
# parse $OptReportType
|
||||
if ($OptReportType) {
|
||||
if ($OptReportType =~ /av(era)?ge?/i) {
|
||||
$OptReportType = 'average';
|
||||
} elsif ($OptReportType =~ /sums?/i) {
|
||||
$OptReportType = 'sum';
|
||||
} else {
|
||||
$OptReportType = 'default';
|
||||
}
|
||||
}
|
||||
# read list of newsgroups from --checkgroups
|
||||
# into a hash reference
|
||||
my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile;
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig('')};
|
||||
|
||||
### override configuration via commandline options
|
||||
my %ConfOverride;
|
||||
$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
|
||||
&OverrideConfig(\%Conf,\%ConfOverride);
|
||||
|
||||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
|
||||
### get time period and newsgroups, prepare SQL 'WHERE' clause
|
||||
# get time period
|
||||
# and set caption for output and expression for SQL 'WHERE' clause
|
||||
my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
|
||||
# bail out if --month is invalid
|
||||
&Bleat(2,"--month option has an invalid format - ".
|
||||
"please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
|
||||
# get list of newsgroups and set expression for SQL 'WHERE' clause
|
||||
# with placeholders as well as a list of newsgroup to bind to them
|
||||
my ($SQLWhereNewsgroups,@SQLBindNewsgroups);
|
||||
if ($OptNewsgroups) {
|
||||
($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups);
|
||||
# bail out if --newsgroups is invalid
|
||||
&Bleat(2,"--newsgroups option has an invalid format!")
|
||||
if !$SQLWhereNewsgroups;
|
||||
}
|
||||
|
||||
### build SQL WHERE clause (and HAVING clause, if needed)
|
||||
my ($SQLWhereClause,$SQLHavingClause);
|
||||
# $OptBoundType 'level'
|
||||
if ($OptBoundType and $OptBoundType ne 'default') {
|
||||
$SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
|
||||
$SQLWhereNewsgroups,&SQLHierarchies($OptSums));
|
||||
$SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType,
|
||||
$LowBound,$UppBound));
|
||||
# $OptBoundType 'threshold' / 'default' or none
|
||||
} else {
|
||||
$SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
|
||||
$SQLWhereNewsgroups,&SQLHierarchies($OptSums),
|
||||
&SQLSetBounds('default',$LowBound,$UppBound));
|
||||
}
|
||||
|
||||
### get sort order and build SQL 'ORDER BY' clause
|
||||
# default to 'newsgroup' for $OptBoundType 'level' or 'average'
|
||||
$OptGroupBy = 'newsgroup' if (!$OptGroupBy and
|
||||
$OptBoundType and $OptBoundType ne 'default');
|
||||
# force to 'month' for $OptReportType 'average' or 'sum'
|
||||
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
|
||||
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
|
||||
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy);
|
||||
# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy)
|
||||
# set it to 'month' or 'key' for OutputData()
|
||||
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
|
||||
|
||||
### get report type and build SQL 'SELECT' query
|
||||
my $SQLSelect;
|
||||
my $SQLGroupClause = '';
|
||||
my $Precision = 0; # number of digits right of decimal point for output
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$SQLGroupClause = 'GROUP BY newsgroup';
|
||||
# change $SQLOrderClause: replace everything before 'postings'
|
||||
$SQLOrderClause =~ s/BY.+postings/BY postings/;
|
||||
if ($OptReportType eq 'average') {
|
||||
$SQLSelect = "'All months',newsgroup,AVG(postings)";
|
||||
$Precision = 2;
|
||||
# change $SQLOrderClause: replace 'postings' with 'AVG(postings)'
|
||||
$SQLOrderClause =~ s/postings/AVG(postings)/;
|
||||
} elsif ($OptReportType eq 'sum') {
|
||||
$SQLSelect = "'All months',newsgroup,SUM(postings)";
|
||||
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
|
||||
$SQLOrderClause =~ s/postings/SUM(postings)/;
|
||||
}
|
||||
} else {
|
||||
$SQLSelect = 'month,newsgroup,postings';
|
||||
};
|
||||
|
||||
### get length of longest newsgroup name delivered by query
|
||||
### for formatting purposes
|
||||
my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
|
||||
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'},
|
||||
$Field,'postings',$SQLWhereClause,
|
||||
$SQLHavingClause,
|
||||
@SQLBindNewsgroups);
|
||||
|
||||
### build and execute SQL query
|
||||
my ($DBQuery);
|
||||
# special query preparation for $OptBoundType 'level', 'average' or 'sums'
|
||||
if ($OptBoundType and $OptBoundType ne 'default') {
|
||||
# prepare and execute first query:
|
||||
# get list of newsgroups meeting level conditions
|
||||
$DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '.
|
||||
'GROUP BY newsgroup %s',
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
|
||||
$SQLWhereClause,$SQLHavingClause));
|
||||
$DBQuery->execute(@SQLBindNewsgroups)
|
||||
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
|
||||
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
|
||||
$DBI::errstr));
|
||||
# add newsgroups to a comma-seperated list ready for IN(...) query
|
||||
my $GroupList;
|
||||
while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
|
||||
$GroupList .= ',' if $GroupList;
|
||||
$GroupList .= "'$Newsgroup'";
|
||||
};
|
||||
# enhance $WhereClause
|
||||
if ($GroupList) {
|
||||
$SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,
|
||||
sprintf('newsgroup IN (%s)',$GroupList));
|
||||
} else {
|
||||
# condition cannot be satisfied;
|
||||
# force query to fail by adding '0=1'
|
||||
$SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1');
|
||||
}
|
||||
}
|
||||
|
||||
# prepare query
|
||||
$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
|
||||
$SQLSelect,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
|
||||
$SQLWhereClause,$SQLGroupClause,
|
||||
$SQLOrderClause));
|
||||
|
||||
# execute query
|
||||
$DBQuery->execute(@SQLBindNewsgroups)
|
||||
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
|
||||
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
|
||||
$DBI::errstr));
|
||||
|
||||
### output results
|
||||
# set default to 'pretty'
|
||||
$OptFormat = 'pretty' if !$OptFormat;
|
||||
# print captions if --caption is set
|
||||
if ($OptCaptions && $OptComments) {
|
||||
# print time period with report type
|
||||
my $CaptionReportType= '(number of postings for each month)';
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$CaptionReportType= '(average number of postings for each month)'
|
||||
if $OptReportType eq 'average';
|
||||
$CaptionReportType= '(number of all postings for that time period)'
|
||||
if $OptReportType eq 'sum';
|
||||
}
|
||||
printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
|
||||
# print newsgroup list if --newsgroups is set
|
||||
printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups)))
|
||||
if $OptNewsgroups;
|
||||
# print boundaries, if set
|
||||
my $CaptionBoundary= '(counting only month fulfilling this condition)';
|
||||
if ($OptBoundType and $OptBoundType ne 'default') {
|
||||
$CaptionBoundary= '(every single month)' if $OptBoundType eq 'level';
|
||||
$CaptionBoundary= '(on average)' if $OptBoundType eq 'average';
|
||||
$CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum';
|
||||
}
|
||||
printf("# ----- Threshold: %s %s x %s %s %s\n",
|
||||
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
|
||||
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
|
||||
if ($LowBound or $UppBound);
|
||||
# print primary and secondary sort order
|
||||
printf("# ----- Grouped by %s (%s), sorted %s%s\n",
|
||||
($GroupBy eq 'month') ? 'Months' : 'Newsgroups',
|
||||
($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
|
||||
($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
|
||||
($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
|
||||
}
|
||||
|
||||
# output data
|
||||
&OutputData($OptFormat,$OptComments,$GroupBy,$Precision,
|
||||
$OptCheckgroupsFile ? $ValidGroups : '',
|
||||
$OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength);
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
groupstats - create reports on newsgroup usage
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script create reports on newsgroup usage (number of postings per
|
||||
group per month) taken from result tables created by
|
||||
B<gatherstats.pl>.
|
||||
|
||||
=head2 Features and options
|
||||
|
||||
=head3 Time period and newsgroups
|
||||
|
||||
The time period to act on defaults to last month; you can assign another
|
||||
time period or a single month (or drop all time constraints) via the
|
||||
B<--month> option (see below).
|
||||
|
||||
B<groupstats> will process all newsgroups by default; you can limit
|
||||
processing to only some newsgroups by supplying a list of those groups via
|
||||
B<--newsgroups> option (see below). You can include hierarchy levels in
|
||||
the output by adding the B<--sums> switch (see below). Optionally
|
||||
newsgroups not present in a checkgroups file can be excluded from output,
|
||||
sse B<--checkgroups> below.
|
||||
|
||||
=head3 Report type
|
||||
|
||||
You can choose between different B<--report> types: postings per month,
|
||||
average postings per month or all postings summed up; for details, see
|
||||
below.
|
||||
|
||||
=head3 Upper and lower boundaries
|
||||
|
||||
Furthermore you can set an upper and/or lower boundary to exclude some
|
||||
results from output via the B<--lower> and B<--upper> options,
|
||||
respectively. By default, all newsgroups with more and/or less postings
|
||||
per month will be excluded from the result set (i.e. not shown and not
|
||||
considered for average and sum reports). You can change the meaning of
|
||||
those boundaries with the B<--boundary> option. For details, please see
|
||||
below.
|
||||
|
||||
=head3 Sorting and formatting the output
|
||||
|
||||
By default, all results are grouped by month; you can group results by
|
||||
newsgroup instead via the B<--groupy-by> option. Within those groups, the
|
||||
list of newsgroups (or months) is sorted alphabetically (or
|
||||
chronologically, respectively) ascending. You can change that order (and
|
||||
sort by number of postings) with the B<--order-by> option. For details and
|
||||
exceptions, please see below.
|
||||
|
||||
The results will be formatted as a kind of table; you can change the
|
||||
output format to a simple list or just a list of newsgroups and number of
|
||||
postings with the B<--format> option. Captions will be added by means of
|
||||
the B<--caption> option; all comments (and captions) can be supressed by
|
||||
using B<--nocomments>.
|
||||
|
||||
Last but not least you can redirect all output to a number of files, e.g.
|
||||
one for each month, by submitting the B<--filetemplate> option, see below.
|
||||
Captions and comments are automatically disabled in this case.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<groupstats> will read its configuration from F<newsstats.conf>
|
||||
which should be present in the same directory via Config::Auto.
|
||||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
You can override some configuration options via the B<--groupsdb> option.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Print out version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
|
||||
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
period between two month in YYYY-MM:YYYY-MM format (two month, separated
|
||||
by a colon). By using the keyword I<all> instead, you can set no
|
||||
processing period to process the whole database.
|
||||
|
||||
=item B<-n>, B<--newsgroups> I<newsgroup(s)>
|
||||
|
||||
Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
|
||||
be a single newsgroup name (de.alt.test), a newsgroup hierarchy
|
||||
(de.alt.*) or a list of either of these, separated by colons, for
|
||||
example
|
||||
|
||||
de.test:de.alt.test:de.newusers.*
|
||||
|
||||
=item B<-s>, B<--sums|--nosums> (sum per hierarchy level)
|
||||
|
||||
Include "virtual" groups for every hierarchy level in output, for
|
||||
example:
|
||||
|
||||
de.alt.ALL 10
|
||||
de.alt.test 5
|
||||
de.alt.admin 7
|
||||
|
||||
See the B<gatherstats> man page for details.
|
||||
|
||||
=item B<--checkgroups> I<filename>
|
||||
|
||||
Restrict output to those newgroups present in a file in checkgroups format
|
||||
(one newgroup name per line; everything after the first whitespace on each
|
||||
line is ignored). All other newsgroups will be removed from output.
|
||||
|
||||
Contrary to B<gatherstats>, I<filename> is not a template, but refers to
|
||||
a single file in checkgroups format.
|
||||
|
||||
=item B<-r>, B<--report> I<default|average|sums>
|
||||
|
||||
Choose the report type: I<default>, I<average> or I<sums>
|
||||
|
||||
By default, B<groupstats> will report the number of postings for each
|
||||
newsgroup in each month. But it can also report the average number of
|
||||
postings per group for all months or the total sum of postings per group
|
||||
for all months.
|
||||
|
||||
For report types I<average> and I<sums>, the B<group-by> option has no
|
||||
meaning and will be silently ignored (see below).
|
||||
|
||||
=item B<-l>, B<--lower> I<lower boundary>
|
||||
|
||||
Set the lower boundary. See B<--boundary> below.
|
||||
|
||||
=item B<-l>, B<--upper> I<upper boundary>
|
||||
|
||||
Set the upper boundary. See B<--boundary> below.
|
||||
|
||||
=item B<-b>, B<--boundary> I<boundary type>
|
||||
|
||||
Set the boundary type to one of I<default>, I<level>, I<average> or
|
||||
I<sums>.
|
||||
|
||||
By default, all newsgroups with more postings per month than the upper
|
||||
boundary and/or less postings per month than the lower boundary will be
|
||||
excluded from further processing. For the default report that means each
|
||||
month only newsgroups with a number of postings between the boundaries
|
||||
will be displayed. For the other report types, newsgroups with a number of
|
||||
postings exceeding the boundaries in all (!) months will not be
|
||||
considered.
|
||||
|
||||
For example, lets take a list of newsgroups like this:
|
||||
|
||||
----- 2012-01:
|
||||
de.comp.datenbanken.misc 6
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
----- 2012-02:
|
||||
de.comp.datenbanken.misc 8
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
----- 2012-03:
|
||||
de.comp.datenbanken.misc 24
|
||||
de.comp.datenbanken.ms-access 83
|
||||
de.comp.datenbanken.mysql 36
|
||||
|
||||
With C<groupstats --month 2012-01:2012-03 --lower 25 --report sums>,
|
||||
you'll get the following result:
|
||||
|
||||
----- All months:
|
||||
de.comp.datenbanken.ms-access 293
|
||||
de.comp.datenbanken.mysql 124
|
||||
|
||||
de.comp.datenbanken.misc has not been considered even though it has 38
|
||||
postings in total, because it has less than 25 postings in every single
|
||||
month. If you want to list all newsgroups with more than 25 postings
|
||||
I<in total>, you'll have to set the boundary type to I<sum>, see below.
|
||||
|
||||
A boundary type of I<level> will show only those newsgroups - at all -
|
||||
that satisfy the boundaries in each and every single month. With the above
|
||||
list of newsgroups and
|
||||
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>,
|
||||
you'll get this result:
|
||||
|
||||
----- All months:
|
||||
de.comp.datenbanken.ms-access 293
|
||||
|
||||
de.comp.datenbanken.mysql has not been considered because it had less than
|
||||
25 postings in 2012-02 (only).
|
||||
|
||||
You can use that to get a list of newsgroups that have more (or less) then
|
||||
x postings in every month during the whole reporting period.
|
||||
|
||||
A boundary type of I<average> will show only those newsgroups - at all -that
|
||||
satisfy the boundaries on average. With the above list of newsgroups and
|
||||
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>,
|
||||
you'll get this result:
|
||||
|
||||
----- All months:
|
||||
de.comp.datenbanken.ms-access 293
|
||||
de.comp.datenbanken.mysql 145
|
||||
|
||||
The average number of postings in the three groups is:
|
||||
|
||||
de.comp.datenbanken.misc 12.67
|
||||
de.comp.datenbanken.ms-access 97.67
|
||||
de.comp.datenbanken.mysql 48.33
|
||||
|
||||
Last but not least, a boundary type of I<sums> will show only those
|
||||
newsgroups - at all - that satisfy the boundaries with the total sum of
|
||||
all postings during the reporting period. With the above list of
|
||||
newsgroups and
|
||||
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>,
|
||||
you'll finally get this result:
|
||||
|
||||
----- All months:
|
||||
de.comp.datenbanken.misc 38
|
||||
de.comp.datenbanken.ms-access 293
|
||||
de.comp.datenbanken.mysql 145
|
||||
|
||||
|
||||
=item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]>
|
||||
|
||||
By default, all results are grouped by month, sorted chronologically in
|
||||
ascending order, like this:
|
||||
|
||||
----- 2012-01:
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
----- 2012-02:
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
|
||||
The results can be grouped by newsgroups instead via
|
||||
B<--group-by> I<newsgroup>:
|
||||
|
||||
----- de.comp.datenbanken.ms-access:
|
||||
2012-01 84
|
||||
2012-02 126
|
||||
----- de.comp.datenbanken.mysql:
|
||||
2012-01 88
|
||||
2012-02 21
|
||||
|
||||
By appending I<-desc> to the group-by option parameter, you can reverse
|
||||
the sort order - e.g. B<--group-by> I<month-desc> will give:
|
||||
|
||||
----- 2012-02:
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
----- 2012-01:
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
|
||||
Average and sums reports (see above) will always be grouped by months;
|
||||
this option will therefore be ignored.
|
||||
|
||||
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
|
||||
|
||||
Within each group (a single month or single newsgroup, see above), the
|
||||
report will be sorted by newsgroup names in ascending alphabetical order
|
||||
by default. You can change the sort order to descending or sort by number
|
||||
of postings instead.
|
||||
|
||||
=item B<-f>, B<--format> I<pretty|list|dump>
|
||||
|
||||
Select the output format, I<pretty> being the default:
|
||||
|
||||
----- 2012-01:
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
----- 2012-02:
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
|
||||
I<list> format looks like this:
|
||||
|
||||
2012-01 de.comp.datenbanken.ms-access 84
|
||||
2012-01 de.comp.datenbanken.mysql 88
|
||||
2012-02 de.comp.datenbanken.ms-access 126
|
||||
2012-02 de.comp.datenbanken.mysql 21
|
||||
|
||||
And I<dump> format looks like this:
|
||||
|
||||
# 2012-01:
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
# 2012-02:
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
|
||||
You can remove the comments by using B<--nocomments>, see below.
|
||||
|
||||
=item B<-c>, B<--captions|--nocaptions>
|
||||
|
||||
Add captions to output, like this:
|
||||
|
||||
----- Report for 2012-01 to 2012-02 (number of postings for each month)
|
||||
----- Newsgroups: de.comp.datenbanken.*
|
||||
----- Threshold: 10 => x <= 20 (on average)
|
||||
----- Grouped by Newsgroups (ascending), sorted by number of postings descending
|
||||
|
||||
False by default.
|
||||
|
||||
=item B<--comments|--nocomments>
|
||||
|
||||
Add comments (group headers) to I<dump> and I<pretty> output. True by default.
|
||||
|
||||
Use I<--nocomments> to suppress anything except newsgroup names/months and
|
||||
numbers of postings. This is enforced when using B<--filetemplate>, see below.
|
||||
|
||||
=item B<--filetemplate> I<filename template>
|
||||
|
||||
Save output to file(s) instead of dumping it to STDOUT. B<groupstats> will
|
||||
create one file for each month (or each newsgroup, accordant to the
|
||||
setting of B<--group-by>, see above), with filenames composed by adding
|
||||
year and month (or newsgroup names) to the I<filename template>, for
|
||||
example with B<--filetemplate> I<stats>:
|
||||
|
||||
stats-2012-01
|
||||
stats-2012-02
|
||||
... and so on
|
||||
|
||||
B<--nocomments> is enforced, see above.
|
||||
|
||||
=item B<--groupsdb> I<database table>
|
||||
|
||||
Override I<DBTableGrps> from F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Show number of postings per group for lasth month in I<pretty> format:
|
||||
|
||||
groupstats
|
||||
|
||||
Show that report for January of 2010 and de.alt.* plus de.test,
|
||||
including display of hierarchy levels:
|
||||
|
||||
groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums
|
||||
|
||||
Only show newsgroups with 30 postings or less last month, ordered
|
||||
by number of postings, descending, in I<pretty> format:
|
||||
|
||||
groupstats --upper 30 --order-by postings-desc
|
||||
|
||||
Show the total of all postings for the year of 2010 for all groups that
|
||||
had 30 postings or less in every single month in that year, ordered by
|
||||
number of postings in descending order:
|
||||
|
||||
groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc
|
||||
|
||||
The same for the average number of postings in the year of 2010:
|
||||
|
||||
groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc
|
||||
|
||||
List number of postings per group for eacht month of 2010 and redirect
|
||||
output to one file for each month, namend stats-2010-01 and so on, in
|
||||
machine-readable form (without formatting):
|
||||
|
||||
groupstats -m 2010-01:2010-12 -f dump --filetemplate stats
|
||||
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<bin/groupstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
l>doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
||||
gatherstats -h
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
Loading…
Add table
Add a link
Reference in a new issue