Add comments and POD.
Add comments NewsStats.pm Add POD to install.pl Add POD to feedlog.pl Add POD and comments to gatherstats.pl Add POD and comments to groupstats.pl, beautify code Signed-off-by: Thomas Hochstein <thh@inter.net>
This commit is contained in:
parent
610b5ef492
commit
3430c89868
48
NewsStats.pm
48
NewsStats.pm
|
@ -58,8 +58,8 @@ our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)";
|
|||
################################################################################
|
||||
sub ReadOptions {
|
||||
################################################################################
|
||||
### read commandline options and act on standard options
|
||||
### IN : $Params: containing list of commandline paramaters (without -h and -V)
|
||||
### read commandline options and act on standard options -h and -V
|
||||
### IN : $Params: list of legal commandline parameters (without -h and -V)
|
||||
### OUT: a hash containing the commandline options
|
||||
$Getopt::Std::STANDARD_HELP_VERSION = 1;
|
||||
|
||||
|
@ -116,7 +116,9 @@ sub OverrideConfig {
|
|||
### $OverrideR: reference to a hash containing overrides
|
||||
my ($ConfigR,$OverrideR) = @_;
|
||||
my %Override = %$OverrideR;
|
||||
# Config hash empty?
|
||||
warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1);
|
||||
# return if no overrides
|
||||
return if (keys %Override < 1 or keys %$ConfigR < 1);
|
||||
foreach my $Key (keys %Override) {
|
||||
$$ConfigR{$Key} = $Override{$Key};
|
||||
|
@ -129,7 +131,7 @@ sub InitDB {
|
|||
################################################################################
|
||||
### initialise database connection
|
||||
### IN : $ConfigR: reference to configuration hash
|
||||
### $Die : if TRUE, die if connection failed
|
||||
### $Die : if TRUE, die if connection fails
|
||||
### OUT: DBHandle
|
||||
my ($ConfigR,$Die) = @_;
|
||||
my %Conf = %$ConfigR;
|
||||
|
@ -147,8 +149,10 @@ sub InitDB {
|
|||
################################################################################
|
||||
sub ListNewsgroups {
|
||||
################################################################################
|
||||
### count each newsgroup and each hierarchy level, but only once
|
||||
### IN : $Newsgroups: a list of newsgroups (content of Newsgroups:)
|
||||
### explode a (scalar) list of newsgroup names to a list of newsgroup and
|
||||
### hierarchy names where every newsgroup and hierarchy appears only once:
|
||||
### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin
|
||||
### IN : $Newsgroups: a list of newsgroups (content of Newsgroups: header)
|
||||
### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys
|
||||
my ($Newsgroups) = @_;
|
||||
my %Newsgroups;
|
||||
|
@ -171,7 +175,8 @@ sub ListNewsgroups {
|
|||
################################################################################
|
||||
sub ParseHierarchies {
|
||||
################################################################################
|
||||
### get all hierarchies a newsgroup belongs to
|
||||
### return a list of all hierarchy levels a newsgroup belongs to
|
||||
### (for de.alt.test.moderated that would be de/de.alt/de.alt.test)
|
||||
### IN : $Newsgroup : a newsgroup name
|
||||
### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
|
||||
my ($Newsgroup) = @_;
|
||||
|
@ -194,9 +199,11 @@ sub ParseHierarchies {
|
|||
################################################################################
|
||||
sub GetTimePeriod {
|
||||
################################################################################
|
||||
### get time period using -m / -p
|
||||
### get a time period to act on, in order of preference: by default the
|
||||
### last month; or a month submitted by -m YYYY-MM; or a time period submitted
|
||||
### by -p YYYY-MM:YYYY-MM
|
||||
### IN : $Month,$Period: contents of -m and -p
|
||||
### OUT: $StartMonth, $EndMonth
|
||||
### OUT: $StartMonth, $EndMonth (identical if period is just one month)
|
||||
my ($Month,$Period) = @_;
|
||||
# exit if -m is set and not like YYYY-MM
|
||||
die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month);
|
||||
|
@ -220,7 +227,7 @@ sub GetTimePeriod {
|
|||
################################################################################
|
||||
sub LastMonth {
|
||||
################################################################################
|
||||
### get last month from today in YYYY-MM format
|
||||
### get last month from today's date in YYYY-MM format
|
||||
### OUT: last month as YYYY-MM
|
||||
# get today's date
|
||||
my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
|
||||
|
@ -237,7 +244,7 @@ sub LastMonth {
|
|||
################################################################################
|
||||
sub CheckMonth {
|
||||
################################################################################
|
||||
### check for valid month
|
||||
### check if input is a valid month in YYYY-MM form
|
||||
### IN : $Month: month
|
||||
### OUT: TRUE / FALSE
|
||||
my ($Month) = @_;
|
||||
|
@ -248,7 +255,7 @@ sub CheckMonth {
|
|||
################################################################################
|
||||
sub SplitPeriod {
|
||||
################################################################################
|
||||
### split a time period YYYY-MM:YYYY-MM into start and end month
|
||||
### split a time period denoted by YYYY-MM:YYYY-MM into start and end month
|
||||
### IN : $Period: time period
|
||||
### OUT: $StartMonth, $EndMonth
|
||||
my ($Period) = @_;
|
||||
|
@ -265,7 +272,7 @@ sub SplitPeriod {
|
|||
################################################################################
|
||||
sub ListMonth {
|
||||
################################################################################
|
||||
### return a list of month (YYYY-MM) between start and end month
|
||||
### return a list of months (YYYY-MM) between start and end month
|
||||
### IN : $StartMonth, $EndMonth
|
||||
### OUT: @Months: array containing all months from $StartMonth to $EndMonth
|
||||
my ($StartMonth, $EndMonth) = @_;
|
||||
|
@ -293,12 +300,11 @@ sub ListMonth {
|
|||
################################################################################
|
||||
sub OutputData {
|
||||
################################################################################
|
||||
### output information with formatting from DBHandle
|
||||
### read database query results from DBHandle and print results with formatting
|
||||
### IN : $Format : format specifier
|
||||
### $DBQuery: database query handle with executed query,
|
||||
### containing $Month, $Key, $Value
|
||||
### $PadGroup: padding length for newsgroups field (optional) for 'pretty'
|
||||
### OUT: $Output: formatted output
|
||||
my ($Format, $DBQuery,$PadGroup) = @_;
|
||||
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
|
||||
print &FormatOutput($Format, $Month, $Key, $Value, $PadGroup);
|
||||
|
@ -308,12 +314,12 @@ sub OutputData {
|
|||
################################################################################
|
||||
sub FormatOutput {
|
||||
################################################################################
|
||||
### format information for output
|
||||
### format information for output according to format specifier
|
||||
### IN : $Format : format specifier
|
||||
### $PadGroup: padding length for newsgroups field (optional) for 'pretty'
|
||||
### $Month : month (as YYYY-MM)
|
||||
### $Key : newsgroup, client, ...
|
||||
### $Value : number of postings with that attribute
|
||||
### $PadGroup: padding length for key field (optional) for 'pretty'
|
||||
### OUT: $Output: formatted output
|
||||
my ($Format, $Month, $Key, $Value, $PadGroup) = @_;
|
||||
|
||||
|
@ -324,6 +330,7 @@ sub FormatOutput {
|
|||
die "$MySelf: E: Unknown output type '$Format'!\n" if !exists($LegalOutput{$Format});
|
||||
|
||||
my ($Output);
|
||||
# keep last month in mind
|
||||
our ($LastIteration);
|
||||
if ($Format eq 'dump') {
|
||||
# output as dump (ng nnnnn)
|
||||
|
@ -348,7 +355,9 @@ sub FormatOutput {
|
|||
################################################################################
|
||||
sub SQLHierarchies {
|
||||
################################################################################
|
||||
### amend WHERE clause to include hierarchies
|
||||
### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by
|
||||
### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is
|
||||
### true, accordingly)
|
||||
### IN : $ShowHierarchies: boolean value
|
||||
### OUT: SQL code
|
||||
my ($ShowHierarchies) = @_;
|
||||
|
@ -358,7 +367,7 @@ sub SQLHierarchies {
|
|||
################################################################################
|
||||
sub GetMaxLenght {
|
||||
################################################################################
|
||||
### get length of longest field in query
|
||||
### get length of longest field in future query result
|
||||
### IN : $DBHandle : database handle
|
||||
### $Table : table to query
|
||||
### $Field : field to check
|
||||
|
@ -375,7 +384,8 @@ sub GetMaxLenght {
|
|||
################################################################################
|
||||
sub SQLGroupList {
|
||||
################################################################################
|
||||
### create part of WHERE clause for list of newsgroups separated by :
|
||||
### explode list of newsgroups separated by : (with wildcards) to a SQL WHERE
|
||||
### clause
|
||||
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
|
||||
### OUT: SQL code, list of newsgroups
|
||||
my ($Newsgroups) = @_;
|
||||
|
|
140
feedlog.pl
140
feedlog.pl
|
@ -85,3 +85,143 @@ $DBHandle->disconnect;
|
|||
syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'};
|
||||
closelog();
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
feedlog - log data from an INN feed to a database
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<feedlog> [B<-Vhdq>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See doc/README: Perl 5.8.x itself and the following modules from CPAN:
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
Config::Auto
|
||||
|
||||
=item -
|
||||
|
||||
Date::Format
|
||||
|
||||
=item -
|
||||
|
||||
DBI
|
||||
|
||||
=back
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script will log overview data and complete headers to a database
|
||||
table for further examination by parsing a feed from INN. It will
|
||||
parse that information and write it to a mysql database table in real
|
||||
time.
|
||||
|
||||
All reporting is done to I<syslog> via I<news> facility. If B<feedlog>
|
||||
fails to initiate a database connection at startup, it will log to
|
||||
I<syslog> with I<CRIT> priority and go in an endless loop, as
|
||||
terminating would only result in a rapid respawn.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
F<feedlog.pl> will read its configuration from F<newsstats.conf> which
|
||||
should be present in the same directory via Config::Auto.
|
||||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V> (version)
|
||||
|
||||
Print out version and copyright information on B<feedlog> and exit.
|
||||
|
||||
=item B<-h> (help)
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-d> (debug)
|
||||
|
||||
Output debugging information to STDERR while parsing STDIN. You'll
|
||||
find that information most probably in your B<INN> F<errlog> file.
|
||||
|
||||
=item B<-q> (quiet)
|
||||
|
||||
Suppress logging to syslog.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See doc/INSTALL.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Set up a feed like this in your B<INN> F<newsfeeds> file:
|
||||
|
||||
## gather statistics for NewsStats
|
||||
newsstats!
|
||||
:!*,de.*
|
||||
:Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
|
||||
|
||||
See doc/INSTALL for further information.
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<feedlog.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<newsstats.conf>
|
||||
|
||||
Runtime configuration file for the B<NewsStats> package.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
doc/README
|
||||
|
||||
=item -
|
||||
|
||||
doc/INSTALL
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
|
|
214
gatherstats.pl
214
gatherstats.pl
|
@ -47,11 +47,11 @@ $ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'};
|
|||
$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'};
|
||||
&OverrideConfig(\%Conf,\%ConfOverride);
|
||||
|
||||
### get type of information to gather, default to 'all'
|
||||
### get type of information to gather, defaulting to 'all'
|
||||
$Options{'t'} = 'all' if !$Options{'t'};
|
||||
die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}});
|
||||
|
||||
### get time period
|
||||
### get time period (-m or -p)
|
||||
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
|
||||
|
||||
### init database
|
||||
|
@ -94,9 +94,219 @@ foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {
|
|||
$DBQuery->finish;
|
||||
};
|
||||
};
|
||||
} else {
|
||||
# other types of information go here - later on
|
||||
};
|
||||
};
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
gatherstats - process statistical data from a raw source
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See doc/README: Perl 5.8.x itself and the following modules from CPAN:
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
Config::Auto
|
||||
|
||||
=item -
|
||||
|
||||
DBI
|
||||
|
||||
=back
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script will extract and process statistical information from a
|
||||
database table which is fed from F<feedlog.pl> for a given time period
|
||||
and write its results to (an)other database table(s).
|
||||
|
||||
The time period to act on defaults to last month; you can assign
|
||||
another month via the B<-m> switch or a time period via the B<-p>
|
||||
switch; the latter takes precedence.
|
||||
|
||||
By default B<gatherstats> will process all types of information; you
|
||||
can change that using the B<-t> switch and assigning the type of
|
||||
information to process. Currently only processing of the number of
|
||||
postings per group per month is implemented anyway, so that doesn't
|
||||
matter yet.
|
||||
|
||||
Possible information types include:
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<groups> (postings per group per month)
|
||||
|
||||
B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
|
||||
counted for each single group they appear in. Groups not in I<TLH>
|
||||
will be ignored.
|
||||
|
||||
B<gatherstats> will also add up the number of postings for each
|
||||
hierarchy level, but only count each posting once. A posting to
|
||||
de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
|
||||
respectively. A crossposting to de.alt.test and de.alt.admin, on the
|
||||
other hand, will be counted for de.alt.test and de.alt.admin each, but
|
||||
only once for de.alt.ALL and de.ALL.
|
||||
|
||||
Data is written to I<DBTableGrps> (see doc/INSTALL).
|
||||
|
||||
=back
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
F<gatherstats.pl> will read its configuration from F<newsstats.conf>
|
||||
which should be present in the same directory via Config::Auto.
|
||||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
You can override configuration options via the B<-n>, B<-r>, B<-g>,
|
||||
B<-c> and B<-s> switches, respectively.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V> (version)
|
||||
|
||||
Print out version and copyright information on B<gatherstats> and exit.
|
||||
|
||||
=item B<-h> (help)
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-d> (debug)
|
||||
|
||||
Output debugging information to STDOUT while processing (number of
|
||||
postings per group).
|
||||
|
||||
=item B<-o> (output only)
|
||||
|
||||
Do not write results to database. You should use B<-d> in conjunction
|
||||
with B<-o> ... everything else seems a bit pointless.
|
||||
|
||||
=item B<-m> I<YYYY-MM> (month)
|
||||
|
||||
Set processing period to a month in YYYY-MM format. Ignored if B<-p>
|
||||
is set.
|
||||
|
||||
=item B<-p> I<YYYY-MM:YYYY-MM> (period)
|
||||
|
||||
Set processing period to a time period between two months, each in
|
||||
YYYY-MM format, separated by a colon. Overrides B<-m>.
|
||||
|
||||
=item B<-t> I<type> (type)
|
||||
|
||||
Set processing type to one of I<all> and I<groups>. Defaults to all
|
||||
(and is currently rather pointless as only I<groups> has been
|
||||
implemented).
|
||||
|
||||
=item B<-n> I<TLH> (newsgroup hierarchy)
|
||||
|
||||
Override I<TLH> from F<newsstats.conf>.
|
||||
|
||||
=item B<-r> I<table> (raw data table)
|
||||
|
||||
Override I<DBTableRaw> from F<newsstats.conf>.
|
||||
|
||||
=item B<-g> I<table> (postings per group table)
|
||||
|
||||
Override I<DBTableGrps> from F<newsstats.conf>.
|
||||
|
||||
=item B<-c> I<table> (client data table)
|
||||
|
||||
Override I<DBTableClnts> from F<newsstats.conf>.
|
||||
|
||||
=item B<-s> I<table> (server/host data table)
|
||||
|
||||
Override I<DBTableHosts> from F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See doc/INSTALL.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Process all types of information for last month:
|
||||
|
||||
gatherstats
|
||||
|
||||
Do a dry run, showing results of processing:
|
||||
|
||||
gatherstats -do
|
||||
|
||||
Process all types of information for January of 2010:
|
||||
|
||||
gatherstats -m 2010-01
|
||||
|
||||
Process only number of postings for the year of 2010:
|
||||
|
||||
gatherstats -p 2010-01:2010-12 -t groups
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<gatherstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<newsstats.conf>
|
||||
|
||||
Runtime configuration file for the B<NewsStats> package.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
doc/README
|
||||
|
||||
=item -
|
||||
|
||||
doc/INSTALL
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
|
|
348
groupstats.pl
348
groupstats.pl
|
@ -23,10 +23,6 @@ use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
|
|||
|
||||
use DBI;
|
||||
|
||||
################################# Definitions ##################################
|
||||
|
||||
# ...
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
|
@ -75,13 +71,9 @@ if ($Options{'l'}) {
|
|||
warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
|
||||
};
|
||||
|
||||
### get query type, default to 'postings'
|
||||
#die "$MySelf: E: Unknown query type -q $Options{'q'}!\n" if ($Options{'q'} and !exists($LegalTypes{$Options{'q'}}));
|
||||
#die "$MySelf: E: You must submit a threshold ('-t') for query type '-q $Options{'q'}'!\n" if ($Options{'q'} and !$Options{'t'});
|
||||
|
||||
### get time period
|
||||
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
|
||||
# reset to one month for 'dump' type
|
||||
# reset to one month for 'dump' output type
|
||||
if ($Options{'o'} eq 'dump' and $Options{'p'}) {
|
||||
$StartMonth = $EndMonth;
|
||||
warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");
|
||||
|
@ -90,80 +82,110 @@ if ($Options{'o'} eq 'dump' and $Options{'p'}) {
|
|||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
|
||||
### get data
|
||||
### create report
|
||||
# get list of newsgroups (-n)
|
||||
my ($QueryPart,@GroupList);
|
||||
my $Newsgroups = $Options{'n'};
|
||||
if ($Newsgroups) {
|
||||
# explode list of newsgroups for WHERE clause
|
||||
($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);
|
||||
} else {
|
||||
# set to dummy value (always true)
|
||||
$QueryPart = 1;
|
||||
};
|
||||
|
||||
# manage thresholds
|
||||
if (defined($Options{'t'})) {
|
||||
if ($Options{'i'}) {
|
||||
# -i: list groups below threshold
|
||||
$QueryPart .= ' AND postings < ?';
|
||||
} else {
|
||||
# default: list groups above threshold
|
||||
$QueryPart .= ' AND postings > ?';
|
||||
};
|
||||
# push threshold to GroupList to match number of binding vars for DBQuery->execute
|
||||
push @GroupList,$Options{'t'};
|
||||
}
|
||||
|
||||
# construct WHERE clause
|
||||
# $QueryPart is "list of newsgroup" (or 1),
|
||||
# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
|
||||
# according to setting of -s
|
||||
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));
|
||||
|
||||
# get length of longest newsgroup delivered by query for formatting purposes
|
||||
# FIXME
|
||||
my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);
|
||||
|
||||
my ($OrderClause,$DBQuery);
|
||||
# -b (best of) defined?
|
||||
# -b (best of / top list) defined?
|
||||
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
|
||||
# default: neither -b nor -l
|
||||
# set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
|
||||
# according to -q and -d
|
||||
$OrderClause = 'newsgroup';
|
||||
$OrderClause = 'postings' if $Options{'q'};
|
||||
$OrderClause .= ' DESC' if $Options{'d'};
|
||||
# do query: get number of postings per group from groups table for given months and newsgroups
|
||||
# prepare query: get number of postings per group from groups table for given months and newsgroups
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
|
||||
} elsif ($Options{'b'}) {
|
||||
# -b is set (then -l can't be!)
|
||||
# set sorting order (-i)
|
||||
if ($Options{'i'}) {
|
||||
$OrderClause = 'postings';
|
||||
} else {
|
||||
$OrderClause = 'postings DESC';
|
||||
};
|
||||
# push LIMIT to GroupList to match number of binding vars
|
||||
# push LIMIT to GroupList to match number of binding vars for DBQuery->execute
|
||||
push @GroupList,$Options{'b'};
|
||||
# do query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
|
||||
# prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
|
||||
} else { # -l
|
||||
} else {
|
||||
# -l must be set now, as all other cases have been taken care of
|
||||
# set sorting order (-i)
|
||||
if ($Options{'i'}) {
|
||||
$OrderClause = '<';
|
||||
} else {
|
||||
$OrderClause = '>';
|
||||
};
|
||||
# push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars
|
||||
# push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars for DBQuery->execute
|
||||
# FIXME -- together with the query (see below)
|
||||
push @GroupList,$Options{'l'};
|
||||
push @GroupList,$StartMonth,$EndMonth;
|
||||
# do query: get number of postings per group from groups table for given months and
|
||||
# prepare query: get number of postings per group from groups table for given months and
|
||||
# FIXME -- this query is ... in dire need of improvement
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));
|
||||
};
|
||||
|
||||
# execute query
|
||||
$DBQuery->execute($StartMonth,$EndMonth,@GroupList) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
|
||||
$DBQuery->execute($StartMonth,$EndMonth,@GroupList)
|
||||
or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
|
||||
|
||||
# output result
|
||||
# output results
|
||||
# print caption (-c) with time period if -m or -p is set
|
||||
# FIXME - month or period should be handled differently
|
||||
printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});
|
||||
# print caption (-c) with newsgroup list if -n is set
|
||||
printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
|
||||
# print caption (-c) with threshold if -t is set, taking -i into account
|
||||
printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};
|
||||
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
|
||||
&OutputData($Options{'o'},$DBQuery,$MaxLength);
|
||||
# default: neither -b nor -l
|
||||
&OutputData($Options{'o'},$DBQuery,$MaxLength);
|
||||
} elsif ($Options{'b'}) {
|
||||
while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
|
||||
# -b is set (then -l can't be!)
|
||||
# we have to read in the query results ourselves, as they do not have standard layout
|
||||
while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
|
||||
# we just assign "top x" or "bottom x" instead of a month for the caption
|
||||
# FIXME
|
||||
print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
|
||||
};
|
||||
} else { # -l
|
||||
while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
|
||||
} else {
|
||||
# -l must be set now, as all other cases have been taken care of
|
||||
# we have to read in the query results ourselves, as they do not have standard layout
|
||||
while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
|
||||
# we just switch $Newsgroup and $Month for output generation
|
||||
# FIXME
|
||||
print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
|
||||
};
|
||||
};
|
||||
|
@ -171,3 +193,283 @@ if (!defined($Options{'b'}) and !defined($Options{'l'})) {
|
|||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
groupstats - create reports on newsgroup usage
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See doc/README: Perl 5.8.x itself and the following modules from CPAN:
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
Config::Auto
|
||||
|
||||
=item -
|
||||
|
||||
DBI
|
||||
|
||||
=back
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script creates reports on newsgroup usage (number of postings per
|
||||
group per month) taken from result tables created by
|
||||
F<gatherstats.pl>.
|
||||
|
||||
The time period to act on defaults to last month; you can assign
|
||||
another month via the B<-m> switch or a time period via the B<-p>
|
||||
switch; the latter takes precedence.
|
||||
|
||||
B<groupstats> will process all newsgroups by default; you can limit
|
||||
that to only some newsgroups by supplying a list of those groups via
|
||||
B<-n> (see below). You can include hierarchy levels in the output by
|
||||
adding the B<-s> switch (see below).
|
||||
|
||||
Furthermore you can set a threshold via B<-t> so that only newsgroups
|
||||
with more postings per month will be included in the report. You can
|
||||
invert that by the B<-i> switch so only newsgroups with less than
|
||||
I<threshold> postings per month will be included.
|
||||
|
||||
You can sort the output by number of postings per month instead of the
|
||||
default (alphabetical list of newsgroups) by using B<-q>; you can
|
||||
reverse the sorting order (from highest to lowest or in reversed
|
||||
alphabetical order) by using B<-d>.
|
||||
|
||||
Furthermore, you can create a list of newsgroups that had consistently
|
||||
more (or less) than x postings per month during the whole report
|
||||
period by using B<-l> (together with B<-i> as needed).
|
||||
|
||||
Last but not least you can create a "best of" list of the top x
|
||||
newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
|
||||
|
||||
By default, B<groupstats> will dump a very simple alphabetical list of
|
||||
newsgroups, one per line, followed by the number of postings in that
|
||||
month. This output format of course cannot sensibly be combined with
|
||||
time periods, so you can set the output format by using B<-o> (see
|
||||
below). Captions can be added by setting the B<-c> switch.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
F<groupstats.pl> will read its configuration via Config::Auto from
|
||||
F<newsstats.conf>, which should be present in the same directory.
|
||||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
You can override configuration options via the B<-g> switch.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V> (version)
|
||||
|
||||
Print out version and copyright information on B<groupstats> and exit.
|
||||
|
||||
=item B<-h> (help)
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-m> I<YYYY-MM> (month)
|
||||
|
||||
Set processing period to a month in YYYY-MM format. Ignored if B<-p>
|
||||
is set.
|
||||
|
||||
=item B<-p> I<YYYY-MM:YYYY-MM> (period)
|
||||
|
||||
Set processing period to a time period between two months, each in
|
||||
YYYY-MM format, separated by a colon. Overrides B<-m>.
|
||||
|
||||
=item B<-n> I<newsgroup(s)> (newsgroups)
|
||||
|
||||
Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
|
||||
be a single newsgroup name (de.alt.test), a newsgroup hierarchy
|
||||
(de.alt.*) or a list of either of these, separated by colons, for
|
||||
example
|
||||
|
||||
de.test:de.alt.test:de.newusers.*
|
||||
|
||||
=item B<-t> I<threshold> (threshold)
|
||||
|
||||
Only include newsgroups with more than I<threshold> postings per
|
||||
month. Can be inverted by the B<-i> switch so that only newsgroups
|
||||
with less than I<threshold> postings will be included.
|
||||
|
||||
This setting will be ignored if B<-l> or B<-b> is set.
|
||||
|
||||
=item B<-l> I<level> (level)
|
||||
|
||||
Only include newsgroups with more than I<level> postings per
|
||||
month, every month during the whole reporting period. Can be inverted
|
||||
by the B<-i> switch so that only newsgroups with less than I<level>
|
||||
postings every single month will be included. Output will be ordered
|
||||
by newsgroup name, followed by month.
|
||||
|
||||
This setting will be ignored if B<-b> is set. Overrides B<-t> and
|
||||
can't be used together with B<-q> or B<-d>.
|
||||
|
||||
=item B<-b> I<n> (best of)
|
||||
|
||||
Create a list of the I<n> newsgroups with the most postings over the
|
||||
whole reporting period. Can be inverted by the B<-i> switch so that a
|
||||
list of the I<n> newsgroups with the least postings over the whole
|
||||
period is generated. Output will be ordered by sum of postings.
|
||||
|
||||
Overrides B<-t> and B<-l> and can't be used together with B<-q> or
|
||||
B<-d>. Output format is set to I<pretty> (see below).
|
||||
|
||||
=item B<-i> (invert)
|
||||
|
||||
Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
|
||||
threshold or level or generate a "bottom list" instead of a top list.
|
||||
|
||||
=item B<-s> (sum per hierarchy level)
|
||||
|
||||
Include "virtual" groups for every hierarchy level in output, for
|
||||
example:
|
||||
|
||||
de.alt.ALL 10
|
||||
de.alt.test 5
|
||||
de.alt.admin 7
|
||||
|
||||
See the B<gatherstats> man page for details.
|
||||
|
||||
=item B<-o> I<output type> (output format)
|
||||
|
||||
Set output format. Default is I<dump>, consisting of an alphabetical
|
||||
list of newsgroups, each on a new line, followed by the number of
|
||||
postings in that month. This default format can't be used with time
|
||||
periods of more than one month.
|
||||
|
||||
I<list> format is like I<dump>, but will print the month in front of
|
||||
the newsgroup name.
|
||||
|
||||
I<dumpgroup> format can only be used with a group list (see B<-n>) of
|
||||
exactly one newsgroup and is like I<dump>, but will output months,
|
||||
followed by the number of postings.
|
||||
|
||||
If you don't need easily parsable output, you'll mostly use I<pretty>
|
||||
format, which will print a header for each new month and try to align
|
||||
newsgroup names and posting counts. Usage of B<-b> will force this
|
||||
format.
|
||||
|
||||
=item B<-c> (captions)
|
||||
|
||||
Add captions to output (reporting period, newsgroups list, threshold).
|
||||
|
||||
=item B<-q> (quantity of postings)
|
||||
|
||||
Sort by number of postings instead of by newsgroup names.
|
||||
|
||||
Cannot be used with B<-l> or B<-b>.
|
||||
|
||||
=item B<-d> (descending)
|
||||
|
||||
Change sort order to descending.
|
||||
|
||||
Cannot be used with B<-l> or B<-b>.
|
||||
|
||||
=item B<-g> I<table> (postings per group table)
|
||||
|
||||
Override I<DBTableGrps> from F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See doc/INSTALL.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Show number of postings per group for last month in I<dump> format:
|
||||
|
||||
groupstats
|
||||
|
||||
Show that report for January of 2010 and de.alt.* plus de.test,
|
||||
including display of hierarchy levels:
|
||||
|
||||
groupstats -m 2010-01 -n de.alt.*:de.test -s
|
||||
|
||||
Show that report for the year of 2010 in I<pretty> format:
|
||||
|
||||
groupstats -p 2010-01:2010-12 -o pretty
|
||||
|
||||
Only show newsgroups with less than 30 postings last month, ordered
|
||||
by number of postings, descending, in I<pretty> format:
|
||||
|
||||
groupstats -iqdt 30 -o pretty
|
||||
|
||||
Show top 10 for the first half-year of 2010 in I<pretty> format:
|
||||
|
||||
groupstats -p 2010-01:2010-06 -b 10 -o pretty
|
||||
|
||||
Report all groups that had less than 30 postings every single month
|
||||
in the year of 2010 (I<pretty> format is forced):
|
||||
|
||||
groupstats -p 2010-01:2010-12 -il 30
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<groupstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<newsstats.conf>
|
||||
|
||||
Runtime configuration file for B<groupstats>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
doc/README
|
||||
|
||||
=item -
|
||||
|
||||
doc/INSTALL
|
||||
|
||||
=item -
|
||||
|
||||
gatherstats -h
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
|
|
|
@ -150,3 +150,108 @@ Enjoy!
|
|||
|
||||
-thh <thh\@inter.net>
|
||||
TODO
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
install - installation script
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<install> [B<-Vh>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See doc/README: Perl 5.8.x itself and the following modules from CPAN:
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
Config::Auto
|
||||
|
||||
=item -
|
||||
|
||||
DBI
|
||||
|
||||
=back
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script will create database tables as necessary and configured.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
F<install.pl> will read its configuration from F<newsstats.conf> via
|
||||
Config::Auto.
|
||||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V> (version)
|
||||
|
||||
Print out version and copyright information on B<install> and exit.
|
||||
|
||||
=item B<-h> (help)
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=back
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<install.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<newsstats.conf>
|
||||
|
||||
Runtime configuration file for B<install>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
doc/README
|
||||
|
||||
=item -
|
||||
|
||||
doc/INSTALL
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
|
|
Loading…
Reference in a new issue