Complete rewrite of groupstats.pl and NewsStats.pm.
* Switch to Getopt::Long, change coding style; limit line length. * Replace 'die' and 'warn' by calls to &Bleat(). * Completely redo options and processing: - merge -m/-p/-a into --month - replace -i/-q/-d with - much more powerful - --group-by/--order-by - replace -t/-l with - much more powerful - --lower/--upper/--boundary - remove -b and replace it with --report Fixes #33. * Add new report types, boundaries and sorting options: - report types 'average' and 'sums' - boundaries 'average' and 'sums' - upper and/or lower boundary - sort output independently Issue #35. Fixes #34, #38. * Add possibility to cross-check newsgroups against checkgroups file. * Complete rewrite of groupstats.pl internal logic: - modularize construction fo SQL queries - remove unnecessary special cases - refactor code into NewsStats.pm functions as much as possible Issue #37. Fixes #36. * Rework output formats, fix padding problem by making use of modularized SQL queries. Fixes #15, #32. * Add some more consistency checks. Issue #12. * Redo documentation. * Update TODO list. Signed-off-by: Thomas Hochstein <thh@inter.net>
This commit is contained in:
parent
3c7a7dfef5
commit
edd250f265
493
NewsStats.pm
493
NewsStats.pm
|
@ -2,7 +2,7 @@
|
|||
#
|
||||
# Library functions for the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
|
||||
# Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
#
|
||||
# This module can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
@ -16,18 +16,20 @@ our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
|
|||
require Exporter;
|
||||
@ISA = qw(Exporter);
|
||||
@EXPORT = qw(
|
||||
$MySelf
|
||||
$MyVersion
|
||||
$PackageVersion
|
||||
ReadOptions
|
||||
$FullPath
|
||||
$HomePath
|
||||
ShowVersion
|
||||
ShowPOD
|
||||
ReadConfig
|
||||
OverrideConfig
|
||||
InitDB
|
||||
Bleat
|
||||
);
|
||||
@EXPORT_OK = qw(
|
||||
GetTimePeriod
|
||||
LastMonth
|
||||
CheckMonth
|
||||
SplitPeriod
|
||||
ListMonth
|
||||
ListNewsgroups
|
||||
|
@ -35,58 +37,47 @@ require Exporter;
|
|||
OutputData
|
||||
FormatOutput
|
||||
SQLHierarchies
|
||||
SQLSortOrder
|
||||
SQLGroupList
|
||||
SQLSetBounds
|
||||
SQLBuildClause
|
||||
GetMaxLength
|
||||
);
|
||||
%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)],
|
||||
%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod
|
||||
ListMonth)],
|
||||
Output => [qw(OutputData FormatOutput)],
|
||||
SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLength)]);
|
||||
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
|
||||
SQLSetBounds SQLBuildClause GetMaxLength)]);
|
||||
$VERSION = '0.01';
|
||||
our $PackageVersion = '0.01';
|
||||
|
||||
use Data::Dumper;
|
||||
use File::Basename;
|
||||
use Getopt::Std;
|
||||
|
||||
use Config::Auto;
|
||||
use DBI;
|
||||
|
||||
#####-------------------------------- Vars --------------------------------#####
|
||||
|
||||
our $MySelf = fileparse($0, '.pl');
|
||||
our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)";
|
||||
# trim the path
|
||||
our $FullPath = $0;
|
||||
our $HomePath = dirname($0);
|
||||
$0 =~ s%.*/%%;
|
||||
# set version string
|
||||
our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
|
||||
|
||||
#####------------------------------- Basics -------------------------------#####
|
||||
|
||||
################################################################################
|
||||
sub ReadOptions {
|
||||
################################################################################
|
||||
### read commandline options and act on standard options -h and -V
|
||||
### IN : $Params: list of legal commandline paramaters (without -h and -V)
|
||||
### OUT: a hash containing the commandline options
|
||||
$Getopt::Std::STANDARD_HELP_VERSION = 1;
|
||||
|
||||
my ($Params) = @_;
|
||||
my %Options;
|
||||
|
||||
getopts('Vh'.$Params, \%Options);
|
||||
|
||||
# -V: display version
|
||||
&ShowVersion if ($Options{'V'});
|
||||
|
||||
# -h: feed myself to perldoc
|
||||
&ShowPOD if ($Options{'h'});
|
||||
|
||||
return %Options;
|
||||
};
|
||||
################################################################################
|
||||
|
||||
################################################################################
|
||||
sub ShowVersion {
|
||||
################################################################################
|
||||
### display version and exit
|
||||
print "NewsStats v$PackageVersion\n$MyVersion\nCopyright (c) 2010 Thomas Hochstein <thh\@inter.net>\n";
|
||||
print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n";
|
||||
print "NewsStats v$PackageVersion\n$MyVersion\n";
|
||||
print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
|
||||
print "This program is free software; you may redistribute it ".
|
||||
"and/or modify it under the same terms as Perl itself.\n";
|
||||
exit(100);
|
||||
};
|
||||
################################################################################
|
||||
|
@ -95,7 +86,7 @@ sub ShowVersion {
|
|||
sub ShowPOD {
|
||||
################################################################################
|
||||
### feed myself to perldoc and exit
|
||||
exec('perldoc', $0);
|
||||
exec('perldoc', $FullPath);
|
||||
exit(100);
|
||||
};
|
||||
################################################################################
|
||||
|
@ -112,7 +103,7 @@ sub ReadConfig {
|
|||
################################################################################
|
||||
|
||||
################################################################################
|
||||
sub OverrideConfig {
|
||||
sub OverrideConfig {
|
||||
################################################################################
|
||||
### override configuration values
|
||||
### IN : $ConfigR : reference to configuration hash
|
||||
|
@ -120,7 +111,8 @@ sub OverrideConfig {
|
|||
my ($ConfigR,$OverrideR) = @_;
|
||||
my %Override = %$OverrideR;
|
||||
# Config hash empty?
|
||||
warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1);
|
||||
&Bleat(1,"Empty configuration hash passed to OverrideConfig()")
|
||||
if ( keys %$ConfigR < 1);
|
||||
# return if no overrides
|
||||
return if (keys %Override < 1 or keys %$ConfigR < 1);
|
||||
foreach my $Key (keys %Override) {
|
||||
|
@ -138,15 +130,35 @@ sub InitDB {
|
|||
### OUT: DBHandle
|
||||
my ($ConfigR,$Die) = @_;
|
||||
my %Conf = %$ConfigR;
|
||||
my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 });
|
||||
my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',
|
||||
$Conf{'DBDriver'},$Conf{'DBDatabase'},
|
||||
$Conf{'DBHost'}), $Conf{'DBUser'},
|
||||
$Conf{'DBPw'}, { PrintError => 0 });
|
||||
if (!$DBHandle) {
|
||||
die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die);
|
||||
warn("$MySelf: W: $DBI::errstr\n");
|
||||
&Bleat(2,$DBI::errstr) if (defined($Die) and $Die);
|
||||
&Bleat(1,$DBI::errstr);
|
||||
};
|
||||
return $DBHandle;
|
||||
};
|
||||
################################################################################
|
||||
|
||||
################################################################################
|
||||
sub Bleat {
|
||||
################################################################################
|
||||
### print warning or error messages and terminate in case of error
|
||||
### IN : $Level : 1 = warning, 2 = error
|
||||
### $Message: warning or error message
|
||||
my ($Level,$Message) = @_;
|
||||
if ($Level == 1) {
|
||||
warn "$0 W: $Message\n"
|
||||
} elsif ($Level == 2) {
|
||||
die "$0 E: $Message\n"
|
||||
} else {
|
||||
print "$0: $Message\n"
|
||||
}
|
||||
};
|
||||
################################################################################
|
||||
|
||||
#####------------------------------ GetStats ------------------------------#####
|
||||
|
||||
################################################################################
|
||||
|
@ -172,7 +184,7 @@ sub ListNewsgroups {
|
|||
next if($TLH and !/^$TLH/);
|
||||
# don't count invalid newsgroups
|
||||
if(%ValidGroups and !defined($ValidGroups{$_})) {
|
||||
warn (sprintf("DROPPED: %s\n",$_));
|
||||
&Bleat(1,sprintf("DROPPED: %s",$_));
|
||||
next;
|
||||
}
|
||||
# add original newsgroup to %Newsgroups
|
||||
|
@ -216,7 +228,7 @@ sub ReadGroupList {
|
|||
### OUT: \%ValidGroups: hash containing all valid newsgroups
|
||||
my ($Filename) = @_;
|
||||
my %ValidGroups;
|
||||
open (my $LIST,"<$Filename") or die "$MySelf: E: Cannot read $Filename: $!\n";
|
||||
open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
|
||||
while (<$LIST>) {
|
||||
s/^(\S+).*$/$1/;
|
||||
chomp;
|
||||
|
@ -233,29 +245,42 @@ sub ReadGroupList {
|
|||
################################################################################
|
||||
sub GetTimePeriod {
|
||||
################################################################################
|
||||
### get a time period to act on, in order of preference: by default the
|
||||
### last month; or a month submitted by -m YYYY-MM; or a time period submitted
|
||||
### by -p YYYY-MM:YYYY-MM
|
||||
### IN : $Month,$Period: contents of -m and -p
|
||||
### OUT: $StartMonth, $EndMonth (identical if period is just one month)
|
||||
my ($Month,$Period) = @_;
|
||||
# exit if -m is set and not like YYYY-MM
|
||||
die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month);
|
||||
# warn if -m and -p is set
|
||||
warn "$MySelf: W: Time period assigned by '-p' takes precendece over month assigned by '-m'.\n" if ($Month && $Period);
|
||||
# default: set -m to last month
|
||||
$Month = &LastMonth if (!defined($Month) and !defined($Period));
|
||||
# set $StartMonth, $EndMonth
|
||||
my ($StartMonth, $EndMonth);
|
||||
if ($Period) {
|
||||
# -p: get date range
|
||||
($StartMonth, $EndMonth) = &SplitPeriod($Period);
|
||||
die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth);
|
||||
### get a time period to act on from --month option;
|
||||
### if empty, default to last month
|
||||
### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all'
|
||||
### OUT: $Verbal,$SQL: verbal description and WHERE-clause
|
||||
### of the chosen time period
|
||||
my ($Month) = @_;
|
||||
# define result variables
|
||||
my ($Verbal, $SQL);
|
||||
# define a regular expression for a month
|
||||
my $REMonth = '\d{4}-\d{2}';
|
||||
|
||||
# default to last month if option is not set
|
||||
if(!$Month) {
|
||||
$Month = &LastMonth;
|
||||
}
|
||||
|
||||
# check for valid input
|
||||
if ($Month =~ /^$REMonth$/) {
|
||||
# single month (YYYY-MM)
|
||||
($Month) = &CheckMonth($Month);
|
||||
$Verbal = $Month;
|
||||
$SQL = sprintf("month = '%s'",$Month);
|
||||
} elsif ($Month =~ /^$REMonth:$REMonth$/) {
|
||||
# time period (YYYY-MM:YYYY-MM)
|
||||
$Verbal = sprintf('%s to %s',&SplitPeriod($Month));
|
||||
$SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month));
|
||||
} elsif ($Month =~ /^all$/i) {
|
||||
# special case: ALL
|
||||
$Verbal = 'all time';
|
||||
$SQL = '';
|
||||
} else {
|
||||
# set $StartMonth = $EndMonth = $Month if -p is not set
|
||||
$StartMonth = $EndMonth = $Month;
|
||||
};
|
||||
return ($StartMonth, $EndMonth);
|
||||
# invalid input
|
||||
return (undef,undef);
|
||||
}
|
||||
|
||||
return ($Verbal,$SQL);
|
||||
};
|
||||
|
||||
################################################################################
|
||||
|
@ -278,12 +303,23 @@ sub LastMonth {
|
|||
################################################################################
|
||||
sub CheckMonth {
|
||||
################################################################################
|
||||
### check if input is a valid month in YYYY-MM form
|
||||
### IN : $Month: month
|
||||
### OUT: TRUE / FALSE
|
||||
my ($Month) = @_;
|
||||
return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/);
|
||||
return 1;
|
||||
### check if input (in YYYY-MM form) is valid with MM between 01 and 12;
|
||||
### otherwise, fix it
|
||||
### IN : @Month: array of month
|
||||
### OUT: @Month: a valid month
|
||||
my (@Month) = @_;
|
||||
foreach my $Month (@Month) {
|
||||
my ($OldMonth) = $Month;
|
||||
my ($CalMonth) = substr ($Month, -2);
|
||||
if ($CalMonth < 1 or $CalMonth > 12) {
|
||||
$CalMonth = '12' if $CalMonth > 12;
|
||||
$CalMonth = '01' if $CalMonth < 1;
|
||||
substr($Month, -2) = $CalMonth;
|
||||
&Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ".
|
||||
"and '12'), set to '%s'.",$OldMonth,$Month));
|
||||
}
|
||||
}
|
||||
return @Month;
|
||||
};
|
||||
|
||||
################################################################################
|
||||
|
@ -291,10 +327,10 @@ sub SplitPeriod {
|
|||
################################################################################
|
||||
### split a time period denoted by YYYY-MM:YYYY-MM into start and end month
|
||||
### IN : $Period: time period
|
||||
### OUT: $StartMonth, Â$EndMonth
|
||||
### OUT: $StartMonth, $EndMonth
|
||||
my ($Period) = @_;
|
||||
return (undef,undef) if $Period !~ /^\d{4}-\d{2}:\d{4}-\d{2}$/;
|
||||
my ($StartMonth, $EndMonth) = split /:/, $Period;
|
||||
($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth);
|
||||
# switch parameters as necessary
|
||||
if ($EndMonth gt $StartMonth) {
|
||||
return ($StartMonth, $EndMonth);
|
||||
|
@ -310,7 +346,8 @@ sub ListMonth {
|
|||
### IN : $StartMonth, $EndMonth
|
||||
### OUT: @Months: array containing all months from $StartMonth to $EndMonth
|
||||
my ($StartMonth, $EndMonth) = @_;
|
||||
return (undef,undef) if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/);
|
||||
return (undef,undef)
|
||||
if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/);
|
||||
# return if $StartMonth = $EndMonth
|
||||
return ($StartMonth) if ($StartMonth eq $EndMonth);
|
||||
# set $Year, $Month from $StartMonth
|
||||
|
@ -335,64 +372,96 @@ sub ListMonth {
|
|||
sub OutputData {
|
||||
################################################################################
|
||||
### read database query results from DBHandle and print results with formatting
|
||||
### IN : $Format : format specifier
|
||||
### $FileName: file name template (-f): filename-YYYY-MM
|
||||
### $DBQuery : database query handle with executed query,
|
||||
### IN : $Format : format specifier
|
||||
### $Comments : print or suppress all comments for machine-readable output
|
||||
### $GroupBy : primary sorting order (month or key)
|
||||
### $Precision: number of digits right of decimal point (0 or 2)
|
||||
### $ValidKeys: reference to a hash containing all valid keys
|
||||
### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
|
||||
### $DBQuery : database query handle with executed query,
|
||||
### containing $Month, $Key, $Value
|
||||
### $PadGroup: padding length for newsgroups field (optional) for 'pretty'
|
||||
my ($Format, $FileName, $DBQuery, $PadGroup) = @_;
|
||||
my ($Handle, $OUT);
|
||||
### $PadGroup : padding length for key field (optional) for 'pretty'
|
||||
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
|
||||
$DBQuery, $PadGroup) = @_;
|
||||
my %ValidKeys = %{$ValidKeys} if $ValidKeys;
|
||||
my ($FileName, $Handle, $OUT);
|
||||
our $LastIteration;
|
||||
|
||||
# define output types
|
||||
my %LegalOutput;
|
||||
@LegalOutput{('dump',,'list','pretty')} = ();
|
||||
# bail out if format is unknown
|
||||
&Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format});
|
||||
|
||||
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
|
||||
# don't display invalid keys
|
||||
if(%ValidKeys and !defined($ValidKeys{$Key})) {
|
||||
# FIXME
|
||||
# &Bleat(1,sprintf("DROPPED: %s",$Key));
|
||||
next;
|
||||
};
|
||||
# care for correct sorting order and abstract from month and keys:
|
||||
# $Caption will be $Month or $Key, according to sorting order,
|
||||
# and $Key will be $Key or $Month, respectively
|
||||
my $Caption;
|
||||
if ($GroupBy eq 'key') {
|
||||
$Caption = $Key;
|
||||
$Key = $Month;
|
||||
} else {
|
||||
$Caption = $Month;
|
||||
}
|
||||
# set output file handle
|
||||
if (!$FileName) {
|
||||
if (!$FileTempl) {
|
||||
$Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
|
||||
} elsif (!defined($LastIteration) or $LastIteration ne $Month) {
|
||||
} elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
|
||||
close $OUT if ($LastIteration);
|
||||
open ($OUT,sprintf('>%s-%s',$FileName,$Month)) or die sprintf("$MySelf: E: Cannot open output file '%s-%s': $!\n",$FileName,$Month);
|
||||
# safeguards for filename creation:
|
||||
# replace potential problem characters with '_'
|
||||
$FileName = sprintf('%s-%s',$FileTempl,$Caption);
|
||||
$FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
|
||||
open ($OUT,">$FileName")
|
||||
or &Bleat(2,sprintf("Cannot open output file '%s': $!",
|
||||
$FileName));
|
||||
$Handle = $OUT;
|
||||
};
|
||||
print $Handle &FormatOutput($Format, $Month, $Key, $Value, $PadGroup);
|
||||
$LastIteration = $Month;
|
||||
print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
|
||||
$Precision, $PadGroup);
|
||||
$LastIteration = $Caption;
|
||||
};
|
||||
close $OUT if ($FileName);
|
||||
close $OUT if ($FileTempl);
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub FormatOutput {
|
||||
################################################################################
|
||||
### format information for output according to format specifier
|
||||
### IN : $Format : format specifier
|
||||
### $Month : month (as YYYY-MM)
|
||||
### $Key : newsgroup, client, ...
|
||||
### $Value : number of postings with that attribute
|
||||
### $PadGroup: padding length for key field (optional) for 'pretty'
|
||||
### IN : $Format : format specifier
|
||||
### $Comments : print or suppress all comments for machine-readable output
|
||||
### $Caption : month (as YYYY-MM) or $Key, according to sorting order
|
||||
### $Key : newsgroup, client, ... or $Month, as above
|
||||
### $Value : number of postings with that attribute
|
||||
### $Precision: number of digits right of decimal point (0 or 2)
|
||||
### $PadGroup : padding length for key field (optional) for 'pretty'
|
||||
### OUT: $Output: formatted output
|
||||
my ($Format, $Month, $Key, $Value, $PadGroup) = @_;
|
||||
|
||||
# define output types
|
||||
my %LegalOutput;
|
||||
@LegalOutput{('dump','dumpgroup','list','pretty')} = ();
|
||||
# bail out if format is unknown
|
||||
die "$MySelf: E: Unknown output type '$Format'!\n" if !exists($LegalOutput{$Format});
|
||||
|
||||
my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadGroup) = @_;
|
||||
my ($Output);
|
||||
# keep last month in mind
|
||||
# keep last caption in mind
|
||||
our ($LastIteration);
|
||||
# create one line of output
|
||||
if ($Format eq 'dump') {
|
||||
# output as dump (ng nnnnn)
|
||||
$Output = sprintf ("%s %u\n",$Key,$Value);
|
||||
} elsif ($Format eq 'dumpgroup') {
|
||||
# output as dump (YYYY-NN: nnnnn)
|
||||
$Output = sprintf ("%s: %5u\n",$Month,$Value);
|
||||
# output as dump (key value)
|
||||
$Output = sprintf ("# %s:\n",$Caption)
|
||||
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
||||
$Output .= sprintf ("%s %u\n",$Key,$Value);
|
||||
} elsif ($Format eq 'list') {
|
||||
# output as list (YYYY-NN: ng nnnnn)
|
||||
$Output = sprintf ("%s: %s %u\n",$Month,$Key,$Value);
|
||||
# output as list (caption key value)
|
||||
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
|
||||
} elsif ($Format eq 'pretty') {
|
||||
# output as table
|
||||
$Output = sprintf ("----- %s:\n",$Month) if (!defined($LastIteration) or $Month ne $LastIteration);
|
||||
$LastIteration = $Month;
|
||||
$Output .= sprintf ($PadGroup ? sprintf("%%-%us %%5u\n",$PadGroup) : "%s %u\n",$Key,$Value);
|
||||
# output as a table
|
||||
$Output = sprintf ("# ----- %s:\n",$Caption)
|
||||
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
||||
$Output .= sprintf ($PadGroup ? sprintf("%%-%us %%10.*f\n",$PadGroup) :
|
||||
"%s %.*f\n",$Key,$Precision,$Value);
|
||||
};
|
||||
return $Output;
|
||||
};
|
||||
|
@ -408,46 +477,226 @@ sub SQLHierarchies {
|
|||
### IN : $ShowHierarchies: boolean value
|
||||
### OUT: SQL code
|
||||
my ($ShowHierarchies) = @_;
|
||||
return $ShowHierarchies ? '' : "AND newsgroup NOT LIKE '%.ALL'";
|
||||
return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'";
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub GetMaxLength {
|
||||
################################################################################
|
||||
### get length of longest field in future query result
|
||||
### IN : $DBHandle : database handel
|
||||
### $Table : table to query
|
||||
### $Field : field to check
|
||||
### $WhereClause: WHERE clause
|
||||
### @BindVars : bind variables for WHERE clause
|
||||
### IN : $DBHandle : database handel
|
||||
### $Table : table to query
|
||||
### $Field : field to check
|
||||
### $WhereClause : WHERE clause
|
||||
### $HavingClause: HAVING clause
|
||||
### @BindVars : bind variables for WHERE clause
|
||||
### OUT: $Length: length of longest instnace of $Field
|
||||
my ($DBHandle,$Table,$Field,$WhereClause,@BindVars) = @_;
|
||||
my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) FROM %s WHERE %s",$Field,$Table,$WhereClause));
|
||||
$DBQuery->execute(@BindVars) or warn sprintf("$MySelf: W: Can't get field length for %s from table %s: $DBI::errstr\n",$Field,$Table);
|
||||
my ($DBHandle,$Table,$Field,$WhereClause,$HavingClause,@BindVars) = @_;
|
||||
my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) ".
|
||||
"FROM %s %s %s",$Field,$Table,
|
||||
$WhereClause,$HavingClause ?
|
||||
'GROUP BY newsgroup' . $HavingClause .
|
||||
' ORDER BY LENGTH(newsgroup) '.
|
||||
'DESC LIMIT 1': ''));
|
||||
$DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ".
|
||||
"for '%s' from table '%s': ".
|
||||
"$DBI::errstr",$Field,$Table));
|
||||
my ($Length) = $DBQuery->fetchrow_array;
|
||||
return $Length;
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub SQLSortOrder {
|
||||
################################################################################
|
||||
### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and
|
||||
### $OptOrderBy (secondary sorting), both ascending or descending;
|
||||
### descending sorting order is done by adding '-desc'
|
||||
### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
|
||||
### $OrderBy: secondary sort by month/newsgroups (default)
|
||||
### or number of 'postings'
|
||||
### OUT: a SQL ORDER BY clause
|
||||
my ($GroupBy,$OrderBy) = @_;
|
||||
my ($GroupSort,$OrderSort) = ('','');
|
||||
# $GroupBy (primary sorting)
|
||||
if (!$GroupBy) {
|
||||
$GroupBy = 'month';
|
||||
} else {
|
||||
($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
|
||||
if ($GroupBy =~ /group/i) {
|
||||
$GroupBy = 'newsgroup';
|
||||
} else {
|
||||
$GroupBy = 'month';
|
||||
}
|
||||
}
|
||||
my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
|
||||
# $OrderBy (secondary sorting)
|
||||
if (!$OrderBy) {
|
||||
$OrderBy = $Secondary;
|
||||
} else {
|
||||
($OrderBy, $OrderSort) = SQLParseOrder($OrderBy);
|
||||
if ($OrderBy =~ /posting/i) {
|
||||
$OrderBy = "postings $OrderSort, $Secondary";
|
||||
} else {
|
||||
$OrderBy = "$Secondary $OrderSort";
|
||||
}
|
||||
}
|
||||
return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy));
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub SQLParseOrder {
|
||||
################################################################################
|
||||
### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g.
|
||||
### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc'
|
||||
### IN : $OrderOption: order option (see above)
|
||||
### OUT: parameter to sort by,
|
||||
### sort order ('DESC' or nothing, meaning 'ASC')
|
||||
my ($OrderOption) = @_;
|
||||
my $SortOrder = '';
|
||||
if ($OrderOption =~ s/-?desc$//i) {
|
||||
$SortOrder = 'DESC';
|
||||
} else {
|
||||
$OrderOption =~ s/-?asc$//i
|
||||
}
|
||||
return ($OrderOption,$SortOrder);
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub SQLGroupList {
|
||||
################################################################################
|
||||
### explode list of newsgroups separated by : (with wildcards) to a SQL WHERE
|
||||
### clause
|
||||
### explode list of newsgroups separated by : (with wildcards)
|
||||
### to a SQL 'WHERE' expression
|
||||
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
|
||||
### OUT: SQL code, list of newsgroups
|
||||
### OUT: SQL code to become part of a 'WHERE' clause,
|
||||
### list of newsgroups for SQL bindings
|
||||
my ($Newsgroups) = @_;
|
||||
# substitute '*' wildcard with SQL wildcard character '%'
|
||||
$Newsgroups =~ s/\*/%/g;
|
||||
return ('newsgroup LIKE ?', $Newsgroups) if $Newsgroups !~ /:/;
|
||||
# just one newsgroup?
|
||||
return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
|
||||
# list of newsgroups separated by ':'
|
||||
my $SQL = '(';
|
||||
my @GroupList = split /:/, $Newsgroups;
|
||||
foreach (@GroupList) {
|
||||
$SQL .= ' OR ' if $SQL gt '(';
|
||||
$SQL .= 'newsgroup LIKE ?';
|
||||
$SQL .= SQLGroupWildcard($_);
|
||||
};
|
||||
$SQL .= ')';
|
||||
return ($SQL,@GroupList);
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub SQLGroupWildcard {
|
||||
################################################################################
|
||||
### build a valid SQL 'WHERE' expression with or without wildcards
|
||||
### IN : $Newsgroup: newsgroup expression, probably with wildcard
|
||||
### (group.name or group.name.%)
|
||||
### OUT: SQL code to become part of a 'WHERE' clause
|
||||
my ($Newsgroup) = @_;
|
||||
# FIXME: check for validity
|
||||
if ($Newsgroup !~ /%/) {
|
||||
return 'newsgroup = ?';
|
||||
} else {
|
||||
return 'newsgroup LIKE ?';
|
||||
}
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub SQLSetBounds {
|
||||
################################################################################
|
||||
### set upper and/or lower boundary (number of postings)
|
||||
### IN : $Type: 'level', 'average', 'sum' or 'default'
|
||||
### $LowBound,$UppBound: lower/upper boundary, respectively
|
||||
### OUT: SQL code to become part of a WHERE or HAVING clause
|
||||
my ($Type,$LowBound,$UppBound) = @_;
|
||||
($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound);
|
||||
if($LowBound and $UppBound and $LowBound > $UppBound) {
|
||||
&Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ".
|
||||
"$UppBound, exchanging boundaries.");
|
||||
($LowBound,$UppBound) = ($UppBound,$LowBound);
|
||||
}
|
||||
# default to 'default'
|
||||
my $WhereHavingFunction = 'postings';
|
||||
# set $LowBound to SQL statement:
|
||||
# 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >='
|
||||
if ($Type eq 'level') {
|
||||
$WhereHavingFunction = 'MIN(postings)'
|
||||
} elsif ($Type eq 'average') {
|
||||
$WhereHavingFunction = 'AVG(postings)'
|
||||
} elsif ($Type eq 'sum') {
|
||||
$WhereHavingFunction = 'SUM(postings)'
|
||||
}
|
||||
$LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound);
|
||||
# set $LowBound to SQL statement:
|
||||
# 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <='
|
||||
if ($Type eq 'level') {
|
||||
$WhereHavingFunction = 'MAX(postings)'
|
||||
} elsif ($Type eq 'average') {
|
||||
$WhereHavingFunction = 'AVG(postings)'
|
||||
} elsif ($Type eq 'sum') {
|
||||
$WhereHavingFunction = 'SUM(postings)'
|
||||
}
|
||||
$UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound);
|
||||
return ($LowBound,$UppBound);
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub SQLCheckNumber {
|
||||
################################################################################
|
||||
### check if input is a valid positive integer; otherwise, make it one
|
||||
### IN : @Numbers: array of parameters
|
||||
### OUT: @Numbers: a valid positive integer
|
||||
my (@Numbers) = @_;
|
||||
foreach my $Number (@Numbers) {
|
||||
if ($Number and $Number < 0) {
|
||||
&Bleat(1,"Boundary $Number is < 0, set to ".-$Number);
|
||||
$Number = -$Number;
|
||||
}
|
||||
$Number = '' if ($Number and $Number !~ /^\d+$/);
|
||||
}
|
||||
return @Numbers;
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub SQLBuildClause {
|
||||
################################################################################
|
||||
### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause
|
||||
### from multiple expressions which *may* be empty
|
||||
### IN : $Type: 'where', 'having', 'group' or 'order'
|
||||
### @Expressions: array of expressions
|
||||
### OUT: $SQLClause: a SQL clause
|
||||
my ($Type,@Expressions) = @_;
|
||||
my ($SQLClause,$Separator,$Statement);
|
||||
# set separator ('AND' or ',')
|
||||
if ($Type eq 'where' or $Type eq 'having') {
|
||||
$Separator = 'AND';
|
||||
} else {
|
||||
$Separator = ',';
|
||||
}
|
||||
# set statement
|
||||
if ($Type eq 'where') {
|
||||
$Statement = 'WHERE';
|
||||
} elsif ($Type eq 'order') {
|
||||
$Statement = 'ORDER BY';
|
||||
} elsif ($Type eq 'having') {
|
||||
$Statement = 'HAVING';
|
||||
} else {
|
||||
$Statement = 'GROUP BY';
|
||||
}
|
||||
# build query from expressions with separators
|
||||
foreach my $Expression (@Expressions) {
|
||||
if ($Expression) {
|
||||
$SQLClause .= " $Separator " if ($SQLClause);
|
||||
$SQLClause .= $Expression;
|
||||
}
|
||||
}
|
||||
# add statement in front if not already present
|
||||
$SQLClause = " $Statement " . $SQLClause
|
||||
if ($SQLClause and $SQLClause !~ /$Statement/);
|
||||
return $SQLClause;
|
||||
};
|
||||
|
||||
|
||||
#####------------------------------- done ---------------------------------#####
|
||||
1;
|
||||
|
||||
|
|
15
doc/TODO
15
doc/TODO
|
@ -29,6 +29,9 @@ Bug numbers refer to the Mantis issue tracker at <http://bugs.th-h.de/>.
|
|||
- Add other reports
|
||||
NewsStats should include some other kinds of reports (stats on used clients,
|
||||
on postings hosts/servers, ...)
|
||||
- Add tools for database management
|
||||
NewsStats should offer tools e.g. to inject postings into the 'raw' database,
|
||||
or to split databases.
|
||||
|
||||
* Individual improvements
|
||||
+ NewsStats.pm
|
||||
|
@ -43,10 +46,15 @@ Bug numbers refer to the Mantis issue tracker at <http://bugs.th-h.de/>.
|
|||
- General tests and optimisations
|
||||
|
||||
+ feedlog.pl
|
||||
- Complete rewrite (like groupstats.pl, include changes in NewsStats.pm)
|
||||
- Gracefully handle interruptions to database connection, at least log
|
||||
lost postings (Bug #30).
|
||||
- Buffer output that cannot be written to database (Bug #31).
|
||||
- Add / enhance / test error handling
|
||||
- General tests and optimisations
|
||||
|
||||
+ gatherstats.pl
|
||||
- Complete rewrite (like groupstats.pl, include changes in NewsStats.pm)
|
||||
- Use hierarchy information (see GroupInfo above)
|
||||
- Add gathering of other stats (clients, hosts, ...)
|
||||
- better modularisation (code reuse for other reports!)
|
||||
|
@ -54,13 +62,6 @@ Bug numbers refer to the Mantis issue tracker at <http://bugs.th-h.de/>.
|
|||
- General tests and optimisations
|
||||
|
||||
+ groupstats.pl
|
||||
- output: fix formatting - GetMaxLength does not work for special queries
|
||||
like -l or -b
|
||||
- beautify output (formats / captions)
|
||||
- -a: improve code - it doesn't make that much sense to get first/last
|
||||
month from database just to query that same database with a time period
|
||||
that equals no time period ... this part of the WHERE clause should be
|
||||
dropped instead
|
||||
- better modularisation (code reuse for other reports!)
|
||||
- Add / enhance / test error handling
|
||||
- General tests and optimisations
|
||||
|
|
804
groupstats.pl
804
groupstats.pl
|
@ -7,7 +7,7 @@
|
|||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
|
||||
# Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
@ -19,226 +19,229 @@ BEGIN {
|
|||
}
|
||||
use strict;
|
||||
|
||||
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
|
||||
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
|
||||
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my %Options = &ReadOptions('m:p:an:o:t:l:b:iscqdf:g:');
|
||||
my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments,
|
||||
$OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth,
|
||||
$OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound);
|
||||
GetOptions ('b|boundary=s' => \$OptBoundType,
|
||||
'c|captions!' => \$OptCaptions,
|
||||
'checkgroups=s' => \$OptCheckgroupsFile,
|
||||
'comments!' => \$OptComments,
|
||||
'filetemplate=s' => \$OptFileTemplate,
|
||||
'f|format=s' => \$OptFormat,
|
||||
'g|group-by=s' => \$OptGroupBy,
|
||||
'groupsdb=s' => \$OptGroupsDB,
|
||||
'l|lower=i' => \$LowBound,
|
||||
'm|month=s' => \$OptMonth,
|
||||
'n|newsgroups=s' => \$OptNewsgroups,
|
||||
'o|order-by=s' => \$OptOrderBy,
|
||||
'r|report=s' => \$OptReportType,
|
||||
's|sums!' => \$OptSums,
|
||||
'u|upper=i' => \$UppBound,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
# parse parameters
|
||||
# $OptComments defaults to TRUE
|
||||
$OptComments = 1 if (!defined($OptComments));
|
||||
# force --nocomments when --filetemplate is used
|
||||
$OptComments = 0 if ($OptFileTemplate);
|
||||
# parse $OptBoundType
|
||||
if ($OptBoundType) {
|
||||
if ($OptBoundType =~ /level/i) {
|
||||
$OptBoundType = 'level';
|
||||
} elsif ($OptBoundType =~ /av(era)?ge?/i) {
|
||||
$OptBoundType = 'average';
|
||||
} elsif ($OptBoundType =~ /sums?/i) {
|
||||
$OptBoundType = 'sum';
|
||||
} else {
|
||||
$OptBoundType = 'default';
|
||||
}
|
||||
}
|
||||
# parse $OptReportType
|
||||
if ($OptReportType) {
|
||||
if ($OptReportType =~ /av(era)?ge?/i) {
|
||||
$OptReportType = 'average';
|
||||
} elsif ($OptReportType =~ /sums?/i) {
|
||||
$OptReportType = 'sum';
|
||||
} else {
|
||||
$OptReportType = 'default';
|
||||
}
|
||||
}
|
||||
# read list of newsgroups from --checkgroups
|
||||
# into a hash reference
|
||||
my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile;
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig('newsstats.conf')};
|
||||
my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
|
||||
|
||||
### override configuration via commandline options
|
||||
my %ConfOverride;
|
||||
$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
|
||||
$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
|
||||
&OverrideConfig(\%Conf,\%ConfOverride);
|
||||
|
||||
### check for incompatible command line options
|
||||
# you can't mix '-t', '-b' and '-l'
|
||||
# -b/-l take preference over -t, and -b takes preference over -l
|
||||
# you can't use '-f' with '-b' or '-l'
|
||||
if ($Options{'b'} or $Options{'l'}) {
|
||||
if ($Options{'f'}) {
|
||||
# drop -f
|
||||
warn ("$MySelf: W: You cannot save the report to monthly files when using top lists (-b) or levels (-l). Filename template '-f $Options{'f'}' was ignored.\n");
|
||||
undef($Options{'f'});
|
||||
};
|
||||
if ($Options{'t'}) {
|
||||
# drop -t
|
||||
warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
|
||||
undef($Options{'t'});
|
||||
};
|
||||
if ($Options{'b'} and $Options{'l'}) {
|
||||
# drop -l
|
||||
warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
|
||||
undef($Options{'l'});
|
||||
};
|
||||
# -q/-d don't work with -b or -l
|
||||
warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
|
||||
warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
|
||||
};
|
||||
|
||||
### check output type
|
||||
# default output type to 'pretty'
|
||||
$Options{'o'} = 'pretty' if !$Options{'o'};
|
||||
# fail if more than one newsgroup is combined with 'dumpgroup' type
|
||||
die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
|
||||
# accept 'dumpgroup' only with -n
|
||||
if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
|
||||
$Options{'o'} = 'dump';
|
||||
warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
|
||||
};
|
||||
# set output type to 'pretty' for -l
|
||||
if ($Options{'l'} and $Options{'o'} ne 'pretty') {
|
||||
$Options{'o'} = 'pretty';
|
||||
warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
|
||||
};
|
||||
# set output type to 'dump' for -f
|
||||
if ($Options{'f'} and $Options{'o'} ne 'dump') {
|
||||
$Options{'o'} = 'dump';
|
||||
warn ("$MySelf: W: Output type forced to '-o dump' due to usage of '-f'.\n");
|
||||
};
|
||||
|
||||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
|
||||
### get time period
|
||||
my ($StartMonth,$EndMonth);
|
||||
# if '-a' is set, set start/end month from database
|
||||
# FIXME - it doesn't make that much sense to get first/last month from database to query it
|
||||
# with a time period that equals no time period ...
|
||||
if ($Options{'a'}) {
|
||||
undef($Options{'m'});
|
||||
undef($Options{'p'});
|
||||
my $DBQuery = $DBHandle->prepare(sprintf("SELECT MIN(month),MAX(month) FROM %s.%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
|
||||
$DBQuery->execute or die sprintf("$MySelf: E: Can't get MIN/MAX month from %s.%s: %s\n",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
|
||||
($StartMonth,$EndMonth) = $DBQuery->fetchrow_array;
|
||||
} else {
|
||||
($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
|
||||
};
|
||||
# if -p or -a are set: drop -m
|
||||
undef $Options{'m'} if ($Options{'p'} or $Options{'a'});
|
||||
# if time period is more than one month: force output type to '-o pretty' or '-o dumpgroup'
|
||||
if ($Options{'o'} eq 'dump' and ($Options{'p'} or $Options{'a'})) {
|
||||
if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) {
|
||||
# just one newsgroup is defined
|
||||
warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n");
|
||||
$Options{'o'} = 'dumpgroup';
|
||||
} elsif (!defined($Options{'f'})) {
|
||||
# more than one newsgroup - and no file output
|
||||
warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
|
||||
$Options{'o'} = 'pretty';
|
||||
}
|
||||
};
|
||||
### get time period and newsgroups, prepare SQL 'WHERE' clause
|
||||
# get time period
|
||||
# and set caption for output and expression for SQL 'WHERE' clause
|
||||
my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
|
||||
# bail out if --month is invalid
|
||||
&Bleat(2,"--month option has an invalid format - ".
|
||||
"please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
|
||||
# get list of newsgroups and set expression for SQL 'WHERE' clause
|
||||
# with placeholders as well as a list of newsgroup to bind to them
|
||||
my ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups)
|
||||
if $OptNewsgroups;;
|
||||
|
||||
### create report
|
||||
# get list of newsgroups (-n)
|
||||
my ($QueryGroupList,$QueryThreshold,@GroupList,@Params);
|
||||
my $Newsgroups = $Options{'n'};
|
||||
if ($Newsgroups) {
|
||||
# explode list of newsgroups for WHERE clause
|
||||
($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups);
|
||||
### build SQL WHERE clause (and HAVING clause, if needed)
|
||||
my ($SQLWhereClause,$SQLHavingClause);
|
||||
# $OptBoundType 'level'
|
||||
if ($OptBoundType and $OptBoundType ne 'default') {
|
||||
$SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
|
||||
$SQLWhereNewsgroups,&SQLHierarchies($OptSums));
|
||||
$SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType,
|
||||
$LowBound,$UppBound));
|
||||
# $OptBoundType 'threshold' / 'default' or none
|
||||
} else {
|
||||
# set to dummy value (always true)
|
||||
$QueryGroupList = 1;
|
||||
};
|
||||
|
||||
# manage thresholds
|
||||
if (defined($Options{'t'})) {
|
||||
if ($Options{'i'}) {
|
||||
# -i: list groups below threshold
|
||||
$QueryThreshold .= ' postings < ?';
|
||||
} else {
|
||||
# default: list groups above threshold
|
||||
$QueryThreshold .= ' postings > ?';
|
||||
};
|
||||
# push threshold to Params
|
||||
push @Params,$Options{'t'};
|
||||
} else {
|
||||
# set to dummy value (always true)
|
||||
$QueryThreshold = 1;
|
||||
$SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
|
||||
$SQLWhereNewsgroups,&SQLHierarchies($OptSums),
|
||||
&SQLSetBounds('default',$LowBound,$UppBound));
|
||||
}
|
||||
|
||||
# construct WHERE clause
|
||||
# $QueryGroupList is "list of newsgroup" (or 1),
|
||||
# $QueryThreshold is threshold definition (or 1),
|
||||
# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
|
||||
# according to setting of -s
|
||||
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',$QueryGroupList,$QueryThreshold,&SQLHierarchies($Options{'s'}));
|
||||
### get sort order and build SQL 'ORDER BY' clause
|
||||
# default to 'newsgroup' for $OptBoundType 'level' or 'average'
|
||||
$OptGroupBy = 'newsgroup' if (!$OptGroupBy and
|
||||
$OptBoundType and $OptBoundType ne 'default');
|
||||
# force to 'month' for $OptReportType 'average' or 'sum'
|
||||
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
|
||||
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
|
||||
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy);
|
||||
# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy)
|
||||
# set it to 'month' or 'key' for OutputData()
|
||||
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
|
||||
|
||||
# get length of longest newsgroup delivered by query for formatting purposes
|
||||
# FIXME
|
||||
my $MaxLength = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,(@GroupList,@Params));
|
||||
### get report type and build SQL 'SELECT' query
|
||||
my $SQLSelect;
|
||||
my $SQLGroupClause = '';
|
||||
my $Precision = 0; # number of digits right of decimal point for output
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$SQLGroupClause = 'GROUP BY newsgroup';
|
||||
# change $SQLOrderClause: replace everything before 'postings'
|
||||
$SQLOrderClause =~ s/BY.+postings/BY postings/;
|
||||
if ($OptReportType eq 'average') {
|
||||
$SQLSelect = "'All months',newsgroup,AVG(postings)";
|
||||
$Precision = 2;
|
||||
# change $SQLOrderClause: replace 'postings' with 'AVG(postings)'
|
||||
$SQLOrderClause =~ s/postings/AVG(postings)/;
|
||||
} elsif ($OptReportType eq 'sum') {
|
||||
$SQLSelect = "'All months',newsgroup,SUM(postings)";
|
||||
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
|
||||
$SQLOrderClause =~ s/postings/SUM(postings)/;
|
||||
}
|
||||
} else {
|
||||
$SQLSelect = 'month,newsgroup,postings';
|
||||
};
|
||||
|
||||
my ($OrderClause,$DBQuery);
|
||||
# -b (best of / top list) defined?
|
||||
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
|
||||
# default: neither -b nor -l
|
||||
# set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
|
||||
# according to -q and -d
|
||||
$OrderClause = 'newsgroup';
|
||||
$OrderClause = 'postings' if $Options{'q'};
|
||||
$OrderClause .= ' DESC' if $Options{'d'};
|
||||
# prepare query: get number of postings per group from groups table for given months and newsgroups
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
|
||||
} elsif ($Options{'b'}) {
|
||||
# -b is set (then -l can't be!)
|
||||
# set sorting order (-i): top or flop list?
|
||||
if ($Options{'i'}) {
|
||||
$OrderClause = 'postings';
|
||||
} else {
|
||||
$OrderClause = 'postings DESC';
|
||||
};
|
||||
# set -b to 10 if < 1 (Top 10)
|
||||
$Options{'b'} = 10 if $Options{'b'} !~ /^\d*$/ or $Options{'b'} < 1;
|
||||
# push LIMIT to Params
|
||||
push @Params,$Options{'b'};
|
||||
# prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
|
||||
} else {
|
||||
# -l must be set now, as all other cases have been taken care of
|
||||
# which kind of level (-i): more than -l x or less than -l x?
|
||||
my ($Level);
|
||||
if ($Options{'i'}) {
|
||||
$Level = '<';
|
||||
} else {
|
||||
$Level = '>';
|
||||
};
|
||||
# prepare and execute query: get list of newsgroups meeting level condition
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level));
|
||||
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,$Options{'l'})
|
||||
or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
|
||||
### get length of longest newsgroup name delivered by query
|
||||
### for formatting purposes
|
||||
my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
|
||||
my $MaxLength = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'},
|
||||
$Field,$SQLWhereClause,$SQLHavingClause,
|
||||
@SQLBindNewsgroups);
|
||||
|
||||
### build and execute SQL query
|
||||
my ($DBQuery);
|
||||
# special query preparation for $OptBoundType 'level', 'average' or 'sums'
|
||||
if ($OptBoundType and $OptBoundType ne 'default') {
|
||||
# prepare and execute first query:
|
||||
# get list of newsgroups meeting level conditions
|
||||
$DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '.
|
||||
'GROUP BY newsgroup %s',
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
|
||||
$SQLWhereClause,$SQLHavingClause));
|
||||
$DBQuery->execute(@SQLBindNewsgroups)
|
||||
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
|
||||
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
|
||||
$DBI::errstr));
|
||||
# add newsgroups to a comma-seperated list ready for IN(...) query
|
||||
my $GroupList;
|
||||
while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
|
||||
$GroupList .= ',' if (defined($GroupList) and $GroupList ne '');
|
||||
$GroupList .= ',' if $GroupList;
|
||||
$GroupList .= "'$Newsgroup'";
|
||||
};
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$GroupList,$WhereClause));
|
||||
};
|
||||
# enhance $WhereClause
|
||||
if ($GroupList) {
|
||||
$SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,
|
||||
sprintf('newsgroup IN (%s)',$GroupList));
|
||||
} else {
|
||||
# condition cannot be satisfied;
|
||||
# force query to fail by adding '0=1'
|
||||
$SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1');
|
||||
}
|
||||
}
|
||||
|
||||
# prepare query
|
||||
$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
|
||||
$SQLSelect,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
|
||||
$SQLWhereClause,$SQLGroupClause,$
|
||||
SQLOrderClause));
|
||||
|
||||
# execute query
|
||||
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params)
|
||||
or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
|
||||
$DBQuery->execute(@SQLBindNewsgroups)
|
||||
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
|
||||
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
|
||||
$DBI::errstr));
|
||||
|
||||
# output results
|
||||
# reset caption (-c) if -f is set
|
||||
undef($Options{'c'}) if $Options{'f'};
|
||||
# print caption (-c) with time period if -m or -p is set
|
||||
if ($Options{'c'}) {
|
||||
if ($Options{'m'}) {
|
||||
printf ("----- Report for %s\n",$StartMonth);
|
||||
} else {
|
||||
printf ("----- Report from %s to %s %s\n",$StartMonth,$EndMonth,$Options{'a'} ? '(all months)' : '');
|
||||
};
|
||||
};
|
||||
# print caption (-c) with newsgroup list if -n is set
|
||||
printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
|
||||
# print caption (-c) with threshold if -t is set, taking -i in account
|
||||
printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};
|
||||
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
|
||||
# default: neither -b nor -l
|
||||
&OutputData($Options{'o'},$Options{'f'},$DBQuery,$MaxLength);
|
||||
} elsif ($Options{'b'}) {
|
||||
# -b is set (then -l can't be!)
|
||||
# we have to read in the query results ourselves, as they do not have standard layout
|
||||
while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
|
||||
# we just assign "top x" or "bottom x" instead of a month for the caption and force an output type of pretty
|
||||
print &FormatOutput('pretty', ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
|
||||
};
|
||||
} else {
|
||||
# -l must be set now, as all other cases have been taken care of
|
||||
# print caption (-c) with level, taking -i in account
|
||||
printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'}) if $Options{'c'};
|
||||
# we have to read in the query results ourselves, as they do not have standard layout
|
||||
while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
|
||||
# we just switch $Newsgroups and $Month for output generation
|
||||
print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
|
||||
};
|
||||
};
|
||||
### output results
|
||||
# set default to 'pretty'
|
||||
$OptFormat = 'pretty' if !$OptFormat;
|
||||
# print captions if --caption is set
|
||||
if ($OptCaptions && $OptComments) {
|
||||
# print time period with report type
|
||||
my $CaptionReportType= '(number of postings for each month)';
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$CaptionReportType= '(average number of postings for each month)'
|
||||
if $OptReportType eq 'average';
|
||||
$CaptionReportType= '(number of all postings for that time period)'
|
||||
if $OptReportType eq 'sum';
|
||||
}
|
||||
printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
|
||||
# print newsgroup list if --newsgroups is set
|
||||
printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups)))
|
||||
if $OptNewsgroups;
|
||||
# print boundaries, if set
|
||||
my $CaptionBoundary= '(counting only month fulfilling this condition)';
|
||||
if ($OptBoundType and $OptBoundType ne 'default') {
|
||||
$CaptionBoundary= '(every single month)' if $OptBoundType eq 'level';
|
||||
$CaptionBoundary= '(on average)' if $OptBoundType eq 'average';
|
||||
$CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum';
|
||||
}
|
||||
printf("# ----- Threshold: %s %s x %s %s %s\n",
|
||||
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
|
||||
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
|
||||
if ($LowBound or $UppBound);
|
||||
# print primary and secondary sort order
|
||||
printf("# ----- Grouped by %s (%s), sorted %s%s\n",
|
||||
($GroupBy eq 'month') ? 'Months' : 'Newsgroups',
|
||||
($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
|
||||
($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
|
||||
($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
|
||||
}
|
||||
|
||||
# output data
|
||||
&OutputData($OptFormat,$OptComments,$GroupBy,$Precision,
|
||||
$OptCheckgroupsFile ? $ValidGroups : '',
|
||||
$OptFileTemplate,$DBQuery,$MaxLength);
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
@ -253,60 +256,67 @@ groupstats - create reports on newsgroup usage
|
|||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM> | B<-p> I<YYYY-MM:YYYY-MM> | B<-a>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-f> I<filename template>] [B<-g> I<database table>]
|
||||
B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See doc/README: Perl 5.8.x itself and the following modules from CPAN:
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
Config::Auto
|
||||
|
||||
=item -
|
||||
|
||||
DBI
|
||||
|
||||
=back
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script create reports on newsgroup usage (number of postings per
|
||||
group per month) taken from result tables created by
|
||||
F<gatherstats.pl>.
|
||||
B<gatherstats.pl>.
|
||||
|
||||
The time period to act on defaults to last month; you can assign
|
||||
another month via the B<-m> switch or a time period via the B<-p>
|
||||
switch; the latter takes preference.
|
||||
=head2 Features and options
|
||||
|
||||
=head3 Time period and newsgroups
|
||||
|
||||
The time period to act on defaults to last month; you can assign another
|
||||
time period or a single month (or drop all time constraints) via the
|
||||
B<--month> option (see below).
|
||||
|
||||
B<groupstats> will process all newsgroups by default; you can limit
|
||||
that to only some newsgroups by supplying a list of those groups via
|
||||
B<-n> (see below). You can include hierarchy levels in the output by
|
||||
adding the B<-s> switch (see below).
|
||||
processing to only some newsgroups by supplying a list of those groups via
|
||||
B<--newsgroups> option (see below). You can include hierarchy levels in
|
||||
the output by adding the B<--sums> switch (see below). Optionally
|
||||
newsgroups not present in a checkgroups file can be excluded from output,
|
||||
sse B<--checkgroups> below.
|
||||
|
||||
Furthermore you can set a threshold via B<-t> so that only newsgroups
|
||||
with more postings per month will be included in the report. You can
|
||||
invert that by the B<-i> switch so only newsgroups with less than
|
||||
I<threshold> postings per month will be included.
|
||||
=head3 Report type
|
||||
|
||||
You can sort the output by number of postings per month instead of the
|
||||
default (alphabetical list of newsgroups) by using B<-q>; you can
|
||||
reverse the sorting order (from highest to lowest or in reversed
|
||||
alphabetical order) by using B<-d>.
|
||||
You can choose between different B<--report> types: postings per month,
|
||||
average postings per month or all postings summed up; for details, see
|
||||
below.
|
||||
|
||||
Furthermore, you can create a list of newsgroups that had consistently
|
||||
more (or less) than x postings per month during the whole report
|
||||
period by using B<-l> (together with B<i> as needed).
|
||||
=head3 Upper and lower boundaries
|
||||
|
||||
Last but not least you can create a "best of" list of the top x
|
||||
newsgroups via B<-b> (or a "worst of" list by adding B<i>).
|
||||
Furthermore you can set an upper and/or lower boundary to exclude some
|
||||
results from output via the B<--lower> and B<--upper> options,
|
||||
respectively. By default, all newsgroups with more and/or less postings
|
||||
per month will be excluded from the result set (i.e. not shown and not
|
||||
considered for average and sum reports). You can change the meaning of
|
||||
those boundaries with the B<--boundary> option. For details, please see
|
||||
below.
|
||||
|
||||
By default, B<groupstats> will dump an alphabetical list of newsgroups,
|
||||
one per line, followed by the number of postings in that group, for
|
||||
every month. You can change the output format by using B<-o> (see
|
||||
below). Captions can be added by setting the B<-c> switch.
|
||||
=head3 Sorting and formatting the output
|
||||
|
||||
By default, all results are grouped by month; you can group results by
|
||||
newsgroup instead via the B<--groupy-by> option. Within those groups, the
|
||||
list of newsgroups (or months) is sorted alphabetically (or
|
||||
chronologically, respectively) ascending. You can change that order (and
|
||||
sort by number of postings) with the B<--order-by> option. For details and
|
||||
exceptions, please see below.
|
||||
|
||||
The results will be formatted as a kind of table; you can change the
|
||||
output format to a simple list or just a list of newsgroups and number of
|
||||
postings with the B<--format> option. Captions will be added by means of
|
||||
the B<--caption> option; all comments (and captions) can be supressed by
|
||||
using B<--nocomments>.
|
||||
|
||||
Last but not least you can redirect all output to a number of files, e.g.
|
||||
one for each month, by submitting the B<--filetemplate> option, see below.
|
||||
Captions and comments are automatically disabled in this case.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
|
@ -315,37 +325,28 @@ which should be present in the same directory via Config::Auto.
|
|||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
You can override configuration options via the B<-g> switch.
|
||||
You can override some configuration options via the B<--groupsdb> option.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V> (version)
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Print out version and copyright information on B<yapfaq> and exit.
|
||||
Print out version and copyright information and exit.
|
||||
|
||||
=item B<-h> (help)
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-m> I<YYYY-MM> (month)
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
|
||||
|
||||
Set processing period to a month in YYYY-MM format. Ignored if B<-p>
|
||||
or B<-a> is set.
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
period between two month in YYYY-MM:YYYY-MM format (two month, separated
|
||||
by a colon). By using the keyword I<all> instead, you can set no
|
||||
processing period to process the whole database.
|
||||
|
||||
=item B<-p> I<YYYY-MM:YYYY-MM> (period)
|
||||
|
||||
Set processing period to a time period between two month, each in
|
||||
YYYY-MM format, separated by a colon. Overrides B<-m>. Ignored if
|
||||
B<-a> is set.
|
||||
|
||||
=item B<-a> (all)
|
||||
|
||||
Set no processing period (process whole database). Overrides B<-m>
|
||||
and B<-p>.
|
||||
|
||||
=item B<-n> I<newsgroup(s)> (newsgroups)
|
||||
=item B<-n>, B<--newsgroups> I<newsgroup(s)>
|
||||
|
||||
Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
|
||||
be a single newsgroup name (de.alt.test), a newsgroup hierarchy
|
||||
|
@ -354,41 +355,7 @@ example
|
|||
|
||||
de.test:de.alt.test:de.newusers.*
|
||||
|
||||
=item B<-t> I<threshold> (threshold)
|
||||
|
||||
Only include newsgroups with more than I<threshold> postings per
|
||||
month. Can be inverted by the B<-i> switch so that only newsgroups
|
||||
with less than I<threshold> postings will be included.
|
||||
|
||||
This setting will be ignored if B<-l> or B<-b> is set.
|
||||
|
||||
=item B<-l> I<level> (level)
|
||||
|
||||
Only include newsgroups with more than I<level> postings per
|
||||
month, every month during the whole reporting period. Can be inverted
|
||||
by the B<-i> switch so that only newsgroups with less than I<level>
|
||||
postings every single month will be included. Output will be ordered
|
||||
by newsgroup name, followed by month.
|
||||
|
||||
This setting will be ignored if B<-b> is set. Overrides B<-t> and
|
||||
can't be used together with B<-q>, B<-d> or B<-f>.
|
||||
|
||||
=item B<-b> I<n> (best of)
|
||||
|
||||
Create a list of the I<n> newsgroups with the most postings over the
|
||||
whole reporting period. Can be inverted by the B<-i> switch so that a
|
||||
list of the I<n> newsgroups with the least postings over the whole
|
||||
period is generated. Output will be ordered by sum of postings.
|
||||
|
||||
Overrides B<-t> and B<-l> and can't be used together with B<-q>, B<-d>
|
||||
or B<-f>. Output format is set to I<pretty> (see below).
|
||||
|
||||
=item B<-i> (invert)
|
||||
|
||||
Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
|
||||
threshold or level or generate a "bottom list" instead of a top list.
|
||||
|
||||
=item B<-s> (sum per hierarchy level)
|
||||
=item B<-s>, B<--sums|--nosums> (sum per hierarchy level)
|
||||
|
||||
Include "virtual" groups for every hierarchy level in output, for
|
||||
example:
|
||||
|
@ -399,62 +366,219 @@ example:
|
|||
|
||||
See the B<gatherstats> man page for details.
|
||||
|
||||
=item B<-o> I<output type> (output format)
|
||||
=item B<--checkgroups> I<filename>
|
||||
|
||||
Set output format. Default is I<pretty>, which will print a header for
|
||||
each new month, followed by an alphabetical list of newsgroups, each
|
||||
on a new line, followed by the number of postings in that month.
|
||||
B<groupstats> will try to align newsgroup names and posting counts.
|
||||
Usage of B<-b> will force this format; it cannot be used together with
|
||||
B<-f>.
|
||||
Restrict output to those newgroups present in a file in checkgroups format
|
||||
(one newgroup name per line; everything after the first whitespace on each
|
||||
line is ignored). All other newsgroups will be removed from output.
|
||||
|
||||
I<dump> format is used to create an easily parsable output consisting
|
||||
of an alphabetical list of newsgroups, each on a new line, followed by
|
||||
the number of postings in that month, without any alignment. This
|
||||
default format can't be used with time periods of more than one month.
|
||||
Usage of B<-f> will force this format.
|
||||
=item B<-r>, B<--report> I<default|average|sums>
|
||||
|
||||
I<list> format is like I<dump>, but will print the month in front of
|
||||
the newsgroup name.
|
||||
Choose the report type: I<default>, I<average> or I<sums>
|
||||
|
||||
I<dumpgroup> format can only be use with a group list (see B<-n>) of
|
||||
exactly one newsgroup and is like I<dump>, but will output months,
|
||||
followed by the number of postings.
|
||||
By default, B<groupstats> will report the number of postings for each
|
||||
newsgroup in each month. But it can also report the average number of
|
||||
postings per group for all months or the total sum of postings per group
|
||||
for all months.
|
||||
|
||||
=item B<-c> (captions)
|
||||
For report types I<average> and I<sums>, the B<group-by> option has no
|
||||
meaning and will be silently ignored (see below).
|
||||
|
||||
Add captions to output (reporting period, newsgroups list, threshold
|
||||
and so on).
|
||||
=item B<-l>, B<--lower> I<lower boundary>
|
||||
|
||||
This setting will be ignored if B<-f> is set.
|
||||
Set the lower boundary. See B<--boundary> below.
|
||||
|
||||
=item B<-q> (quantity of postings)
|
||||
=item B<-l>, B<--upper> I<upper boundary>
|
||||
|
||||
Sort by number of postings instead of by newsgroup names.
|
||||
Set the upper boundary. See B<--boundary> below.
|
||||
|
||||
Cannot be used with B<-l> or B<-b>.
|
||||
=item B<-b>, B<--boundary> I<boundary type>
|
||||
|
||||
=item B<-d> (descending)
|
||||
Set the boundary type to one of I<default>, I<level>, I<average> or
|
||||
I<sums>.
|
||||
|
||||
Change sort order to descending.
|
||||
By default, all newsgroups with more postings per month than the upper
|
||||
boundary and/or less postings per month than the lower boundary will be
|
||||
excluded from further processing. For the default report that means each
|
||||
month only newsgroups with a number of postings between the boundaries
|
||||
will be displayed. For the other report types, newsgroups with a number of
|
||||
postings exceeding the boundaries in all (!) months will not be
|
||||
considered.
|
||||
|
||||
Cannot be used with B<-l> or B<-b>.
|
||||
For example, lets take a list of newsgroups like this:
|
||||
|
||||
=item B<-f> I<filename template> (output file)
|
||||
----- 2012-01:
|
||||
de.comp.datenbanken.misc 6
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
----- 2012-02:
|
||||
de.comp.datenbanken.misc 8
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
----- 2012-03:
|
||||
de.comp.datenbanken.misc 24
|
||||
de.comp.datenbanken.ms-access 83
|
||||
de.comp.datenbanken.mysql 36
|
||||
|
||||
Save output to file instead of dumping it to STDOUT. B<groupstats>
|
||||
will create one file for each month, with filenames composed by
|
||||
adding year and month to the I<filename template>, for example
|
||||
with B<-f> I<stats>:
|
||||
With C<groupstats --month 2012-01:2012-03 --lower 25 --report sums>,
|
||||
you'll get the following result:
|
||||
|
||||
stats-2010-01
|
||||
stats-2010-02
|
||||
----- All months:
|
||||
de.comp.datenbanken.ms-access 293
|
||||
de.comp.datenbanken.mysql 124
|
||||
|
||||
de.comp.datenbanken.misc has not been considered even though it has 38
|
||||
postings in total, because it has less than 25 postings in every single
|
||||
month. If you want to list all newsgroups with more than 25 postings U<in
|
||||
total>, you'll have to set the boundary type to I<sum>, see below.
|
||||
|
||||
A boundary type of I<level> will show only those newsgroups - at all -
|
||||
that satisfy the boundaries in each and every single month. With the above
|
||||
list of newsgroups and
|
||||
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>,
|
||||
you'll get this result:
|
||||
|
||||
----- All months:
|
||||
de.comp.datenbanken.ms-access 293
|
||||
|
||||
de.comp.datenbanken.mysql has not been considered because it had less than
|
||||
25 postings in 2012-02.
|
||||
|
||||
You can use that to get a list of newsgroups that have more (or less) then
|
||||
x postings during the whole reporting period.
|
||||
|
||||
A boundary type of I<average> will show only those newsgroups - at all -that
|
||||
satisfy the boundaries on average. With the above list of newsgroups and
|
||||
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>,
|
||||
you'll get this result:
|
||||
|
||||
----- All months:
|
||||
de.comp.datenbanken.ms-access 293
|
||||
de.comp.datenbanken.mysql 145
|
||||
|
||||
The average number of postings in the three groups is:
|
||||
|
||||
de.comp.datenbanken.misc 12.67
|
||||
de.comp.datenbanken.ms-access 97.67
|
||||
de.comp.datenbanken.mysql 48.33
|
||||
|
||||
Last but not least, a boundary type of I<sums> will show only those
|
||||
newsgroups - at all - that satisfy the boundaries with the total sum of
|
||||
all postings during the reporting period. With the above list of
|
||||
newsgroups and
|
||||
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>,
|
||||
you'll finally get this result:
|
||||
|
||||
----- All months:
|
||||
de.comp.datenbanken.misc 38
|
||||
de.comp.datenbanken.ms-access 293
|
||||
de.comp.datenbanken.mysql 145
|
||||
|
||||
|
||||
=item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]>
|
||||
|
||||
By default, all results are grouped by month, sorted chronologically in
|
||||
ascending order, like this:
|
||||
|
||||
----- 2012-01:
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
----- 2012-02:
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
|
||||
The results can be grouped by newsgroups instead via
|
||||
B<--group-by> I<newsgroup>:
|
||||
|
||||
----- de.comp.datenbanken.ms-access:
|
||||
2012-01 84
|
||||
2012-02 126
|
||||
----- de.comp.datenbanken.mysql:
|
||||
2012-01 88
|
||||
2012-02 21
|
||||
|
||||
By appending I<-desc> to the group-by option parameter, you can reverse
|
||||
the sort order - e.g. B<--group-by> I<month-desc> will give:
|
||||
|
||||
----- 2012-02:
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
----- 2012-01:
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
|
||||
Average and sums reports (see above) will always be grouped by months;
|
||||
this option will therefore be ignored.
|
||||
|
||||
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
|
||||
|
||||
Within each group (a single month or single newsgroup, see above), the
|
||||
report will be sorted by newsgroup names in ascending alphabetical order
|
||||
by default. You can change the sort order to descending or sort by number
|
||||
of postings instead.
|
||||
|
||||
=item B<-f>, B<--format> I<pretty|list|dump>
|
||||
|
||||
Select the output format, I<pretty> being the default:
|
||||
|
||||
----- 2012-01:
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
----- 2012-02:
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
|
||||
I<list> format looks like this:
|
||||
|
||||
2012-01 de.comp.datenbanken.ms-access 84
|
||||
2012-01 de.comp.datenbanken.mysql 88
|
||||
2012-02 de.comp.datenbanken.ms-access 126
|
||||
2012-02 de.comp.datenbanken.mysql 21
|
||||
|
||||
And I<dump> format looks like this:
|
||||
|
||||
# 2012-01:
|
||||
de.comp.datenbanken.ms-access 84
|
||||
de.comp.datenbanken.mysql 88
|
||||
# 2012-02:
|
||||
de.comp.datenbanken.ms-access 126
|
||||
de.comp.datenbanken.mysql 21
|
||||
|
||||
You can remove the comments by using B<--nocomments>, see below.
|
||||
|
||||
=item B<-c>, B<--captions|--nocaptions>
|
||||
|
||||
Add captions to output, like this:
|
||||
|
||||
----- Report for 2012-01 to 2012-02 (number of postings for each month)
|
||||
----- Newsgroups: de.comp.datenbanken.*
|
||||
----- Threshold: 10 => x <= 20 (on average)
|
||||
----- Grouped by Newsgroups (ascending), sorted by number of postings descending
|
||||
|
||||
False by default.
|
||||
|
||||
=item B<--comments|--nocomments>
|
||||
|
||||
Add comments (group headers) to I<dump> and I<pretty> output. True by default.
|
||||
|
||||
Use I<--nocomments> to suppress anything except newsgroup names/months and
|
||||
numbers of postings. This is enforced when using B<--filetemplate>, see below.
|
||||
|
||||
=item B<--filetemplate> I<filename template>
|
||||
|
||||
Save output to file(s) instead of dumping it to STDOUT. B<groupstats> will
|
||||
create one file for each month (or each newsgroup, accordant to the
|
||||
setting of B<--group-by>, see above), with filenames composed by adding
|
||||
year and month (or newsgroup names) to the I<filename template>, for
|
||||
example with B<--filetemplate> I<stats>:
|
||||
|
||||
stats-2012-01
|
||||
stats-2012-02
|
||||
... and so on
|
||||
|
||||
This setting will be ignored if B<-l> or B<-b> is set. Output format
|
||||
is set to I<dump> (see above).
|
||||
B<--nocomments> is enforced, see above.
|
||||
|
||||
=item B<-g> I<table> (postings per group table)
|
||||
=item B<--groupsdb> I<database table>
|
||||
|
||||
Override I<DBTableGrps> from F<newsstats.conf>.
|
||||
|
||||
|
@ -462,36 +586,40 @@ Override I<DBTableGrps> from F<newsstats.conf>.
|
|||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See doc/INSTALL.
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Show number of postings per group for lasth month in I<dump> format:
|
||||
Show number of postings per group for lasth month in I<pretty> format:
|
||||
|
||||
groupstats
|
||||
|
||||
Show that report for January of 2010 and de.alt.* plus de.test,
|
||||
including display of hierarchy levels:
|
||||
|
||||
groupstats -m 2010-01 -n de.alt.*:de.test -s
|
||||
groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums
|
||||
|
||||
Show that report for the year of 2010 in I<pretty> format:
|
||||
|
||||
groupstats -p 2010-01:2010-12 -o pretty
|
||||
|
||||
Only show newsgroups with less than 30 postings last month, ordered
|
||||
Only show newsgroups with 30 postings or less last month, ordered
|
||||
by number of postings, descending, in I<pretty> format:
|
||||
|
||||
groupstats -iqdt 30 -o pretty
|
||||
groupstats --upper 30 --order-by postings-desc
|
||||
|
||||
Show top 10 for the first half-year of of 2010 in I<pretty> format:
|
||||
Show the total of all postings for the year of 2010 for all groups that
|
||||
had 30 postings or less in every single month in that year, ordered by
|
||||
number of postings in descending order:
|
||||
|
||||
groupstats -p 2010-01:2010-06 -b 10 -o pretty
|
||||
groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc
|
||||
|
||||
Report all groups that had less than 30 postings every singele month
|
||||
in the year of 2010 (I<pretty> format is forced)
|
||||
The same for the average number of postings in the year of 2010:
|
||||
|
||||
groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc
|
||||
|
||||
List number of postings per group for eacht month of 2010 and redirect
|
||||
output to one file for each month, namend stats-2010-01 and so on, in
|
||||
machine-readable form (without formatting):
|
||||
|
||||
groupstats -m 2010-01:2010-12 -f dump --filetemplate stats
|
||||
|
||||
groupstats -p 2010-01:2010-12 -il 30
|
||||
|
||||
=head1 FILES
|
||||
|
||||
|
@ -507,7 +635,7 @@ Library functions for the NewsStats package.
|
|||
|
||||
=item F<newsstats.conf>
|
||||
|
||||
Runtime configuration file for B<yapfaq>.
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
|
@ -522,11 +650,11 @@ bug tracker at L<http://bugs.th-h.de/>!
|
|||
|
||||
=item -
|
||||
|
||||
doc/README
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
doc/INSTALL
|
||||
l>doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
||||
|
@ -542,7 +670,7 @@ Thomas Hochstein <thh@inter.net>
|
|||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
|
||||
Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
|
Loading…
Reference in a new issue