Merge branch 'rewrite' into next

This commit is contained in:
Thomas Hochstein 2013-08-11 21:56:18 +02:00
commit a915469e0c
6 changed files with 1041 additions and 639 deletions

View file

@ -2,7 +2,7 @@
# #
# Library functions for the NewsStats package. # Library functions for the NewsStats package.
# #
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
# #
# This module can be redistributed and/or modified under the same terms under # This module can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
@ -16,18 +16,20 @@ our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
require Exporter; require Exporter;
@ISA = qw(Exporter); @ISA = qw(Exporter);
@EXPORT = qw( @EXPORT = qw(
$MySelf
$MyVersion $MyVersion
$PackageVersion $PackageVersion
ReadOptions $FullPath
$HomePath
ShowVersion
ShowPOD
ReadConfig ReadConfig
OverrideConfig OverrideConfig
InitDB InitDB
Bleat
); );
@EXPORT_OK = qw( @EXPORT_OK = qw(
GetTimePeriod GetTimePeriod
LastMonth LastMonth
CheckMonth
SplitPeriod SplitPeriod
ListMonth ListMonth
ListNewsgroups ListNewsgroups
@ -35,58 +37,47 @@ require Exporter;
OutputData OutputData
FormatOutput FormatOutput
SQLHierarchies SQLHierarchies
SQLSortOrder
SQLGroupList SQLGroupList
SQLSetBounds
SQLBuildClause
GetMaxLength GetMaxLength
); );
%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)], %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod
ListMonth)],
Output => [qw(OutputData FormatOutput)], Output => [qw(OutputData FormatOutput)],
SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLength)]); SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
SQLSetBounds SQLBuildClause GetMaxLength)]);
$VERSION = '0.01'; $VERSION = '0.01';
our $PackageVersion = '0.01'; our $PackageVersion = '0.01';
use Data::Dumper; use Data::Dumper;
use File::Basename; use File::Basename;
use Getopt::Std;
use Config::Auto; use Config::Auto;
use DBI; use DBI;
#####-------------------------------- Vars --------------------------------##### #####-------------------------------- Vars --------------------------------#####
our $MySelf = fileparse($0, '.pl'); # trim the path
our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)"; our $FullPath = $0;
our $HomePath = dirname($0);
$0 =~ s%.*/%%;
# set version string
our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
#####------------------------------- Basics -------------------------------##### #####------------------------------- Basics -------------------------------#####
################################################################################
sub ReadOptions {
################################################################################
### read commandline options and act on standard options -h and -V
### IN : $Params: list of legal commandline paramaters (without -h and -V)
### OUT: a hash containing the commandline options
$Getopt::Std::STANDARD_HELP_VERSION = 1;
my ($Params) = @_;
my %Options;
getopts('Vh'.$Params, \%Options);
# -V: display version
&ShowVersion if ($Options{'V'});
# -h: feed myself to perldoc
&ShowPOD if ($Options{'h'});
return %Options;
};
################################################################################ ################################################################################
################################################################################ ################################################################################
sub ShowVersion { sub ShowVersion {
################################################################################ ################################################################################
### display version and exit ### display version and exit
print "NewsStats v$PackageVersion\n$MyVersion\nCopyright (c) 2010 Thomas Hochstein <thh\@inter.net>\n"; print "NewsStats v$PackageVersion\n$MyVersion\n";
print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n"; print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
print "This program is free software; you may redistribute it ".
"and/or modify it under the same terms as Perl itself.\n";
exit(100); exit(100);
}; };
################################################################################ ################################################################################
@ -95,7 +86,7 @@ sub ShowVersion {
sub ShowPOD { sub ShowPOD {
################################################################################ ################################################################################
### feed myself to perldoc and exit ### feed myself to perldoc and exit
exec('perldoc', $0); exec('perldoc', $FullPath);
exit(100); exit(100);
}; };
################################################################################ ################################################################################
@ -112,7 +103,7 @@ sub ReadConfig {
################################################################################ ################################################################################
################################################################################ ################################################################################
sub OverrideConfig { sub OverrideConfig {
################################################################################ ################################################################################
### override configuration values ### override configuration values
### IN : $ConfigR : reference to configuration hash ### IN : $ConfigR : reference to configuration hash
@ -120,7 +111,8 @@ sub OverrideConfig {
my ($ConfigR,$OverrideR) = @_; my ($ConfigR,$OverrideR) = @_;
my %Override = %$OverrideR; my %Override = %$OverrideR;
# Config hash empty? # Config hash empty?
warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1); &Bleat(1,"Empty configuration hash passed to OverrideConfig()")
if ( keys %$ConfigR < 1);
# return if no overrides # return if no overrides
return if (keys %Override < 1 or keys %$ConfigR < 1); return if (keys %Override < 1 or keys %$ConfigR < 1);
foreach my $Key (keys %Override) { foreach my $Key (keys %Override) {
@ -138,15 +130,35 @@ sub InitDB {
### OUT: DBHandle ### OUT: DBHandle
my ($ConfigR,$Die) = @_; my ($ConfigR,$Die) = @_;
my %Conf = %$ConfigR; my %Conf = %$ConfigR;
my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 }); my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',
$Conf{'DBDriver'},$Conf{'DBDatabase'},
$Conf{'DBHost'}), $Conf{'DBUser'},
$Conf{'DBPw'}, { PrintError => 0 });
if (!$DBHandle) { if (!$DBHandle) {
die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die); &Bleat(2,$DBI::errstr) if (defined($Die) and $Die);
warn("$MySelf: W: $DBI::errstr\n"); &Bleat(1,$DBI::errstr);
}; };
return $DBHandle; return $DBHandle;
}; };
################################################################################ ################################################################################
################################################################################
sub Bleat {
################################################################################
### print warning or error messages and terminate in case of error
### IN : $Level : 1 = warning, 2 = error
### $Message: warning or error message
my ($Level,$Message) = @_;
if ($Level == 1) {
warn "$0 W: $Message\n"
} elsif ($Level == 2) {
die "$0 E: $Message\n"
} else {
print "$0: $Message\n"
}
};
################################################################################
#####------------------------------ GetStats ------------------------------##### #####------------------------------ GetStats ------------------------------#####
################################################################################ ################################################################################
@ -172,7 +184,7 @@ sub ListNewsgroups {
next if($TLH and !/^$TLH/); next if($TLH and !/^$TLH/);
# don't count invalid newsgroups # don't count invalid newsgroups
if(%ValidGroups and !defined($ValidGroups{$_})) { if(%ValidGroups and !defined($ValidGroups{$_})) {
warn (sprintf("DROPPED: %s\n",$_)); &Bleat(1,sprintf("DROPPED: %s",$_));
next; next;
} }
# add original newsgroup to %Newsgroups # add original newsgroup to %Newsgroups
@ -216,7 +228,7 @@ sub ReadGroupList {
### OUT: \%ValidGroups: hash containing all valid newsgroups ### OUT: \%ValidGroups: hash containing all valid newsgroups
my ($Filename) = @_; my ($Filename) = @_;
my %ValidGroups; my %ValidGroups;
open (my $LIST,"<$Filename") or die "$MySelf: E: Cannot read $Filename: $!\n"; open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
while (<$LIST>) { while (<$LIST>) {
s/^(\S+).*$/$1/; s/^(\S+).*$/$1/;
chomp; chomp;
@ -233,29 +245,42 @@ sub ReadGroupList {
################################################################################ ################################################################################
sub GetTimePeriod { sub GetTimePeriod {
################################################################################ ################################################################################
### get a time period to act on, in order of preference: by default the ### get a time period to act on from --month option;
### last month; or a month submitted by -m YYYY-MM; or a time period submitted ### if empty, default to last month
### by -p YYYY-MM:YYYY-MM ### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all'
### IN : $Month,$Period: contents of -m and -p ### OUT: $Verbal,$SQL: verbal description and WHERE-clause
### OUT: $StartMonth, $EndMonth (identical if period is just one month) ### of the chosen time period
my ($Month,$Period) = @_; my ($Month) = @_;
# exit if -m is set and not like YYYY-MM # define result variables
die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month); my ($Verbal, $SQL);
# warn if -m and -p is set # define a regular expression for a month
warn "$MySelf: W: Time period assigned by '-p' takes precendece over month assigned by '-m'.\n" if ($Month && $Period); my $REMonth = '\d{4}-\d{2}';
# default: set -m to last month
$Month = &LastMonth if (!defined($Month) and !defined($Period)); # default to last month if option is not set
# set $StartMonth, $EndMonth if(!$Month) {
my ($StartMonth, $EndMonth); $Month = &LastMonth;
if ($Period) { }
# -p: get date range
($StartMonth, $EndMonth) = &SplitPeriod($Period); # check for valid input
die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth); if ($Month =~ /^$REMonth$/) {
# single month (YYYY-MM)
($Month) = &CheckMonth($Month);
$Verbal = $Month;
$SQL = sprintf("month = '%s'",$Month);
} elsif ($Month =~ /^$REMonth:$REMonth$/) {
# time period (YYYY-MM:YYYY-MM)
$Verbal = sprintf('%s to %s',&SplitPeriod($Month));
$SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month));
} elsif ($Month =~ /^all$/i) {
# special case: ALL
$Verbal = 'all time';
$SQL = '';
} else { } else {
# set $StartMonth = $EndMonth = $Month if -p is not set # invalid input
$StartMonth = $EndMonth = $Month; return (undef,undef);
}; }
return ($StartMonth, $EndMonth);
return ($Verbal,$SQL);
}; };
################################################################################ ################################################################################
@ -278,12 +303,23 @@ sub LastMonth {
################################################################################ ################################################################################
sub CheckMonth { sub CheckMonth {
################################################################################ ################################################################################
### check if input is a valid month in YYYY-MM form ### check if input (in YYYY-MM form) is valid with MM between 01 and 12;
### IN : $Month: month ### otherwise, fix it
### OUT: TRUE / FALSE ### IN : @Month: array of month
my ($Month) = @_; ### OUT: @Month: a valid month
return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/); my (@Month) = @_;
return 1; foreach my $Month (@Month) {
my ($OldMonth) = $Month;
my ($CalMonth) = substr ($Month, -2);
if ($CalMonth < 1 or $CalMonth > 12) {
$CalMonth = '12' if $CalMonth > 12;
$CalMonth = '01' if $CalMonth < 1;
substr($Month, -2) = $CalMonth;
&Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ".
"and '12'), set to '%s'.",$OldMonth,$Month));
}
}
return @Month;
}; };
################################################################################ ################################################################################
@ -291,10 +327,10 @@ sub SplitPeriod {
################################################################################ ################################################################################
### split a time period denoted by YYYY-MM:YYYY-MM into start and end month ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month
### IN : $Period: time period ### IN : $Period: time period
### OUT: $StartMonth, Â$EndMonth ### OUT: $StartMonth, $EndMonth
my ($Period) = @_; my ($Period) = @_;
return (undef,undef) if $Period !~ /^\d{4}-\d{2}:\d{4}-\d{2}$/;
my ($StartMonth, $EndMonth) = split /:/, $Period; my ($StartMonth, $EndMonth) = split /:/, $Period;
($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth);
# switch parameters as necessary # switch parameters as necessary
if ($EndMonth gt $StartMonth) { if ($EndMonth gt $StartMonth) {
return ($StartMonth, $EndMonth); return ($StartMonth, $EndMonth);
@ -307,12 +343,13 @@ sub SplitPeriod {
sub ListMonth { sub ListMonth {
################################################################################ ################################################################################
### return a list of months (YYYY-MM) between start and end month ### return a list of months (YYYY-MM) between start and end month
### IN : $StartMonth, $EndMonth ### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM')
### OUT: @Months: array containing all months from $StartMonth to $EndMonth ### OUT: @Months: array containing all months from $MonthExpression enumerated
my ($StartMonth, $EndMonth) = @_; my ($MonthExpression )= @_;
return (undef,undef) if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/); # return if single month
# return if $StartMonth = $EndMonth return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/);
return ($StartMonth) if ($StartMonth eq $EndMonth); # parse $MonthExpression
my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression);
# set $Year, $Month from $StartMonth # set $Year, $Month from $StartMonth
my ($Year, $Month) = split /-/, $StartMonth; my ($Year, $Month) = split /-/, $StartMonth;
# define @Months # define @Months
@ -335,64 +372,96 @@ sub ListMonth {
sub OutputData { sub OutputData {
################################################################################ ################################################################################
### read database query results from DBHandle and print results with formatting ### read database query results from DBHandle and print results with formatting
### IN : $Format : format specifier ### IN : $Format : format specifier
### $FileName: file name template (-f): filename-YYYY-MM ### $Comments : print or suppress all comments for machine-readable output
### $DBQuery : database query handle with executed query, ### $GroupBy : primary sorting order (month or key)
### $Precision: number of digits right of decimal point (0 or 2)
### $ValidKeys: reference to a hash containing all valid keys
### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
### $DBQuery : database query handle with executed query,
### containing $Month, $Key, $Value ### containing $Month, $Key, $Value
### $PadGroup: padding length for newsgroups field (optional) for 'pretty' ### $PadGroup : padding length for key field (optional) for 'pretty'
my ($Format, $FileName, $DBQuery, $PadGroup) = @_; my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
my ($Handle, $OUT); $DBQuery, $PadGroup) = @_;
my %ValidKeys = %{$ValidKeys} if $ValidKeys;
my ($FileName, $Handle, $OUT);
our $LastIteration; our $LastIteration;
# define output types
my %LegalOutput;
@LegalOutput{('dump',,'list','pretty')} = ();
# bail out if format is unknown
&Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format});
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
# don't display invalid keys
if(%ValidKeys and !defined($ValidKeys{$Key})) {
# FIXME
# &Bleat(1,sprintf("DROPPED: %s",$Key));
next;
};
# care for correct sorting order and abstract from month and keys:
# $Caption will be $Month or $Key, according to sorting order,
# and $Key will be $Key or $Month, respectively
my $Caption;
if ($GroupBy eq 'key') {
$Caption = $Key;
$Key = $Month;
} else {
$Caption = $Month;
}
# set output file handle # set output file handle
if (!$FileName) { if (!$FileTempl) {
$Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
} elsif (!defined($LastIteration) or $LastIteration ne $Month) { } elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
close $OUT if ($LastIteration); close $OUT if ($LastIteration);
open ($OUT,sprintf('>%s-%s',$FileName,$Month)) or die sprintf("$MySelf: E: Cannot open output file '%s-%s': $!\n",$FileName,$Month); # safeguards for filename creation:
# replace potential problem characters with '_'
$FileName = sprintf('%s-%s',$FileTempl,$Caption);
$FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
open ($OUT,">$FileName")
or &Bleat(2,sprintf("Cannot open output file '%s': $!",
$FileName));
$Handle = $OUT; $Handle = $OUT;
}; };
print $Handle &FormatOutput($Format, $Month, $Key, $Value, $PadGroup); print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
$LastIteration = $Month; $Precision, $PadGroup);
$LastIteration = $Caption;
}; };
close $OUT if ($FileName); close $OUT if ($FileTempl);
}; };
################################################################################ ################################################################################
sub FormatOutput { sub FormatOutput {
################################################################################ ################################################################################
### format information for output according to format specifier ### format information for output according to format specifier
### IN : $Format : format specifier ### IN : $Format : format specifier
### $Month : month (as YYYY-MM) ### $Comments : print or suppress all comments for machine-readable output
### $Key : newsgroup, client, ... ### $Caption : month (as YYYY-MM) or $Key, according to sorting order
### $Value : number of postings with that attribute ### $Key : newsgroup, client, ... or $Month, as above
### $PadGroup: padding length for key field (optional) for 'pretty' ### $Value : number of postings with that attribute
### $Precision: number of digits right of decimal point (0 or 2)
### $PadGroup : padding length for key field (optional) for 'pretty'
### OUT: $Output: formatted output ### OUT: $Output: formatted output
my ($Format, $Month, $Key, $Value, $PadGroup) = @_; my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadGroup) = @_;
# define output types
my %LegalOutput;
@LegalOutput{('dump','dumpgroup','list','pretty')} = ();
# bail out if format is unknown
die "$MySelf: E: Unknown output type '$Format'!\n" if !exists($LegalOutput{$Format});
my ($Output); my ($Output);
# keep last month in mind # keep last caption in mind
our ($LastIteration); our ($LastIteration);
# create one line of output
if ($Format eq 'dump') { if ($Format eq 'dump') {
# output as dump (ng nnnnn) # output as dump (key value)
$Output = sprintf ("%s %u\n",$Key,$Value); $Output = sprintf ("# %s:\n",$Caption)
} elsif ($Format eq 'dumpgroup') { if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
# output as dump (YYYY-NN: nnnnn) $Output .= sprintf ("%s %u\n",$Key,$Value);
$Output = sprintf ("%s: %5u\n",$Month,$Value);
} elsif ($Format eq 'list') { } elsif ($Format eq 'list') {
# output as list (YYYY-NN: ng nnnnn) # output as list (caption key value)
$Output = sprintf ("%s: %s %u\n",$Month,$Key,$Value); $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
} elsif ($Format eq 'pretty') { } elsif ($Format eq 'pretty') {
# output as table # output as a table
$Output = sprintf ("----- %s:\n",$Month) if (!defined($LastIteration) or $Month ne $LastIteration); $Output = sprintf ("# ----- %s:\n",$Caption)
$LastIteration = $Month; if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
$Output .= sprintf ($PadGroup ? sprintf("%%-%us %%5u\n",$PadGroup) : "%s %u\n",$Key,$Value); $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%10.*f\n",$PadGroup) :
"%s %.*f\n",$Key,$Precision,$Value);
}; };
return $Output; return $Output;
}; };
@ -408,46 +477,226 @@ sub SQLHierarchies {
### IN : $ShowHierarchies: boolean value ### IN : $ShowHierarchies: boolean value
### OUT: SQL code ### OUT: SQL code
my ($ShowHierarchies) = @_; my ($ShowHierarchies) = @_;
return $ShowHierarchies ? '' : "AND newsgroup NOT LIKE '%.ALL'"; return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'";
}; };
################################################################################ ################################################################################
sub GetMaxLength { sub GetMaxLength {
################################################################################ ################################################################################
### get length of longest field in future query result ### get length of longest field in future query result
### IN : $DBHandle : database handel ### IN : $DBHandle : database handel
### $Table : table to query ### $Table : table to query
### $Field : field to check ### $Field : field to check
### $WhereClause: WHERE clause ### $WhereClause : WHERE clause
### @BindVars : bind variables for WHERE clause ### $HavingClause: HAVING clause
### @BindVars : bind variables for WHERE clause
### OUT: $Length: length of longest instnace of $Field ### OUT: $Length: length of longest instnace of $Field
my ($DBHandle,$Table,$Field,$WhereClause,@BindVars) = @_; my ($DBHandle,$Table,$Field,$WhereClause,$HavingClause,@BindVars) = @_;
my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) FROM %s WHERE %s",$Field,$Table,$WhereClause)); my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) ".
$DBQuery->execute(@BindVars) or warn sprintf("$MySelf: W: Can't get field length for %s from table %s: $DBI::errstr\n",$Field,$Table); "FROM %s %s %s",$Field,$Table,
$WhereClause,$HavingClause ?
'GROUP BY newsgroup' . $HavingClause .
' ORDER BY LENGTH(newsgroup) '.
'DESC LIMIT 1': ''));
$DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ".
"for '%s' from table '%s': ".
"$DBI::errstr",$Field,$Table));
my ($Length) = $DBQuery->fetchrow_array; my ($Length) = $DBQuery->fetchrow_array;
return $Length; return $Length;
}; };
################################################################################
sub SQLSortOrder {
################################################################################
### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and
### $OptOrderBy (secondary sorting), both ascending or descending;
### descending sorting order is done by adding '-desc'
### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
### $OrderBy: secondary sort by month/newsgroups (default)
### or number of 'postings'
### OUT: a SQL ORDER BY clause
my ($GroupBy,$OrderBy) = @_;
my ($GroupSort,$OrderSort) = ('','');
# $GroupBy (primary sorting)
if (!$GroupBy) {
$GroupBy = 'month';
} else {
($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
if ($GroupBy =~ /group/i) {
$GroupBy = 'newsgroup';
} else {
$GroupBy = 'month';
}
}
my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
# $OrderBy (secondary sorting)
if (!$OrderBy) {
$OrderBy = $Secondary;
} else {
($OrderBy, $OrderSort) = SQLParseOrder($OrderBy);
if ($OrderBy =~ /posting/i) {
$OrderBy = "postings $OrderSort, $Secondary";
} else {
$OrderBy = "$Secondary $OrderSort";
}
}
return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy));
};
################################################################################
sub SQLParseOrder {
################################################################################
### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g.
### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc'
### IN : $OrderOption: order option (see above)
### OUT: parameter to sort by,
### sort order ('DESC' or nothing, meaning 'ASC')
my ($OrderOption) = @_;
my $SortOrder = '';
if ($OrderOption =~ s/-?desc$//i) {
$SortOrder = 'DESC';
} else {
$OrderOption =~ s/-?asc$//i
}
return ($OrderOption,$SortOrder);
};
################################################################################ ################################################################################
sub SQLGroupList { sub SQLGroupList {
################################################################################ ################################################################################
### explode list of newsgroups separated by : (with wildcards) to a SQL WHERE ### explode list of newsgroups separated by : (with wildcards)
### clause ### to a SQL 'WHERE' expression
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
### OUT: SQL code, list of newsgroups ### OUT: SQL code to become part of a 'WHERE' clause,
### list of newsgroups for SQL bindings
my ($Newsgroups) = @_; my ($Newsgroups) = @_;
# substitute '*' wildcard with SQL wildcard character '%'
$Newsgroups =~ s/\*/%/g; $Newsgroups =~ s/\*/%/g;
return ('newsgroup LIKE ?', $Newsgroups) if $Newsgroups !~ /:/; # just one newsgroup?
return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
# list of newsgroups separated by ':'
my $SQL = '('; my $SQL = '(';
my @GroupList = split /:/, $Newsgroups; my @GroupList = split /:/, $Newsgroups;
foreach (@GroupList) { foreach (@GroupList) {
$SQL .= ' OR ' if $SQL gt '('; $SQL .= ' OR ' if $SQL gt '(';
$SQL .= 'newsgroup LIKE ?'; $SQL .= SQLGroupWildcard($_);
}; };
$SQL .= ')'; $SQL .= ')';
return ($SQL,@GroupList); return ($SQL,@GroupList);
}; };
################################################################################
sub SQLGroupWildcard {
################################################################################
### build a valid SQL 'WHERE' expression with or without wildcards
### IN : $Newsgroup: newsgroup expression, probably with wildcard
### (group.name or group.name.%)
### OUT: SQL code to become part of a 'WHERE' clause
my ($Newsgroup) = @_;
# FIXME: check for validity
if ($Newsgroup !~ /%/) {
return 'newsgroup = ?';
} else {
return 'newsgroup LIKE ?';
}
};
################################################################################
sub SQLSetBounds {
################################################################################
### set upper and/or lower boundary (number of postings)
### IN : $Type: 'level', 'average', 'sum' or 'default'
### $LowBound,$UppBound: lower/upper boundary, respectively
### OUT: SQL code to become part of a WHERE or HAVING clause
my ($Type,$LowBound,$UppBound) = @_;
($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound);
if($LowBound and $UppBound and $LowBound > $UppBound) {
&Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ".
"$UppBound, exchanging boundaries.");
($LowBound,$UppBound) = ($UppBound,$LowBound);
}
# default to 'default'
my $WhereHavingFunction = 'postings';
# set $LowBound to SQL statement:
# 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >='
if ($Type eq 'level') {
$WhereHavingFunction = 'MIN(postings)'
} elsif ($Type eq 'average') {
$WhereHavingFunction = 'AVG(postings)'
} elsif ($Type eq 'sum') {
$WhereHavingFunction = 'SUM(postings)'
}
$LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound);
# set $LowBound to SQL statement:
# 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <='
if ($Type eq 'level') {
$WhereHavingFunction = 'MAX(postings)'
} elsif ($Type eq 'average') {
$WhereHavingFunction = 'AVG(postings)'
} elsif ($Type eq 'sum') {
$WhereHavingFunction = 'SUM(postings)'
}
$UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound);
return ($LowBound,$UppBound);
};
################################################################################
sub SQLCheckNumber {
################################################################################
### check if input is a valid positive integer; otherwise, make it one
### IN : @Numbers: array of parameters
### OUT: @Numbers: a valid positive integer
my (@Numbers) = @_;
foreach my $Number (@Numbers) {
if ($Number and $Number < 0) {
&Bleat(1,"Boundary $Number is < 0, set to ".-$Number);
$Number = -$Number;
}
$Number = '' if ($Number and $Number !~ /^\d+$/);
}
return @Numbers;
};
################################################################################
sub SQLBuildClause {
################################################################################
### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause
### from multiple expressions which *may* be empty
### IN : $Type: 'where', 'having', 'group' or 'order'
### @Expressions: array of expressions
### OUT: $SQLClause: a SQL clause
my ($Type,@Expressions) = @_;
my ($SQLClause,$Separator,$Statement);
# set separator ('AND' or ',')
if ($Type eq 'where' or $Type eq 'having') {
$Separator = 'AND';
} else {
$Separator = ',';
}
# set statement
if ($Type eq 'where') {
$Statement = 'WHERE';
} elsif ($Type eq 'order') {
$Statement = 'ORDER BY';
} elsif ($Type eq 'having') {
$Statement = 'HAVING';
} else {
$Statement = 'GROUP BY';
}
# build query from expressions with separators
foreach my $Expression (@Expressions) {
if ($Expression) {
$SQLClause .= " $Separator " if ($SQLClause);
$SQLClause .= $Expression;
}
}
# add statement in front if not already present
$SQLClause = " $Statement " . $SQLClause
if ($SQLClause and $SQLClause !~ /$Statement/);
return $SQLClause;
};
#####------------------------------- done ---------------------------------##### #####------------------------------- done ---------------------------------#####
1; 1;

View file

@ -29,6 +29,9 @@ Bug numbers refer to the Mantis issue tracker at <http://bugs.th-h.de/>.
- Add other reports - Add other reports
NewsStats should include some other kinds of reports (stats on used clients, NewsStats should include some other kinds of reports (stats on used clients,
on postings hosts/servers, ...) on postings hosts/servers, ...)
- Add tools for database management
NewsStats should offer tools e.g. to inject postings into the 'raw' database,
or to split databases.
* Individual improvements * Individual improvements
+ NewsStats.pm + NewsStats.pm
@ -43,6 +46,9 @@ Bug numbers refer to the Mantis issue tracker at <http://bugs.th-h.de/>.
- General tests and optimisations - General tests and optimisations
+ feedlog.pl + feedlog.pl
- Gracefully handle interruptions to database connection, at least log
lost postings (Bug #30).
- Buffer output that cannot be written to database (Bug #31).
- Add / enhance / test error handling - Add / enhance / test error handling
- General tests and optimisations - General tests and optimisations
@ -54,13 +60,6 @@ Bug numbers refer to the Mantis issue tracker at <http://bugs.th-h.de/>.
- General tests and optimisations - General tests and optimisations
+ groupstats.pl + groupstats.pl
- output: fix formatting - GetMaxLength does not work for special queries
like -l or -b
- beautify output (formats / captions)
- -a: improve code - it doesn't make that much sense to get first/last
month from database just to query that same database with a time period
that equals no time period ... this part of the WHERE clause should be
dropped instead
- better modularisation (code reuse for other reports!) - better modularisation (code reuse for other reports!)
- Add / enhance / test error handling - Add / enhance / test error handling
- General tests and optimisations - General tests and optimisations

View file

@ -7,7 +7,7 @@
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
@ -25,18 +25,24 @@ use Sys::Syslog qw(:standard :macros);
use Date::Format; use Date::Format;
use DBI; use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
################################# Main program ################################# ################################# Main program #################################
### read commandline options ### read commandline options
my %Options = &ReadOptions('qd'); my ($OptDebug,$OptQuiet);
GetOptions ('d|debug!' => \$OptDebug,
'q|test!' => \$OptQuiet,
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
### read configuration ### read configuration
my %Conf = %{ReadConfig('newsstats.conf')}; my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
### init syslog ### init syslog
openlog($MySelf, 'nofatal,pid', LOG_NEWS); openlog($0, 'nofatal,pid', LOG_NEWS);
syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'}; syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet;
### init database ### init database
my $DBHandle = InitDB(\%Conf,0); my $DBHandle = InitDB(\%Conf,0);
@ -44,7 +50,11 @@ if (!$DBHandle) {
syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);
while (1) {}; # go into endless loop to suppress further errors and respawning while (1) {}; # go into endless loop to suppress further errors and respawning
}; };
my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,
timestamp,token,size,peer,path,
newsgroups,headers)
VALUES (?,?,?,?,?,?,?,?,?,?)",
$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
### main loop ### main loop
while (<>) { while (<>) {
@ -72,17 +82,21 @@ while (<>) {
my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp); my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);
# write to database # write to database
if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) { if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
$Path, $Newsgroups, $Headers)) {
syslog(LOG_ERR, 'Database error: %s', $DBI::errstr); syslog(LOG_ERR, 'Database error: %s', $DBI::errstr);
}; };
$DBQuery->finish; $DBQuery->finish;
warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers) if $Options{'d'}; warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n".
"Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",
$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path,
$Newsgroups, $Headers) if $OptDebug;
} }
### close handles ### close handles
$DBHandle->disconnect; $DBHandle->disconnect;
syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'}; syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet;
closelog(); closelog();
__END__ __END__
@ -99,23 +113,7 @@ B<feedlog> [B<-Vhdq>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
See doc/README: Perl 5.8.x itself and the following modules from CPAN: See L<doc/README>.
=over 2
=item -
Config::Auto
=item -
Date::Format
=item -
DBI
=back
=head1 DESCRIPTION =head1 DESCRIPTION
@ -131,29 +129,29 @@ terminating would only result in a rapid respawn.
=head2 Configuration =head2 Configuration
F<feedlog.pl> will read its configuration from F<newsstats.conf> which B<feedlog> will read its configuration from F<newsstats.conf> which
should be present in the same directory via Config::Auto. should be present in the same directory via Config::Auto.
See doc/INSTALL for an overview of possible configuration options. See L<doc/INSTALL> for an overview of possible configuration options.
=head1 OPTIONS =head1 OPTIONS
=over 3 =over 3
=item B<-V> (version) =item B<-V>, B<--version>
Print out version and copyright information on B<yapfaq> and exit. Print out version and copyright information and exit.
=item B<-h> (help) =item B<-h>, B<--help>
Print this man page and exit. Print this man page and exit.
=item B<-d> (debug) =item B<-d>, B<--debug>
Output debugging information to STDERR while parsing STDIN. You'll Output debugging information to STDERR while parsing STDIN. You'll
find that information most probably in your B<INN> F<errlog> file. find that information most probably in your B<INN> F<errlog> file.
=item B<-q> (quiet) =item B<-q>, B<--quiet>
Suppress logging to syslog. Suppress logging to syslog.
@ -161,7 +159,7 @@ Suppress logging to syslog.
=head1 INSTALLATION =head1 INSTALLATION
See doc/INSTALL. See L<doc/INSTALL.>
=head1 EXAMPLES =head1 EXAMPLES
@ -172,7 +170,7 @@ Set up a feed like that in your B<INN> F<newsfeeds> file:
:!*,de.* :!*,de.*
:Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
See doc/INSTALL for further information. See L<doc/INSTALL> for further information.
=head1 FILES =head1 FILES
@ -188,7 +186,7 @@ Library functions for the NewsStats package.
=item F<newsstats.conf> =item F<newsstats.conf>
Runtime configuration file for B<yapfaq>. Runtime configuration file.
=back =back
@ -203,11 +201,11 @@ bug tracker at L<http://bugs.th-h.de/>!
=item - =item -
doc/README L<doc/README>
=item - =item -
doc/INSTALL L<doc/INSTALL>
=back =back
@ -219,7 +217,7 @@ Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010 Thomas Hochstein <thh@inter.net> Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.

View file

@ -7,7 +7,7 @@
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
@ -22,37 +22,56 @@ use strict;
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList); use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList);
use DBI; use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
################################# Definitions ################################## ################################# Definitions ##################################
# define types of information that can be gathered # define types of information that can be gathered
# all / groups (/ clients / hosts) # all / groups (/ clients / hosts)
my %LegalTypes; my %LegalStats;
@LegalTypes{('all','groups')} = (); @LegalStats{('all','groups')} = ();
################################# Main program ################################# ################################# Main program #################################
### read commandline options ### read commandline options
my %Options = &ReadOptions('dom:p:t:l:n:r:g:c:s:'); my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
$OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest);
GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
'clientsdb=s' => \$OptClientsDB,
'd|debug!' => \$OptDebug,
'groupsdb=s' => \$OptGroupsDB,
'hierarchy=s' => \$OptTLH,
'hostsdb=s' => \$OptHostsDB,
'm|month=s' => \$OptMonth,
'rawdb=s' => \$OptRawDB,
's|stats=s' => \$OptStatsType,
't|test!' => \$OptTest,
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
### read configuration ### read configuration
my %Conf = %{ReadConfig('newsstats.conf')}; my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
### override configuration via commandline options ### override configuration via commandline options
my %ConfOverride; my %ConfOverride;
$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'}; $ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; $ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'}; $ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'}; $ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'}; $ConfOverride{'TLH'} = $OptTLH if $OptTLH;
&OverrideConfig(\%Conf,\%ConfOverride); &OverrideConfig(\%Conf,\%ConfOverride);
### get type of information to gather, defaulting to 'all' ### get type of information to gather, defaulting to 'all'
$Options{'t'} = 'all' if !$Options{'t'}; $OptStatsType = 'all' if !$OptStatsType;
die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}}); &Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType))
if !exists($LegalStats{$OptStatsType});
### get time period (-m or -p) ### get time period from --month
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); # get verbal description of time period, drop SQL code
my ($Period) = &GetTimePeriod($OptMonth);
&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
"'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
### reformat $Conf{'TLH'} ### reformat $Conf{'TLH'}
my $TLH; my $TLH;
@ -67,45 +86,55 @@ if ($Conf{'TLH'}) {
# strip whitespace # strip whitespace
$TLH =~ s/\s//g; $TLH =~ s/\s//g;
# check for illegal characters # check for illegal characters
die "$MySelf: E: Config error - illegal characters in TLH definition\n" if ($TLH !~ /^[a-zA-Z0-9:]+$/); &Bleat(2,'Config error - illegal characters in TLH definition!')
if ($TLH !~ /^[a-zA-Z0-9:]+$/);
if ($TLH =~ /:/) { if ($TLH =~ /:/) {
# reformat $TLH form a:b to (a)|(b) # reformat $TLH from a:b to (a)|(b),
# e.g. replace '.' by '|'
$TLH =~ s/:/)|(/g; $TLH =~ s/:/)|(/g;
$TLH = '(' . $TLH . ')'; $TLH = '(' . $TLH . ')';
}; };
}; };
### read newsgroups list from -l # read list of newsgroups from --checkgroups
my %ValidGroups = %{&ReadGroupList($Options{'l'})} if $Options{'l'}; # into a hash
my %ValidGroups = %{ReadGroupList($OptCheckgroupsFile)} if $OptCheckgroupsFile;
### init database ### init database
my $DBHandle = InitDB(\%Conf,1); my $DBHandle = InitDB(\%Conf,1);
### get data for each month ### get data for each month
warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'}; &Bleat(1,'Test mode. Database is not updated.') if $OptTest;
foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { foreach my $Month (&ListMonth($Period)) {
print "---------- $Month ----------\n" if $Options{'d'}; print "---------- $Month ----------\n" if $OptDebug;
if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') { if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
### ---------------------------------------------- ### ----------------------------------------------
### get groups data (number of postings per group) ### get groups data (number of postings per group)
# get groups data from raw table for given month # get groups data from raw table for given month
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
$DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'}); "WHERE day LIKE ? AND NOT disregard",
$Conf{'DBDatabase'},
$Conf{'DBTableRaw'}));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,
$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
# count postings per group # count postings per group
my %Postings; my %Postings;
while (($_) = $DBQuery->fetchrow_array) { while (($_) = $DBQuery->fetchrow_array) {
# get list oft newsgroups and hierarchies from Newsgroups: # get list oft newsgroups and hierarchies from Newsgroups:
my %Newsgroups = ListNewsgroups($_,$TLH,$Options{'l'} ? \%ValidGroups : ''); my %Newsgroups = ListNewsgroups($_,$TLH,
$OptCheckgroupsFile ? \%ValidGroups : '');
# count each newsgroup and hierarchy once # count each newsgroup and hierarchy once
foreach (sort keys %Newsgroups) { foreach (sort keys %Newsgroups) {
$Postings{$_}++; $Postings{$_}++;
}; };
}; };
# add valid but empty groups if -l is set # add valid but empty groups if --checkgroups is set
if (%ValidGroups) { if (%ValidGroups) {
foreach (sort keys %ValidGroups) { foreach (sort keys %ValidGroups) {
if (!defined($Postings{$_})) { if (!defined($Postings{$_})) {
@ -116,19 +145,29 @@ foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {
}; };
# delete old data for that month # delete old data for that month
if (!$Options{'o'}) { if (!$OptTest) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}),undef,$Month) $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
or warn sprintf("$MySelf: E: Can't delete old groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); $Conf{'DBDatabase'},$Conf{'DBTableGrps'}),
undef,$Month)
or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
}; };
print "----- GroupStats -----\n" if $Options{'d'}; print "----- GroupStats -----\n" if $OptDebug;
foreach my $Newsgroup (sort keys %Postings) { foreach my $Newsgroup (sort keys %Postings) {
print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'}; print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug;
if (!$Options{'o'}) { if (!$OptTest) {
# write to database # write to database
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ".
# $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); "(month,newsgroup,postings) ".
$DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); "VALUES (?, ?, ?)",
$Conf{'DBDatabase'},
$Conf{'DBTableGrps'}));
$DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ".
"$DBI::errstr\n",$Month,$Newsgroup,
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
$DBQuery->finish; $DBQuery->finish;
}; };
}; };
@ -150,43 +189,31 @@ gatherstats - process statistical data from a raw source
=head1 SYNOPSIS =head1 SYNOPSIS
B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-l> I<filename>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>] B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats] [B<-c> I<checkgroups file>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
See doc/README: Perl 5.8.x itself and the following modules from CPAN: See L<doc/README>.
=over 2
=item -
Config::Auto
=item -
DBI
=back
=head1 DESCRIPTION =head1 DESCRIPTION
This script will extract and process statistical information from a This script will extract and process statistical information from a
database table which is fed from F<feedlog.pl> for a given time period database table which is fed from F<feedlog.pl> for a given time period
and write its results to (an)other database table(s). Entries marked and write its results to (an)other database table(s). Entries marked
with I<'disregard'> in the database will be ignored; currently, you have with I<'disregard'> in the database will be ignored; currently, you
to set this flag yourself, using your database management tools. You have to set this flag yourself, using your database management tools.
can exclude erroneous entries that way (e.g. automatic reposts (think You can exclude erroneous entries that way (e.g. automatic reposts
of cancels flood and resurrectors); spam; ...). (think of cancels flood and resurrectors); spam; ...).
The time period to act on defaults to last month; you can assign The time period to act on defaults to last month; you can assign
another month via the B<-m> switch or a time period via the B<-p> another time period or a single month via the B<--month> option (see
switch; the latter takes preference. below).
By default B<gatherstats> will process all types of information; you By default B<gatherstats> will process all types of information; you
can change that using the B<-t> switch and assigning the type of can change that using the B<--stats> option and assigning the type of
information to process. Currently only processing of the number of information to process. Currently that doesn't matter yet as only
postings per group per month is implemented anyway, so that doesn't processing of the number of postings per group per month is
matter yet. implemented anyway.
Possible information types include: Possible information types include:
@ -205,59 +232,58 @@ respectively. A crossposting to de.alt.test and de.alt.admin, on the
other hand, will be counted for de.alt.test and de.alt.admin each, but other hand, will be counted for de.alt.test and de.alt.admin each, but
only once for de.alt.ALL and de.ALL. only once for de.alt.ALL and de.ALL.
Data is written to I<DBTableGrps> (see doc/INSTALL). Data is written to I<DBTableGrps> (see L<doc/INSTALL>); you can
override that default through the B<--groupsdb> option.
=back =back
=head2 Configuration =head2 Configuration
F<gatherstats.pl> will read its configuration from F<newsstats.conf> B<gatherstats> will read its configuration from F<newsstats.conf>
which should be present in the same directory via Config::Auto. which should be present in the same directory via Config::Auto.
See doc/INSTALL for an overview of possible configuration options. See L<doc/INSTALL> for an overview of possible configuration options.
You can override configuration options via the B<-n>, B<-r>, B<-g>, You can override configuration options via the B<--hierarchy>,
B<-c> and B<-s> switches, respectively. B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
respectively.
=head1 OPTIONS =head1 OPTIONS
=over 3 =over 3
=item B<-V> (version) =item B<-V>, B<--version>
Print out version and copyright information on B<yapfaq> and exit. Print out version and copyright information and exit.
=item B<-h> (help) =item B<-h>, B<--help>
Print this man page and exit. Print this man page and exit.
=item B<-d> (debug) =item B<-d>, B<--debug>
Output debugging information to STDOUT while processing (number of Output debugging information to STDOUT while processing (number of
postings per group). postings per group).
=item B<-o> (output only) =item B<-t>, B<--test>
Do not write results to database. You should use B<-d> in conjunction Do not write results to database. You should use B<--debug> in
with B<-o> ... everything else seems a bit pointless. conjunction with B<--test> ... everything else seems a bit pointless.
=item B<-m> I<YYYY-MM> (month) =item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]>
Set processing period to a month in YYYY-MM format. Ignored if B<-p> Set processing period to a single month in YYYY-MM format or to a time
is set. period between two month in YYYY-MM:YYYY-MM format (two month, separated
by a colon).
=item B<-p> I<YYYY-MM:YYYY-MM> (period)
Set processing period to a time period between two month, each in =item B<-s>, B<--stats> I<type>
YYYY-MM format, separated by a colon. Overrides B<-m>.
=item B<-t> I<type> (type)
Set processing type to one of I<all> and I<groups>. Defaults to all Set processing type to one of I<all> and I<groups>. Defaults to all
(and is currently rather pointless as only I<groups> has been (and is currently rather pointless as only I<groups> has been
implemented). implemented).
=item B<-l> I<filename> (check against list) =item B<-c>, B<--checkgroups> I<filename>
Check each group against a list of valid newsgroups read from Check each group against a list of valid newsgroups read from
I<filename>, one group on each line and ignoring everything after the I<filename>, one group on each line and ignoring everything after the
@ -268,23 +294,23 @@ Newsgroups not found in I<filename> will be dropped (and logged to
STDERR), and newsgroups found in I<filename> but having no postings STDERR), and newsgroups found in I<filename> but having no postings
will be added with a count of 0 (and logged to STDERR). will be added with a count of 0 (and logged to STDERR).
=item B<-n> I<TLH> (newsgroup hierarchy) =item B<--hierarchy> I<TLH> (newsgroup hierarchy)
Override I<TLH> from F<newsstats.conf>. Override I<TLH> from F<newsstats.conf>.
=item B<-r> I<table> (raw data table) =item B<--rawdb> I<table> (raw data table)
Override I<DBTableRaw> from F<newsstats.conf>. Override I<DBTableRaw> from F<newsstats.conf>.
=item B<-g> I<table> (postings per group table) =item B<--groupsdb> I<table> (postings per group table)
Override I<DBTableGrps> from F<newsstats.conf>. Override I<DBTableGrps> from F<newsstats.conf>.
=item B<-c> I<table> (client data table) =item B<--clientsdb> I<table> (client data table)
Override I<DBTableClnts> from F<newsstats.conf>. Override I<DBTableClnts> from F<newsstats.conf>.
=item B<-s> I<table> (server/host data table) =item B<--hostsdb> I<table> (host data table)
Override I<DBTableHosts> from F<newsstats.conf>. Override I<DBTableHosts> from F<newsstats.conf>.
@ -292,7 +318,7 @@ Override I<DBTableHosts> from F<newsstats.conf>.
=head1 INSTALLATION =head1 INSTALLATION
See doc/INSTALL. See L<doc/INSTALL>.
=head1 EXAMPLES =head1 EXAMPLES
@ -302,16 +328,16 @@ Process all types of information for lasth month:
Do a dry run, showing results of processing: Do a dry run, showing results of processing:
gatherstats -do gatherstats --debug --test
Process all types of information for January of 2010: Process all types of information for January of 2010:
gatherstats -m 2010-01 gatherstats --month 2010-01
Process only number of postings for the year of 2010, Process only number of postings for the year of 2010,
checking against checkgroups-2010.txt: checking against checkgroups-2010.txt:
gatherstats -p 2010-01:2010-12 -t groups -l checkgroups-2010.txt gatherstats -m 2010-01:2010-12 -s groups -c checkgroups-2010.txt
=head1 FILES =head1 FILES
@ -327,7 +353,7 @@ Library functions for the NewsStats package.
=item F<newsstats.conf> =item F<newsstats.conf>
Runtime configuration file for B<yapfaq>. Runtime configuration file.
=back =back
@ -342,11 +368,11 @@ bug tracker at L<http://bugs.th-h.de/>!
=item - =item -
doc/README L<doc/README>
=item - =item -
doc/INSTALL L<doc/INSTALL>
=back =back
@ -358,7 +384,7 @@ Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010 Thomas Hochstein <thh@inter.net> Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.

View file

@ -7,7 +7,7 @@
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
@ -19,226 +19,229 @@ BEGIN {
} }
use strict; use strict;
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
use DBI; use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
################################# Main program ################################# ################################# Main program #################################
### read commandline options ### read commandline options
my %Options = &ReadOptions('m:p:an:o:t:l:b:iscqdf:g:'); my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments,
$OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth,
$OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound);
GetOptions ('b|boundary=s' => \$OptBoundType,
'c|captions!' => \$OptCaptions,
'checkgroups=s' => \$OptCheckgroupsFile,
'comments!' => \$OptComments,
'filetemplate=s' => \$OptFileTemplate,
'f|format=s' => \$OptFormat,
'g|group-by=s' => \$OptGroupBy,
'groupsdb=s' => \$OptGroupsDB,
'l|lower=i' => \$LowBound,
'm|month=s' => \$OptMonth,
'n|newsgroups=s' => \$OptNewsgroups,
'o|order-by=s' => \$OptOrderBy,
'r|report=s' => \$OptReportType,
's|sums!' => \$OptSums,
'u|upper=i' => \$UppBound,
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
# parse parameters
# $OptComments defaults to TRUE
$OptComments = 1 if (!defined($OptComments));
# force --nocomments when --filetemplate is used
$OptComments = 0 if ($OptFileTemplate);
# parse $OptBoundType
if ($OptBoundType) {
if ($OptBoundType =~ /level/i) {
$OptBoundType = 'level';
} elsif ($OptBoundType =~ /av(era)?ge?/i) {
$OptBoundType = 'average';
} elsif ($OptBoundType =~ /sums?/i) {
$OptBoundType = 'sum';
} else {
$OptBoundType = 'default';
}
}
# parse $OptReportType
if ($OptReportType) {
if ($OptReportType =~ /av(era)?ge?/i) {
$OptReportType = 'average';
} elsif ($OptReportType =~ /sums?/i) {
$OptReportType = 'sum';
} else {
$OptReportType = 'default';
}
}
# read list of newsgroups from --checkgroups
# into a hash reference
my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile;
### read configuration ### read configuration
my %Conf = %{ReadConfig('newsstats.conf')}; my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
### override configuration via commandline options ### override configuration via commandline options
my %ConfOverride; my %ConfOverride;
$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; $ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
&OverrideConfig(\%Conf,\%ConfOverride); &OverrideConfig(\%Conf,\%ConfOverride);
### check for incompatible command line options
# you can't mix '-t', '-b' and '-l'
# -b/-l take preference over -t, and -b takes preference over -l
# you can't use '-f' with '-b' or '-l'
if ($Options{'b'} or $Options{'l'}) {
if ($Options{'f'}) {
# drop -f
warn ("$MySelf: W: You cannot save the report to monthly files when using top lists (-b) or levels (-l). Filename template '-f $Options{'f'}' was ignored.\n");
undef($Options{'f'});
};
if ($Options{'t'}) {
# drop -t
warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
undef($Options{'t'});
};
if ($Options{'b'} and $Options{'l'}) {
# drop -l
warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
undef($Options{'l'});
};
# -q/-d don't work with -b or -l
warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
};
### check output type
# default output type to 'pretty'
$Options{'o'} = 'pretty' if !$Options{'o'};
# fail if more than one newsgroup is combined with 'dumpgroup' type
die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
# accept 'dumpgroup' only with -n
if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
$Options{'o'} = 'dump';
warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
};
# set output type to 'pretty' for -l
if ($Options{'l'} and $Options{'o'} ne 'pretty') {
$Options{'o'} = 'pretty';
warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
};
# set output type to 'dump' for -f
if ($Options{'f'} and $Options{'o'} ne 'dump') {
$Options{'o'} = 'dump';
warn ("$MySelf: W: Output type forced to '-o dump' due to usage of '-f'.\n");
};
### init database ### init database
my $DBHandle = InitDB(\%Conf,1); my $DBHandle = InitDB(\%Conf,1);
### get time period ### get time period and newsgroups, prepare SQL 'WHERE' clause
my ($StartMonth,$EndMonth); # get time period
# if '-a' is set, set start/end month from database # and set caption for output and expression for SQL 'WHERE' clause
# FIXME - it doesn't make that much sense to get first/last month from database to query it my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
# with a time period that equals no time period ... # bail out if --month is invalid
if ($Options{'a'}) { &Bleat(2,"--month option has an invalid format - ".
undef($Options{'m'}); "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
undef($Options{'p'}); # get list of newsgroups and set expression for SQL 'WHERE' clause
my $DBQuery = $DBHandle->prepare(sprintf("SELECT MIN(month),MAX(month) FROM %s.%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); # with placeholders as well as a list of newsgroup to bind to them
$DBQuery->execute or die sprintf("$MySelf: E: Can't get MIN/MAX month from %s.%s: %s\n",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); my ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups)
($StartMonth,$EndMonth) = $DBQuery->fetchrow_array; if $OptNewsgroups;;
} else {
($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
};
# if -p or -a are set: drop -m
undef $Options{'m'} if ($Options{'p'} or $Options{'a'});
# if time period is more than one month: force output type to '-o pretty' or '-o dumpgroup'
if ($Options{'o'} eq 'dump' and ($Options{'p'} or $Options{'a'})) {
if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) {
# just one newsgroup is defined
warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n");
$Options{'o'} = 'dumpgroup';
} elsif (!defined($Options{'f'})) {
# more than one newsgroup - and no file output
warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
$Options{'o'} = 'pretty';
}
};
### create report ### build SQL WHERE clause (and HAVING clause, if needed)
# get list of newsgroups (-n) my ($SQLWhereClause,$SQLHavingClause);
my ($QueryGroupList,$QueryThreshold,@GroupList,@Params); # $OptBoundType 'level'
my $Newsgroups = $Options{'n'}; if ($OptBoundType and $OptBoundType ne 'default') {
if ($Newsgroups) { $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
# explode list of newsgroups for WHERE clause $SQLWhereNewsgroups,&SQLHierarchies($OptSums));
($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups); $SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType,
$LowBound,$UppBound));
# $OptBoundType 'threshold' / 'default' or none
} else { } else {
# set to dummy value (always true) $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
$QueryGroupList = 1; $SQLWhereNewsgroups,&SQLHierarchies($OptSums),
}; &SQLSetBounds('default',$LowBound,$UppBound));
# manage thresholds
if (defined($Options{'t'})) {
if ($Options{'i'}) {
# -i: list groups below threshold
$QueryThreshold .= ' postings < ?';
} else {
# default: list groups above threshold
$QueryThreshold .= ' postings > ?';
};
# push threshold to Params
push @Params,$Options{'t'};
} else {
# set to dummy value (always true)
$QueryThreshold = 1;
} }
# construct WHERE clause ### get sort order and build SQL 'ORDER BY' clause
# $QueryGroupList is "list of newsgroup" (or 1), # default to 'newsgroup' for $OptBoundType 'level' or 'average'
# $QueryThreshold is threshold definition (or 1), $OptGroupBy = 'newsgroup' if (!$OptGroupBy and
# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL) $OptBoundType and $OptBoundType ne 'default');
# according to setting of -s # force to 'month' for $OptReportType 'average' or 'sum'
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',$QueryGroupList,$QueryThreshold,&SQLHierarchies($Options{'s'})); $OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy);
# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy)
# set it to 'month' or 'key' for OutputData()
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
# get length of longest newsgroup delivered by query for formatting purposes ### get report type and build SQL 'SELECT' query
# FIXME my $SQLSelect;
my $MaxLength = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,(@GroupList,@Params)); my $SQLGroupClause = '';
my $Precision = 0; # number of digits right of decimal point for output
if ($OptReportType and $OptReportType ne 'default') {
$SQLGroupClause = 'GROUP BY newsgroup';
# change $SQLOrderClause: replace everything before 'postings'
$SQLOrderClause =~ s/BY.+postings/BY postings/;
if ($OptReportType eq 'average') {
$SQLSelect = "'All months',newsgroup,AVG(postings)";
$Precision = 2;
# change $SQLOrderClause: replace 'postings' with 'AVG(postings)'
$SQLOrderClause =~ s/postings/AVG(postings)/;
} elsif ($OptReportType eq 'sum') {
$SQLSelect = "'All months',newsgroup,SUM(postings)";
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
$SQLOrderClause =~ s/postings/SUM(postings)/;
}
} else {
$SQLSelect = 'month,newsgroup,postings';
};
my ($OrderClause,$DBQuery); ### get length of longest newsgroup name delivered by query
# -b (best of / top list) defined? ### for formatting purposes
if (!defined($Options{'b'}) and !defined($Options{'l'})) { my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
# default: neither -b nor -l my $MaxLength = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'},
# set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC" $Field,$SQLWhereClause,$SQLHavingClause,
# according to -q and -d @SQLBindNewsgroups);
$OrderClause = 'newsgroup';
$OrderClause = 'postings' if $Options{'q'}; ### build and execute SQL query
$OrderClause .= ' DESC' if $Options{'d'}; my ($DBQuery);
# prepare query: get number of postings per group from groups table for given months and newsgroups # special query preparation for $OptBoundType 'level', 'average' or 'sums'
$DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); if ($OptBoundType and $OptBoundType ne 'default') {
} elsif ($Options{'b'}) { # prepare and execute first query:
# -b is set (then -l can't be!) # get list of newsgroups meeting level conditions
# set sorting order (-i): top or flop list? $DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '.
if ($Options{'i'}) { 'GROUP BY newsgroup %s',
$OrderClause = 'postings'; $Conf{'DBDatabase'},$Conf{'DBTableGrps'},
} else { $SQLWhereClause,$SQLHavingClause));
$OrderClause = 'postings DESC'; $DBQuery->execute(@SQLBindNewsgroups)
}; or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
# set -b to 10 if < 1 (Top 10) $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
$Options{'b'} = 10 if $Options{'b'} !~ /^\d*$/ or $Options{'b'} < 1; $DBI::errstr));
# push LIMIT to Params
push @Params,$Options{'b'};
# prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
# -l must be set now, as all other cases have been taken care of
# which kind of level (-i): more than -l x or less than -l x?
my ($Level);
if ($Options{'i'}) {
$Level = '<';
} else {
$Level = '>';
};
# prepare and execute query: get list of newsgroups meeting level condition
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level));
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,$Options{'l'})
or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
# add newsgroups to a comma-seperated list ready for IN(...) query # add newsgroups to a comma-seperated list ready for IN(...) query
my $GroupList; my $GroupList;
while (my ($Newsgroup) = $DBQuery->fetchrow_array) { while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
$GroupList .= ',' if (defined($GroupList) and $GroupList ne ''); $GroupList .= ',' if $GroupList;
$GroupList .= "'$Newsgroup'"; $GroupList .= "'$Newsgroup'";
}; };
$DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$GroupList,$WhereClause)); # enhance $WhereClause
}; if ($GroupList) {
$SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,
sprintf('newsgroup IN (%s)',$GroupList));
} else {
# condition cannot be satisfied;
# force query to fail by adding '0=1'
$SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1');
}
}
# prepare query
$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
$SQLSelect,
$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
$SQLWhereClause,$SQLGroupClause,$
SQLOrderClause));
# execute query # execute query
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params) $DBQuery->execute(@SQLBindNewsgroups)
or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
$DBI::errstr));
# output results ### output results
# reset caption (-c) if -f is set # set default to 'pretty'
undef($Options{'c'}) if $Options{'f'}; $OptFormat = 'pretty' if !$OptFormat;
# print caption (-c) with time period if -m or -p is set # print captions if --caption is set
if ($Options{'c'}) { if ($OptCaptions && $OptComments) {
if ($Options{'m'}) { # print time period with report type
printf ("----- Report for %s\n",$StartMonth); my $CaptionReportType= '(number of postings for each month)';
} else { if ($OptReportType and $OptReportType ne 'default') {
printf ("----- Report from %s to %s %s\n",$StartMonth,$EndMonth,$Options{'a'} ? '(all months)' : ''); $CaptionReportType= '(average number of postings for each month)'
}; if $OptReportType eq 'average';
}; $CaptionReportType= '(number of all postings for that time period)'
# print caption (-c) with newsgroup list if -n is set if $OptReportType eq 'sum';
printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; }
# print caption (-c) with threshold if -t is set, taking -i in account printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'}; # print newsgroup list if --newsgroups is set
if (!defined($Options{'b'}) and !defined($Options{'l'})) { printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups)))
# default: neither -b nor -l if $OptNewsgroups;
&OutputData($Options{'o'},$Options{'f'},$DBQuery,$MaxLength); # print boundaries, if set
} elsif ($Options{'b'}) { my $CaptionBoundary= '(counting only month fulfilling this condition)';
# -b is set (then -l can't be!) if ($OptBoundType and $OptBoundType ne 'default') {
# we have to read in the query results ourselves, as they do not have standard layout $CaptionBoundary= '(every single month)' if $OptBoundType eq 'level';
while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { $CaptionBoundary= '(on average)' if $OptBoundType eq 'average';
# we just assign "top x" or "bottom x" instead of a month for the caption and force an output type of pretty $CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum';
print &FormatOutput('pretty', ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); }
}; printf("# ----- Threshold: %s %s x %s %s %s\n",
} else { $LowBound ? $LowBound : '',$LowBound ? '=>' : '',
# -l must be set now, as all other cases have been taken care of $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
# print caption (-c) with level, taking -i in account if ($LowBound or $UppBound);
printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'}) if $Options{'c'}; # print primary and secondary sort order
# we have to read in the query results ourselves, as they do not have standard layout printf("# ----- Grouped by %s (%s), sorted %s%s\n",
while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { ($GroupBy eq 'month') ? 'Months' : 'Newsgroups',
# we just switch $Newsgroups and $Month for output generation ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
}; ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
}; }
# output data
&OutputData($OptFormat,$OptComments,$GroupBy,$Precision,
$OptCheckgroupsFile ? $ValidGroups : '',
$OptFileTemplate,$DBQuery,$MaxLength);
### close handles ### close handles
$DBHandle->disconnect; $DBHandle->disconnect;
@ -253,60 +256,67 @@ groupstats - create reports on newsgroup usage
=head1 SYNOPSIS =head1 SYNOPSIS
B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM> | B<-p> I<YYYY-MM:YYYY-MM> | B<-a>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-f> I<filename template>] [B<-g> I<database table>] B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
See doc/README: Perl 5.8.x itself and the following modules from CPAN: See L<doc/README>.
=over 2
=item -
Config::Auto
=item -
DBI
=back
=head1 DESCRIPTION =head1 DESCRIPTION
This script create reports on newsgroup usage (number of postings per This script create reports on newsgroup usage (number of postings per
group per month) taken from result tables created by group per month) taken from result tables created by
F<gatherstats.pl>. B<gatherstats.pl>.
The time period to act on defaults to last month; you can assign =head2 Features and options
another month via the B<-m> switch or a time period via the B<-p>
switch; the latter takes preference. =head3 Time period and newsgroups
The time period to act on defaults to last month; you can assign another
time period or a single month (or drop all time constraints) via the
B<--month> option (see below).
B<groupstats> will process all newsgroups by default; you can limit B<groupstats> will process all newsgroups by default; you can limit
that to only some newsgroups by supplying a list of those groups via processing to only some newsgroups by supplying a list of those groups via
B<-n> (see below). You can include hierarchy levels in the output by B<--newsgroups> option (see below). You can include hierarchy levels in
adding the B<-s> switch (see below). the output by adding the B<--sums> switch (see below). Optionally
newsgroups not present in a checkgroups file can be excluded from output,
sse B<--checkgroups> below.
Furthermore you can set a threshold via B<-t> so that only newsgroups =head3 Report type
with more postings per month will be included in the report. You can
invert that by the B<-i> switch so only newsgroups with less than
I<threshold> postings per month will be included.
You can sort the output by number of postings per month instead of the You can choose between different B<--report> types: postings per month,
default (alphabetical list of newsgroups) by using B<-q>; you can average postings per month or all postings summed up; for details, see
reverse the sorting order (from highest to lowest or in reversed below.
alphabetical order) by using B<-d>.
Furthermore, you can create a list of newsgroups that had consistently =head3 Upper and lower boundaries
more (or less) than x postings per month during the whole report
period by using B<-l> (together with B<i> as needed).
Last but not least you can create a "best of" list of the top x Furthermore you can set an upper and/or lower boundary to exclude some
newsgroups via B<-b> (or a "worst of" list by adding B<i>). results from output via the B<--lower> and B<--upper> options,
respectively. By default, all newsgroups with more and/or less postings
per month will be excluded from the result set (i.e. not shown and not
considered for average and sum reports). You can change the meaning of
those boundaries with the B<--boundary> option. For details, please see
below.
By default, B<groupstats> will dump an alphabetical list of newsgroups, =head3 Sorting and formatting the output
one per line, followed by the number of postings in that group, for
every month. You can change the output format by using B<-o> (see By default, all results are grouped by month; you can group results by
below). Captions can be added by setting the B<-c> switch. newsgroup instead via the B<--groupy-by> option. Within those groups, the
list of newsgroups (or months) is sorted alphabetically (or
chronologically, respectively) ascending. You can change that order (and
sort by number of postings) with the B<--order-by> option. For details and
exceptions, please see below.
The results will be formatted as a kind of table; you can change the
output format to a simple list or just a list of newsgroups and number of
postings with the B<--format> option. Captions will be added by means of
the B<--caption> option; all comments (and captions) can be supressed by
using B<--nocomments>.
Last but not least you can redirect all output to a number of files, e.g.
one for each month, by submitting the B<--filetemplate> option, see below.
Captions and comments are automatically disabled in this case.
=head2 Configuration =head2 Configuration
@ -315,37 +325,28 @@ which should be present in the same directory via Config::Auto.
See doc/INSTALL for an overview of possible configuration options. See doc/INSTALL for an overview of possible configuration options.
You can override configuration options via the B<-g> switch. You can override some configuration options via the B<--groupsdb> option.
=head1 OPTIONS =head1 OPTIONS
=over 3 =over 3
=item B<-V> (version) =item B<-V>, B<--version>
Print out version and copyright information on B<yapfaq> and exit. Print out version and copyright information and exit.
=item B<-h> (help) =item B<-h>, B<--help>
Print this man page and exit. Print this man page and exit.
=item B<-m> I<YYYY-MM> (month) =item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
Set processing period to a month in YYYY-MM format. Ignored if B<-p> Set processing period to a single month in YYYY-MM format or to a time
or B<-a> is set. period between two month in YYYY-MM:YYYY-MM format (two month, separated
by a colon). By using the keyword I<all> instead, you can set no
processing period to process the whole database.
=item B<-p> I<YYYY-MM:YYYY-MM> (period) =item B<-n>, B<--newsgroups> I<newsgroup(s)>
Set processing period to a time period between two month, each in
YYYY-MM format, separated by a colon. Overrides B<-m>. Ignored if
B<-a> is set.
=item B<-a> (all)
Set no processing period (process whole database). Overrides B<-m>
and B<-p>.
=item B<-n> I<newsgroup(s)> (newsgroups)
Limit processing to a certain set of newsgroups. I<newsgroup(s)> can Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
be a single newsgroup name (de.alt.test), a newsgroup hierarchy be a single newsgroup name (de.alt.test), a newsgroup hierarchy
@ -354,41 +355,7 @@ example
de.test:de.alt.test:de.newusers.* de.test:de.alt.test:de.newusers.*
=item B<-t> I<threshold> (threshold) =item B<-s>, B<--sums|--nosums> (sum per hierarchy level)
Only include newsgroups with more than I<threshold> postings per
month. Can be inverted by the B<-i> switch so that only newsgroups
with less than I<threshold> postings will be included.
This setting will be ignored if B<-l> or B<-b> is set.
=item B<-l> I<level> (level)
Only include newsgroups with more than I<level> postings per
month, every month during the whole reporting period. Can be inverted
by the B<-i> switch so that only newsgroups with less than I<level>
postings every single month will be included. Output will be ordered
by newsgroup name, followed by month.
This setting will be ignored if B<-b> is set. Overrides B<-t> and
can't be used together with B<-q>, B<-d> or B<-f>.
=item B<-b> I<n> (best of)
Create a list of the I<n> newsgroups with the most postings over the
whole reporting period. Can be inverted by the B<-i> switch so that a
list of the I<n> newsgroups with the least postings over the whole
period is generated. Output will be ordered by sum of postings.
Overrides B<-t> and B<-l> and can't be used together with B<-q>, B<-d>
or B<-f>. Output format is set to I<pretty> (see below).
=item B<-i> (invert)
Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
threshold or level or generate a "bottom list" instead of a top list.
=item B<-s> (sum per hierarchy level)
Include "virtual" groups for every hierarchy level in output, for Include "virtual" groups for every hierarchy level in output, for
example: example:
@ -399,62 +366,219 @@ example:
See the B<gatherstats> man page for details. See the B<gatherstats> man page for details.
=item B<-o> I<output type> (output format) =item B<--checkgroups> I<filename>
Set output format. Default is I<pretty>, which will print a header for Restrict output to those newgroups present in a file in checkgroups format
each new month, followed by an alphabetical list of newsgroups, each (one newgroup name per line; everything after the first whitespace on each
on a new line, followed by the number of postings in that month. line is ignored). All other newsgroups will be removed from output.
B<groupstats> will try to align newsgroup names and posting counts.
Usage of B<-b> will force this format; it cannot be used together with
B<-f>.
I<dump> format is used to create an easily parsable output consisting =item B<-r>, B<--report> I<default|average|sums>
of an alphabetical list of newsgroups, each on a new line, followed by
the number of postings in that month, without any alignment. This
default format can't be used with time periods of more than one month.
Usage of B<-f> will force this format.
I<list> format is like I<dump>, but will print the month in front of Choose the report type: I<default>, I<average> or I<sums>
the newsgroup name.
I<dumpgroup> format can only be use with a group list (see B<-n>) of By default, B<groupstats> will report the number of postings for each
exactly one newsgroup and is like I<dump>, but will output months, newsgroup in each month. But it can also report the average number of
followed by the number of postings. postings per group for all months or the total sum of postings per group
for all months.
=item B<-c> (captions) For report types I<average> and I<sums>, the B<group-by> option has no
meaning and will be silently ignored (see below).
Add captions to output (reporting period, newsgroups list, threshold =item B<-l>, B<--lower> I<lower boundary>
and so on).
This setting will be ignored if B<-f> is set. Set the lower boundary. See B<--boundary> below.
=item B<-q> (quantity of postings) =item B<-l>, B<--upper> I<upper boundary>
Sort by number of postings instead of by newsgroup names. Set the upper boundary. See B<--boundary> below.
Cannot be used with B<-l> or B<-b>. =item B<-b>, B<--boundary> I<boundary type>
=item B<-d> (descending) Set the boundary type to one of I<default>, I<level>, I<average> or
I<sums>.
Change sort order to descending. By default, all newsgroups with more postings per month than the upper
boundary and/or less postings per month than the lower boundary will be
excluded from further processing. For the default report that means each
month only newsgroups with a number of postings between the boundaries
will be displayed. For the other report types, newsgroups with a number of
postings exceeding the boundaries in all (!) months will not be
considered.
Cannot be used with B<-l> or B<-b>. For example, lets take a list of newsgroups like this:
=item B<-f> I<filename template> (output file) ----- 2012-01:
de.comp.datenbanken.misc 6
de.comp.datenbanken.ms-access 84
de.comp.datenbanken.mysql 88
----- 2012-02:
de.comp.datenbanken.misc 8
de.comp.datenbanken.ms-access 126
de.comp.datenbanken.mysql 21
----- 2012-03:
de.comp.datenbanken.misc 24
de.comp.datenbanken.ms-access 83
de.comp.datenbanken.mysql 36
Save output to file instead of dumping it to STDOUT. B<groupstats> With C<groupstats --month 2012-01:2012-03 --lower 25 --report sums>,
will create one file for each month, with filenames composed by you'll get the following result:
adding year and month to the I<filename template>, for example
with B<-f> I<stats>:
stats-2010-01 ----- All months:
stats-2010-02 de.comp.datenbanken.ms-access 293
de.comp.datenbanken.mysql 124
de.comp.datenbanken.misc has not been considered even though it has 38
postings in total, because it has less than 25 postings in every single
month. If you want to list all newsgroups with more than 25 postings U<in
total>, you'll have to set the boundary type to I<sum>, see below.
A boundary type of I<level> will show only those newsgroups - at all -
that satisfy the boundaries in each and every single month. With the above
list of newsgroups and
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>,
you'll get this result:
----- All months:
de.comp.datenbanken.ms-access 293
de.comp.datenbanken.mysql has not been considered because it had less than
25 postings in 2012-02.
You can use that to get a list of newsgroups that have more (or less) then
x postings during the whole reporting period.
A boundary type of I<average> will show only those newsgroups - at all -that
satisfy the boundaries on average. With the above list of newsgroups and
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>,
you'll get this result:
----- All months:
de.comp.datenbanken.ms-access 293
de.comp.datenbanken.mysql 145
The average number of postings in the three groups is:
de.comp.datenbanken.misc 12.67
de.comp.datenbanken.ms-access 97.67
de.comp.datenbanken.mysql 48.33
Last but not least, a boundary type of I<sums> will show only those
newsgroups - at all - that satisfy the boundaries with the total sum of
all postings during the reporting period. With the above list of
newsgroups and
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>,
you'll finally get this result:
----- All months:
de.comp.datenbanken.misc 38
de.comp.datenbanken.ms-access 293
de.comp.datenbanken.mysql 145
=item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]>
By default, all results are grouped by month, sorted chronologically in
ascending order, like this:
----- 2012-01:
de.comp.datenbanken.ms-access 84
de.comp.datenbanken.mysql 88
----- 2012-02:
de.comp.datenbanken.ms-access 126
de.comp.datenbanken.mysql 21
The results can be grouped by newsgroups instead via
B<--group-by> I<newsgroup>:
----- de.comp.datenbanken.ms-access:
2012-01 84
2012-02 126
----- de.comp.datenbanken.mysql:
2012-01 88
2012-02 21
By appending I<-desc> to the group-by option parameter, you can reverse
the sort order - e.g. B<--group-by> I<month-desc> will give:
----- 2012-02:
de.comp.datenbanken.ms-access 126
de.comp.datenbanken.mysql 21
----- 2012-01:
de.comp.datenbanken.ms-access 84
de.comp.datenbanken.mysql 88
Average and sums reports (see above) will always be grouped by months;
this option will therefore be ignored.
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
Within each group (a single month or single newsgroup, see above), the
report will be sorted by newsgroup names in ascending alphabetical order
by default. You can change the sort order to descending or sort by number
of postings instead.
=item B<-f>, B<--format> I<pretty|list|dump>
Select the output format, I<pretty> being the default:
----- 2012-01:
de.comp.datenbanken.ms-access 84
de.comp.datenbanken.mysql 88
----- 2012-02:
de.comp.datenbanken.ms-access 126
de.comp.datenbanken.mysql 21
I<list> format looks like this:
2012-01 de.comp.datenbanken.ms-access 84
2012-01 de.comp.datenbanken.mysql 88
2012-02 de.comp.datenbanken.ms-access 126
2012-02 de.comp.datenbanken.mysql 21
And I<dump> format looks like this:
# 2012-01:
de.comp.datenbanken.ms-access 84
de.comp.datenbanken.mysql 88
# 2012-02:
de.comp.datenbanken.ms-access 126
de.comp.datenbanken.mysql 21
You can remove the comments by using B<--nocomments>, see below.
=item B<-c>, B<--captions|--nocaptions>
Add captions to output, like this:
----- Report for 2012-01 to 2012-02 (number of postings for each month)
----- Newsgroups: de.comp.datenbanken.*
----- Threshold: 10 => x <= 20 (on average)
----- Grouped by Newsgroups (ascending), sorted by number of postings descending
False by default.
=item B<--comments|--nocomments>
Add comments (group headers) to I<dump> and I<pretty> output. True by default.
Use I<--nocomments> to suppress anything except newsgroup names/months and
numbers of postings. This is enforced when using B<--filetemplate>, see below.
=item B<--filetemplate> I<filename template>
Save output to file(s) instead of dumping it to STDOUT. B<groupstats> will
create one file for each month (or each newsgroup, accordant to the
setting of B<--group-by>, see above), with filenames composed by adding
year and month (or newsgroup names) to the I<filename template>, for
example with B<--filetemplate> I<stats>:
stats-2012-01
stats-2012-02
... and so on ... and so on
This setting will be ignored if B<-l> or B<-b> is set. Output format B<--nocomments> is enforced, see above.
is set to I<dump> (see above).
=item B<-g> I<table> (postings per group table) =item B<--groupsdb> I<database table>
Override I<DBTableGrps> from F<newsstats.conf>. Override I<DBTableGrps> from F<newsstats.conf>.
@ -462,36 +586,40 @@ Override I<DBTableGrps> from F<newsstats.conf>.
=head1 INSTALLATION =head1 INSTALLATION
See doc/INSTALL. See L<doc/INSTALL>.
=head1 EXAMPLES =head1 EXAMPLES
Show number of postings per group for lasth month in I<dump> format: Show number of postings per group for lasth month in I<pretty> format:
groupstats groupstats
Show that report for January of 2010 and de.alt.* plus de.test, Show that report for January of 2010 and de.alt.* plus de.test,
including display of hierarchy levels: including display of hierarchy levels:
groupstats -m 2010-01 -n de.alt.*:de.test -s groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums
Show that report for the year of 2010 in I<pretty> format: Only show newsgroups with 30 postings or less last month, ordered
groupstats -p 2010-01:2010-12 -o pretty
Only show newsgroups with less than 30 postings last month, ordered
by number of postings, descending, in I<pretty> format: by number of postings, descending, in I<pretty> format:
groupstats -iqdt 30 -o pretty groupstats --upper 30 --order-by postings-desc
Show top 10 for the first half-year of of 2010 in I<pretty> format: Show the total of all postings for the year of 2010 for all groups that
had 30 postings or less in every single month in that year, ordered by
number of postings in descending order:
groupstats -p 2010-01:2010-06 -b 10 -o pretty groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc
Report all groups that had less than 30 postings every singele month The same for the average number of postings in the year of 2010:
in the year of 2010 (I<pretty> format is forced)
groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc
List number of postings per group for eacht month of 2010 and redirect
output to one file for each month, namend stats-2010-01 and so on, in
machine-readable form (without formatting):
groupstats -m 2010-01:2010-12 -f dump --filetemplate stats
groupstats -p 2010-01:2010-12 -il 30
=head1 FILES =head1 FILES
@ -507,7 +635,7 @@ Library functions for the NewsStats package.
=item F<newsstats.conf> =item F<newsstats.conf>
Runtime configuration file for B<yapfaq>. Runtime configuration file.
=back =back
@ -522,11 +650,11 @@ bug tracker at L<http://bugs.th-h.de/>!
=item - =item -
doc/README L<doc/README>
=item - =item -
doc/INSTALL l>doc/INSTALL>
=item - =item -
@ -542,7 +670,7 @@ Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010 Thomas Hochstein <thh@inter.net> Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.

View file

@ -6,7 +6,7 @@
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
@ -24,11 +24,16 @@ use NewsStats qw(:DEFAULT);
use Cwd; use Cwd;
use DBI; use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
################################# Main program ################################# ################################# Main program #################################
### read commandline options ### read commandline options
my %Options = &ReadOptions('u:'); my ($OptUpdate);
GetOptions ('u|update=s' => \$OptUpdate,
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
### change working directory to .. (as we're in .../install) ### change working directory to .. (as we're in .../install)
chdir dirname($0).'/..'; chdir dirname($0).'/..';
@ -36,7 +41,7 @@ my $Path = cwd();
### read configuration ### read configuration
print("Reading configuration.\n"); print("Reading configuration.\n");
my %Conf = %{ReadConfig('newsstats.conf')}; my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
##### -------------------------------------------------------------------------- ##### --------------------------------------------------------------------------
##### Database table definitions ##### Database table definitions
@ -131,7 +136,7 @@ INSTALL
my $Upgrade = <<UPGRADE; my $Upgrade = <<UPGRADE;
---------- ----------
Your installation was upgraded from $Options{'u'} to $PackageVersion. Your installation was upgraded from $OptUpdate to $PackageVersion.
Don't forget to restart your INN feed so that it can pick up the new version: Don't forget to restart your INN feed so that it can pick up the new version:
@ -145,9 +150,10 @@ UPGRADE
### DB init, read list of tables ### DB init, read list of tables
print "Reading database information.\n"; print "Reading database information.\n";
my $DBHandle = InitDB(\%Conf,1); my $DBHandle = InitDB(\%Conf,1);
my %TablesInDB = %{$DBHandle->table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')}; my %TablesInDB =
%{$DBHandle->table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')};
if (!$Options{'u'}) { if (!$OptUpdate) {
##### installation mode ##### installation mode
print "----------\nStarting database table generation.\n"; print "----------\nStarting database table generation.\n";
# check for tables and create them, if they don't exist yet # check for tables and create them, if they don't exist yet
@ -162,8 +168,8 @@ if (!$Options{'u'}) {
##### upgrade mode ##### upgrade mode
print "----------\nStarting upgrade process.\n"; print "----------\nStarting upgrade process.\n";
$PackageVersion = '0.03'; $PackageVersion = '0.03';
if ($Options{'u'} < $PackageVersion) { if ($OptUpdate < $PackageVersion) {
if ($Options{'u'} < 0.02) { if ($OptUpdate < 0.02) {
# 0.01 -> 0.02 # 0.01 -> 0.02
# &DoMySQL('...;'); # &DoMySQL('...;');
# print "v0.02: Database upgrades ...\n"; # print "v0.02: Database upgrades ...\n";
@ -185,19 +191,23 @@ exit(0);
sub CreateTable { sub CreateTable {
my $Table = shift; my $Table = shift;
if (defined($TablesInDB{$Conf{$Table}})) { if (defined($TablesInDB{$Conf{$Table}})) {
printf("Database table %s.%s already exists, skipping ....\n",$Conf{'DBDatabase'},$Conf{$Table}); printf("Database table %s.%s already exists, skipping ....\n",
$Conf{'DBDatabase'},$Conf{$Table});
return; return;
}; };
my $DBQuery = $DBHandle->prepare($DBCreate{$Table}); my $DBQuery = $DBHandle->prepare($DBCreate{$Table});
$DBQuery->execute() or die sprintf("$MySelf: E: Can't create table %s in database %s: %s%\n",$Table,$Conf{'DBDatabase'},$DBI::errstr); $DBQuery->execute() or
printf("Database table %s.%s created succesfully.\n",$Conf{'DBDatabase'},$Conf{$Table}); &Bleat(2, sprintf("Can't create table %s in database %s: %s%\n",$Table,
$Conf{'DBDatabase'},$DBI::errstr));
printf("Database table %s.%s created succesfully.\n",
$Conf{'DBDatabase'},$Conf{$Table});
return; return;
}; };
sub DoMySQL { sub DoMySQL {
my $SQL = shift; my $SQL = shift;
my $DBQuery = $DBHandle->prepare($SQL); my $DBQuery = $DBHandle->prepare($SQL);
$DBQuery->execute() or warn sprintf("$MySelf: E: Database error: %s\n",$DBI::errstr); $DBQuery->execute() or &Bleat(1, sprintf("Database error: %s\n",$DBI::errstr));
return; return;
}; };
@ -221,23 +231,11 @@ install - installation script
=head1 SYNOPSIS =head1 SYNOPSIS
B<install> [B<-Vh>] B<install> [B<-Vh> [--update I<version>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
See doc/README: Perl 5.8.x itself and the following modules from CPAN: See L<doc/README>.
=over 2
=item -
Config::Auto
=item -
DBI
=back
=head1 DESCRIPTION =head1 DESCRIPTION
@ -245,23 +243,27 @@ This script will create database tables as necessary and configured.
=head2 Configuration =head2 Configuration
F<install.pl> will read its configuration from F<newsstats.conf> via B<install> will read its configuration from F<newsstats.conf> via
Config::Auto. Config::Auto.
See doc/INSTALL for an overview of possible configuration options. See L<doc/INSTALL> for an overview of possible configuration options.
=head1 OPTIONS =head1 OPTIONS
=over 3 =over 3
=item B<-V> (version) =item B<-V>, B<--version>
Print out version and copyright information on B<yapfaq> and exit. Print out version and copyright information and exit.
=item B<-h> (help) =item B<-h>, B<--help>
Print this man page and exit. Print this man page and exit.
=item B<-u>, B<--update> I<version>
Don't do a fresh install, but update from I<version>.
=back =back
=head1 FILES =head1 FILES
@ -278,7 +280,7 @@ Library functions for the NewsStats package.
=item F<newsstats.conf> =item F<newsstats.conf>
Runtime configuration file for B<yapfaq>. Runtime configuration file.
=back =back
@ -293,11 +295,11 @@ bug tracker at L<http://bugs.th-h.de/>!
=item - =item -
doc/README L<doc/README>
=item - =item -
doc/INSTALL L<doc/INSTALL>
=back =back
@ -309,7 +311,7 @@ Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010 Thomas Hochstein <thh@inter.net> Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.