Compare commits

..

No commits in common. "20434ab1dc938be46a5a9d1ef426ca42f4fb22b2" and "5a6a3e58bff6a76dc723bac7b4c0a5275dfb8bdf" have entirely different histories.

17 changed files with 416 additions and 1558 deletions

View file

@ -4,14 +4,12 @@
## Description ## Description
**NewsStats** stores overview data and complete headers of all incoming postings (in one or more specific Usenet hierarchies) in real time in a MySQL database. This raw dataset can then be analysed regularly, e.g. monthly, for instance in terms of postings per group and month. The analysis results will also be stored in databases which in turn can be used to generate various reports (postings per group, injection server or posting agent, per month). **NewsStats** stores overview data and complete headers of all incoming postings (in one or more specific Usenet hierarchies) in real time in a MySQL database. This raw dataset can then be analysed regularly, e.g. monthly, for instance in terms of postings per group and month. The analysis results will also be stored in a database which in turn can be used to generate various reports.
This software is currently used to generate the monthly statistics posted to `de.admin.news.lists` for the de.\* hierarchy. The software package is still under development.
It is currently used to generate the monthly statistics posted to `de.admin.news.lists` for the de.\* hierarchy.
## More information ## More information
Please see the [distribution page](https://th-h.de/net/software/newsstats/) (in German). Please see the [distribution page](https://th-h.de/net/software/newsstats/) (in German).
* General overview and examples: [README](doc/README)
* Installation instructions: [INSTALL](doc/INSTALL)
* Changelog: [ChangeLog](doc/ChangeLog)

View file

@ -1,634 +0,0 @@
#! /usr/bin/perl
#
# clientstats.pl
#
# This script will get statistical data on newsreader (client) usage
# from a database.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
BEGIN {
use File::Basename;
# we're in .../bin, so our module is in ../lib
push(@INC, dirname($0).'/../lib');
}
use strict;
use warnings;
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
################################# Main program #################################
### read commandline options
# every option gets its own lexical; all of them stay undefined
# unless the option is given on the command line
my ($OptCaptions, $OptComments, $OptDB, $OptFileTemplate, $OptFormat,
    $OptGroupBy, $LowBound, $OptMonth, $OptNames, $OptOrderBy,
    $OptReportType, $OptSums, $UppBound, $OptVersions, $OptConfFile);
GetOptions(
    'c|captions!'    => \$OptCaptions,
    'comments!'      => \$OptComments,
    'db=s'           => \$OptDB,
    'filetemplate=s' => \$OptFileTemplate,
    'f|format=s'     => \$OptFormat,
    'g|group-by=s'   => \$OptGroupBy,
    'l|lower=i'      => \$LowBound,
    'm|month=s'      => \$OptMonth,
    'n|names=s'      => \$OptNames,
    'o|order-by=s'   => \$OptOrderBy,
    'r|report=s'     => \$OptReportType,
    's|sums!'        => \$OptSums,
    'u|upper=i'      => \$UppBound,
    'v|versions!'    => \$OptVersions,
    'conffile=s'     => \$OptConfFile,
    'h|help'         => \&ShowPOD,
    'V|version'      => \&ShowVersion,
) or exit 1;
# parse parameters
# comments are switched on by default, but only as long as the output
# is not redirected to files via --filetemplate
if (!defined($OptComments) && !$OptFileTemplate) {
    $OptComments = 1;
}
# normalize --report: anything containing 'sum' selects the sum report,
# every other value falls back to the default report
if ($OptReportType) {
    $OptReportType = ($OptReportType =~ /sums?/i) ? 'sum' : 'default';
}
### read configuration
my %Conf = %{ ReadConfig($OptConfFile) };
### set DBTable
# the table configured as DBTableClnts is used unless --db names another one
$Conf{'DBTable'} = $OptDB ? $OptDB : $Conf{'DBTableClnts'};
### init database
my $DBHandle = InitDB(\%Conf, 1);
### get time period and names, prepare SQL 'WHERE' clause
# GetTimePeriod() delivers the caption for the output and the matching
# expression for the SQL 'WHERE' clause
my ($CaptionPeriod, $SQLWherePeriod) = &GetTimePeriod($OptMonth);
# no caption means --month could not be parsed: bail out
unless ($CaptionPeriod) {
    &Bleat(2, "--month option has an invalid format - ".
              "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!");
}
# translate --names to a 'WHERE' expression with placeholders
# and the list of names to bind to them
my ($SQLWhereNames, @SQLBindNames);
if ($OptNames) {
    ($SQLWhereNames, @SQLBindNames) = &SQLGroupList($OptNames, 'client');
    # bail out if --names is invalid
    unless ($SQLWhereNames) {
        &Bleat(2, "--names option has an invalid format!");
    }
}
### build SQL WHERE clause
# the 'ALL' pseudo client (sum per month) is left out unless --sums is set
my $ExcludeSums = '';
$ExcludeSums = "client != 'ALL'" if !$OptSums;
# this query only reads the rows with version 'ALL' for each client
my @SQLBounds = &SQLSetBounds('default', $LowBound, $UppBound);
my $SQLWhereClause = SQLBuildClause('where', $SQLWherePeriod, $SQLWhereNames,
                                    $ExcludeSums, "version = 'ALL'",
                                    @SQLBounds);
### get sort order and build SQL 'ORDER BY' clause
# every report type but 'default' is always grouped by month
if ($OptReportType and $OptReportType ne 'default') {
    $OptGroupBy = 'month';
}
# without an explicit --group-by, group by name if exactly one name
# (no list, no wildcard) is requested for more than one month
if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
    and $OptNames and $OptNames !~ /[:*%]/) {
    $OptGroupBy = 'name';
}
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
my ($GroupBy, $SQLOrderClause)
    = SQLSortOrder($OptGroupBy, $OptOrderBy, 'client, version');
# the output code only distinguishes 'month' and 'key':
# map everything that is not 'month' to 'key'
$GroupBy = 'key' if $GroupBy ne 'month';
### get report type and build SQL 'SELECT' query
# start with the default report and adapt for sum reports
my $SQLSelect      = "month,LEFT(client,40),postings";
my $SQLGroupClause = '';
if ($OptReportType and $OptReportType ne 'default') {
    $SQLSelect      = "'All months',LEFT(client,40),SUM(postings)";
    $SQLGroupClause = "GROUP BY client, version";
    # change $SQLOrderClause: drop every sort key in front of 'postings' ...
    $SQLOrderClause =~ s/BY.+postings/BY postings/;
    # ... and replace the first 'postings' with 'SUM(postings)'
    $SQLOrderClause =~ s/postings/SUM(postings)/;
}
### get length of longest name delivered by query
### for formatting purposes
# when grouping by month the rows are client names, otherwise months
my $Field = 'month';
$Field = 'LEFT(client,40)' if $GroupBy eq 'month';
my ($MaxLength, $MaxValLength)
    = &GetMaxLength($DBHandle, $Conf{'DBTable'}, $Field, 'postings',
                    $SQLWhereClause, '', @SQLBindNames);
### build and execute SQL query
# assemble the statement from the parts prepared above
my $SQLStatement = sprintf('SELECT %s FROM %s.%s %s %s %s',
                           $SQLSelect,
                           $Conf{'DBDatabase'}, $Conf{'DBTable'},
                           $SQLWhereClause, $SQLGroupClause,
                           $SQLOrderClause);
my $DBQuery = $DBHandle->prepare($SQLStatement);
# execute query, binding the client names to their placeholders
unless ($DBQuery->execute(@SQLBindNames)) {
    &Bleat(2, sprintf("Can't get client data for %s from %s.%s: %s\n",
                      $CaptionPeriod, $Conf{'DBDatabase'}, $Conf{'DBTable'},
                      $DBI::errstr));
}
### output results
# set default to 'pretty'
$OptFormat ||= 'pretty';
# build the captions (lead-in) if --captions is set;
# $LeadIn stays undefined otherwise
my $LeadIn;
if ($OptCaptions && $OptComments) {
    # time period with report type
    my $CaptionReportType = ($OptReportType and $OptReportType ne 'default')
        ? '(number of all postings for that time period)'
        : '(number of postings for each month)';
    $LeadIn = sprintf("# ----- Report for %s %s\n",
                      $CaptionPeriod, $CaptionReportType);
    # name list, if --names is set
    if ($OptNames) {
        $LeadIn .= sprintf("# ----- Names: %s\n",
                           join(',', split(/:/, $OptNames)));
    }
    # boundaries, if set; an unset boundary leaves its half of the
    # expression empty
    if ($LowBound or $UppBound) {
        my $CaptionBoundary = '(counting only months fulfilling this condition)';
        my ($Lower, $LowerOp) = $LowBound ? ($LowBound, '=>') : ('', '');
        my ($UpperOp, $Upper) = $UppBound ? ('<=', $UppBound) : ('', '');
        $LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
                           $Lower, $LowerOp, $UpperOp, $Upper, $CaptionBoundary);
    }
    # primary and secondary sort order
    my $GroupedBy  = ($GroupBy eq 'month') ? 'Months' : 'Names';
    my $GroupOrder = ($OptGroupBy and $OptGroupBy =~ /-?desc$/i)
        ? 'descending' : 'ascending';
    my $SortedBy   = ($OptOrderBy and $OptOrderBy =~ /posting/i)
        ? 'by number of postings ' : '';
    my $SortOrder  = ($OptOrderBy and $OptOrderBy =~ /-?desc$/i)
        ? 'descending' : 'ascending';
    $LeadIn .= sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
                       $GroupedBy, $GroupOrder, $SortedBy, $SortOrder);
}
# output data
# (changed code copy from NewsStats::OutputData)
#
# Walks through the result set of $DBQuery and prints one formatted line
# per row, either to STDOUT or - with --filetemplate - to one file per
# caption (month or client). With --versions, the versions of each client
# are fetched by a second query and printed below the client.
my ($LastIteration, $FileName, $Handle, $OUT);
# define output types
my %LegalOutput;
@LegalOutput{('dump','list','pretty')} = ();
# bail out if format is unknown
&Bleat(2,"Unknown output type '$OptFormat'!") if !exists($LegalOutput{$OptFormat});

### prepare the query for client versions once, before looping over clients
my ($VersByMonth, $SQLVersWhereClause, $DBVersQuery);
if ($OptVersions) {
    # sum reports deliver the literal 'All months' instead of a real month,
    # so matching "month = ?" could never find a row there; for those
    # reports the versions are summed up over the whole time period instead
    $VersByMonth = !($OptReportType and $OptReportType ne 'default');
    # WHERE clause without 'ALL' version, restricted to one client
    # (and one month); kept in a variable of its own so that
    # $SQLWhereClause of the main query is not overwritten
    $SQLVersWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
                                         $ExcludeSums,"version != 'ALL'",
                                         'client = ?',
                                         $VersByMonth ? 'month = ?' : '',
                                         &SQLSetBounds('default',$LowBound,$UppBound));
    $DBVersQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
                                      $VersByMonth ? 'version,postings'
                                                   : 'version,SUM(postings)',
                                      $Conf{'DBDatabase'},$Conf{'DBTable'},
                                      $SQLVersWhereClause,$SQLGroupClause,
                                      $SQLOrderClause));
}

while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
    # save client for later use
    # NOTE(review): this is LEFT(client,40) from the main query; a client
    # name longer than 40 characters will presumably not match "client = ?"
    # in the versions query below - confirm against the table schema
    my $Client = $Key;
    # care for correct sorting order and abstract from month and keys:
    # $Caption will be $Month or $Key, according to sorting order,
    # and $Key will be $Key or $Month, respectively
    my $Caption;
    if ($GroupBy eq 'key') {
        $Caption = $Key;
        $Key     = $Month;
    } else {
        $Caption = $Month;
    }
    # set output file handle
    if (!$OptFileTemplate) {
        $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
    } elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
        # new caption: finish the previous file (if any) and start a new one
        if (defined($OUT)) {
            close($OUT)
                or warn sprintf("Cannot close output file '%s': %s\n",
                                $FileName, $!);
        }
        # safeguards for filename creation:
        # replace potential problem characters with '_'
        # NOTE(review): this also rewrites '/' and '.' inside the template
        # itself, so a template containing a path ends up flattened
        $FileName = sprintf('%s-%s',$OptFileTemplate,$Caption);
        $FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
        # three-argument open; $! is passed as an argument so that a '%'
        # in the error text cannot be mistaken for a format directive
        open ($OUT,'>',$FileName)
            or &Bleat(2,sprintf("Cannot open output file '%s': %s",
                                $FileName,$!));
        $Handle = $OUT;
    };
    print {$Handle} &FormatOutput($OptFormat, $OptComments, $LeadIn, $Caption,
                                  $Key, $Value, 0, $MaxLength, $MaxValLength,
                                  $LastIteration);
    # output client versions
    if ($OptVersions) {
        # bind values: names first, then client (and month),
        # in the order of the placeholders in $SQLVersWhereClause
        my @SQLVersBindNames = (@SQLBindNames, $Client);
        push (@SQLVersBindNames, $Month) if $VersByMonth;
        # get length of longest version delivered by query
        # for formatting purposes
        my ($VersMaxLength,$VersMaxValLength)
            = &GetMaxLength($DBHandle,$Conf{'DBTable'},
                            'version','postings',$SQLVersWhereClause,
                            '',@SQLVersBindNames);
        if ($VersMaxLength) {
            # add length of '- '
            $VersMaxLength += 2;
            # set to length of longest client, if longer
            $VersMaxLength    = $MaxLength    if $MaxLength    > $VersMaxLength;
            $VersMaxValLength = $MaxValLength if $MaxValLength > $VersMaxValLength;
        }
        # execute query
        $DBVersQuery->execute(@SQLVersBindNames)
            or &Bleat(2,sprintf("Can't get version data for %s from %s.%s: %s\n",
                                $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
                                $DBI::errstr));
        # output versions, marked with a leading '- '
        while (my ($Version, $Postings) = $DBVersQuery->fetchrow_array) {
            $Version = '- ' . $Version;
            print {$Handle} &FormatOutput($OptFormat, $OptComments, $LeadIn, '',
                                          $Version, $Postings, 0,
                                          $VersMaxLength, $VersMaxValLength,
                                          '');
        }
    }
    $LastIteration = $Caption;
};
# $OUT is only defined if at least one output file has been opened;
# calling close() on an undefined handle (empty result set) would be fatal
if (defined($OUT)) {
    close($OUT)
        or warn sprintf("Cannot close output file '%s': %s\n",$FileName,$!);
}
### close handles
$DBHandle->disconnect;
__END__
################################ Documentation #################################
=head1 NAME
clientstats - create reports on client usage
=head1 SYNOPSIS
B<clientstats> [B<-Vhcsv> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS
See L<doc/README>.
=head1 DESCRIPTION
This script creates reports on newsgroup usage (number of postings
using each client per month) taken from result tables created by
B<gatherstats.pl>.
=head2 Features and options
=head3 Time period and names
The time period to act on defaults to last month; you can assign another
time period or a single month (or drop all time constraints) via the
B<--month> option (see below).
B<clientstats> will process all clients by default; you can limit
processing to only some clients by supplying a list of those names by
using the B<--names> option (see below).
=head3 Report type
You can choose between different B<--report> types: postings per month
or all postings summed up; for details, see below.
=head3 Upper and lower boundaries
Furthermore you can set an upper and/or lower boundary to exclude some
results from output via the B<--lower> and B<--upper> options,
respectively. By default, all clients with more and/or less postings
per month will be excluded from the result set (i.e. not shown and
not considered for sum reports).
=head3 Sorting and formatting the output
By default, all results are grouped by month; you can group results by
clients instead via the B<--group-by> option. Within those groups,
the list of clients (or months) is sorted alphabetically
(or chronologically, respectively) ascending. You can change that order
(and sort by number of postings) with the B<--order-by> option. For
details and exceptions, please see below.
The results will be formatted as a kind of table; you can change the
output format to a simple list or just a list of names and number of
postings with the B<--format> option. Captions will be added by means
of the B<--caption> option; all comments (and captions) can be
suppressed by using B<--nocomments>.
Last but not least you can redirect all output to a number of files,
e.g. one for each month, by submitting the B<--filetemplate> option,
see below.
=head2 Configuration
B<clientstats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration
file submitted by the B<--conffile> option.
See doc/INSTALL for an overview of possible configuration options.
You can override some configuration options via the B<--db> option.
=head1 OPTIONS
=over 3
=item B<-V>, B<--version>
Display version and copyright information and exit.
=item B<-h>, B<--help>
Display this man page and exit.
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
Set processing period to a single month in YYYY-MM format or to a time
period between two months in YYYY-MM:YYYY-MM format (two months, separated
by a colon). By using the keyword I<all> instead, you can set no
processing period to process the whole database. Defaults to last month.
=item B<-n>, B<--names> I<name(s)>
Limit processing to a certain set of client names. I<name(s)>
can be a single name (Thunderbird), a group of names (Ice*) or a list
of either of these, separated by colons, for example
Forte Agent:Thunderbird:Ice*
Spaces or special characters like "*" need to be quoted from the shell,
like
-n 'Forte Agent:Thunderbird:Ice*'
There is no way to limit processing to a specific version, but you can
always grep through the output.
=item B<-s>, B<--sums|--nosums> (sum per month)
Include "virtual" clients named "ALL" for every month in output,
containing the sum of all detected clients for that month. False
by default.
=item B<-v>, B<--versions|--noversions> (client versions)
Include a list of all observed versions of each client in output.
Version information will be displayed with indents ('-') below each
client, sorted in the same way (by postings or alphanumeric). False
by default.
=item B<-r>, B<--report> I<default|sums>
Choose the report type: I<default> or I<sums>
By default, B<clientstats> will report the number of postings for each
client in each month. But it can also report the total sum of postings
per client for all months. Sums of B<--versions> can be included.
For report type I<sums>, the B<group-by> option has no meaning and
will be silently ignored (see below).
=item B<-l>, B<--lower> I<lower boundary>
Set the lower boundary. See below.
=item B<-u>, B<--upper> I<upper boundary>
Set the upper boundary.
By default, all clients with more postings per month than the
upper boundary and/or less postings per month than the lower boundary
will be excluded from further processing. For the default report that
means each month only clients with a number of postings between the
boundaries will be displayed. For the sums report, clients with a
number of postings exceeding the boundaries in all (!) months will
not be considered.
=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>
By default, all results are grouped by month, sorted chronologically in
ascending order, like this:
# ----- 2012-01:
40tude_Dialog: 5873
Forte Agent : 7735
Thunderbird : 20925
# ----- 2012-02:
40tude_Dialog: 4142
Forte Agent : 5895
Thunderbird : 19091
The results can be grouped by client instead via
B<--group-by> I<name>:
# ----- 40tude_Dialog:
2012-01: 5873
2012-02: 4142
# ----- Forte Agent:
2012-01: 7735
2012-02: 5895
# ----- Thunderbird:
2012-01: 20925
2012-02: 19091
By appending I<-desc> to the group-by option parameter, you can reverse
the sort order - e.g. B<--group-by> I<month-desc> will give:
# ----- 2012-02:
40tude_Dialog: 4142
Forte Agent : 5895
Thunderbird : 19091
# ----- 2012-01:
40tude_Dialog: 5873
Forte Agent : 7735
Thunderbird : 20925
Sums reports (see above) will always be grouped by months; this option
will therefore be ignored.
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
Within each group (a single month or single client, see above),
the report will be sorted by name (or month) in ascending alphabetical
order by default. You can change the sort order to descending or sort
by number of postings instead.
By default, output is sorted alphabetically:
# ----- 2012-01:
40tude_Dialog: 5873
Forte Agent : 7735
Thunderbird : 20925
Using B<--order-by> I<postings[-desc]>, it will be sorted from most
to least postings:
# ----- 2012-01:
Thunderbird : 20925
Forte Agent : 7735
40tude_Dialog: 5873
=item B<-f>, B<--format> I<pretty|list|dump>
Select the output format, I<pretty> (a kind of table) being the default:
# ----- 2012-01:
40tude_Dialog: 5873
Forte Agent : 7735
# ----- 2012-02:
40tude_Dialog: 4142
Forte Agent : 5895
I<list> format looks like this (each client preceded by month):
2012-01 40tude_Dialog 5873
2012-01 Forte Agent 7735
2012-02 40tude_Dialog 4142
2012-02 Forte Agent 5895
And I<dump> format looks like this:
# 2012-01:
40tude_Dialog 5873
Forte Agent 7735
# 2012-02:
40tude_Dialog 4142
Forte Agent 5895
You can remove the comments (lines after '#') by using B<--nocomments>,
see below.
=item B<-c>, B<--captions|--nocaptions>
Add captions to output, like this:
----- Report for 2012-01 to 2012-02 (number of postings for each month)
----- Names: Thunderbird
----- Threshold: 8000 => x (counting only months fulfilling this condition)
----- Grouped by Months (ascending), sorted by number of postings descending
False by default.
=item B<--comments|--nocomments>
Add comments (group headers) to I<dump> and I<pretty> output. True by
default as long as B<--filetemplate> is not set.
Use I<--nocomments> to suppress anything except client names or months
and numbers of postings.
=item B<--filetemplate> I<filename template>
Save output to file(s) instead of dumping it to STDOUT. B<clientstats>
will create one file for each month (or each client, according to the
setting of B<--group-by>, see above), with filenames composed by adding
year and month (or client names) to the I<filename template>, for
example with B<--filetemplate> I<stats>:
stats-2012-01
stats-2012-02
... and so on
=item B<--db> I<database table>
Override I<DBTableClnts> from F<newsstats.conf>.
=item B<--conffile> I<filename>
Read configuration from I<filename> instead of F<newsstats.conf>.
=back
=head1 INSTALLATION
See L<doc/INSTALL>.
=head1 EXAMPLES
Show number of postings per client for last month in I<pretty> format:
clientstats
Show that report for January of 2010 and Thunderbird plus Ice*:
clientstats --month 2010-01 --names 'Thunderbird:Ice*'
Only show clients with at least 30 postings last month and the versions
of those clients, ordered each by number of postings, descending,
in I<pretty> format:
clientstats --lower 30 --versions --order-by postings-desc
List number of postings per client for each month of 2010 and redirect
output to one file for each month, named hosts-2010-01 and so on, in
machine-readable form (without formatting):
clientstats -m 2010-01:2010-12 -f dump --filetemplate hosts
=head1 FILES
=over 4
=item F<bin/clientstats.pl>
The script itself.
=item F<lib/NewsStats.pm>
Library functions for the NewsStats package.
=item F<etc/newsstats.conf>
Runtime configuration file.
=back
=head1 BUGS
Please report any bugs or feature requests to the author or use the
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
=head1 SEE ALSO
=over 2
=item -
L<doc/README>
=item -
L<doc/INSTALL>
=item -
gatherstats -h
=back
This script is part of the B<NewsStats> package.
=head1 AUTHOR
Thomas Hochstein <thh@thh.name>
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.
=cut

182
bin/hoststats.pl → bin/cliservstats.pl Executable file → Normal file
View file

@ -1,9 +1,9 @@
#! /usr/bin/perl #! /usr/bin/perl
# #
# hoststats.pl # cliservstats.pl
# #
# This script will get statistical data on server (host) usage # This script will get statistical data on client (newsreader) and
# from a database. # server (host) usage from a database.
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
@ -31,7 +31,7 @@ Getopt::Long::config ('bundling');
### read commandline options ### read commandline options
my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat, my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat,
$OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy, $OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy,
$OptReportType,$OptSums,$UppBound,$OptConfFile); $OptReportType,$OptSums,$OptType,$UppBound,$OptConfFile);
GetOptions ('c|captions!' => \$OptCaptions, GetOptions ('c|captions!' => \$OptCaptions,
'comments!' => \$OptComments, 'comments!' => \$OptComments,
'db=s' => \$OptDB, 'db=s' => \$OptDB,
@ -44,6 +44,7 @@ GetOptions ('c|captions!' => \$OptCaptions,
'o|order-by=s' => \$OptOrderBy, 'o|order-by=s' => \$OptOrderBy,
'r|report=s' => \$OptReportType, 'r|report=s' => \$OptReportType,
's|sums!' => \$OptSums, 's|sums!' => \$OptSums,
't|type=s' => \$OptType,
'u|upper=i' => \$UppBound, 'u|upper=i' => \$UppBound,
'conffile=s' => \$OptConfFile, 'conffile=s' => \$OptConfFile,
'h|help' => \&ShowPOD, 'h|help' => \&ShowPOD,
@ -51,6 +52,15 @@ GetOptions ('c|captions!' => \$OptCaptions,
# parse parameters # parse parameters
# $OptComments defaults to TRUE if --filetemplate is not used # $OptComments defaults to TRUE if --filetemplate is not used
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments)); $OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
# parse $OptType
# normalize --type to the column name 'host' or 'client'; any other value
# is discarded, because $OptType is interpolated into the SQL statements
# as a column name later on and must never carry arbitrary user input
if ($OptType) {
    if ($OptType =~ /(host|server)s?/i) {
        $OptType = 'host';
    } elsif ($OptType =~ /(newsreader|client)s?/i) {
        $OptType = 'client';
    } else {
        # unknown type: treat like a missing --type option
        undef $OptType;
    }
}
# bail out if --type is missing or was not recognized
&Bleat(2, "Please use '--type server' or '--type newsreader'.") if !$OptType;
# parse $OptReportType # parse $OptReportType
if ($OptReportType) { if ($OptReportType) {
if ($OptReportType =~ /sums?/i) { if ($OptReportType =~ /sums?/i) {
@ -64,8 +74,14 @@ if ($OptReportType) {
my %Conf = %{ReadConfig($OptConfFile)}; my %Conf = %{ReadConfig($OptConfFile)};
### set DBTable ### set DBTable
if ($OptDB) {
$Conf{'DBTable'} = $OptDB;
}
elsif ($OptType eq 'host') {
$Conf{'DBTable'} = $Conf{'DBTableHosts'}; $Conf{'DBTable'} = $Conf{'DBTableHosts'};
$Conf{'DBTable'} = $OptDB if $OptDB; } else {
$Conf{'DBTable'} = $Conf{'DBTableClnts'};
}
### init database ### init database
my $DBHandle = InitDB(\%Conf,1); my $DBHandle = InitDB(\%Conf,1);
@ -81,14 +97,14 @@ my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
# with placeholders as well as a list of names to bind to them # with placeholders as well as a list of names to bind to them
my ($SQLWhereNames,@SQLBindNames); my ($SQLWhereNames,@SQLBindNames);
if ($OptNames) { if ($OptNames) {
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,'host'); ($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,$OptType);
# bail out if --names is invalid # bail out if --names is invalid
&Bleat(2,"--names option has an invalid format!") &Bleat(2,"--names option has an invalid format!")
if !$SQLWhereNames; if !$SQLWhereNames;
} }
### build SQL WHERE clause ### build SQL WHERE clause
my $ExcludeSums = $OptSums ? '' : sprintf("%s != 'ALL'",'host'); my $ExcludeSums = $OptSums ? '' : sprintf("%s != 'ALL'",$OptType);
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames, my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
$ExcludeSums, $ExcludeSums,
&SQLSetBounds('default',$LowBound,$UppBound)); &SQLSetBounds('default',$LowBound,$UppBound));
@ -102,8 +118,8 @@ $OptGroupBy = 'name' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
and $OptNames and $OptNames !~ /[:*%]/); and $OptNames and $OptNames !~ /[:*%]/);
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause # parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month' # if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'host'); my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, $OptType);
# $GroupBy will contain 'month' or 'host' (parsed result of $OptGroupBy) # $GroupBy will contain 'month' or 'host'/'client' (parsed result of $OptGroupBy)
# set it to 'month' or 'key' for OutputData() # set it to 'month' or 'key' for OutputData()
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key'; $GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
@ -112,19 +128,19 @@ my $SQLSelect;
my $SQLGroupClause = ''; my $SQLGroupClause = '';
my $Precision = 0; # number of digits right of decimal point for output my $Precision = 0; # number of digits right of decimal point for output
if ($OptReportType and $OptReportType ne 'default') { if ($OptReportType and $OptReportType ne 'default') {
$SQLGroupClause = "GROUP BY host"; $SQLGroupClause = "GROUP BY $OptType";
# change $SQLOrderClause: replace everything before 'postings' # change $SQLOrderClause: replace everything before 'postings'
$SQLOrderClause =~ s/BY.+postings/BY postings/; $SQLOrderClause =~ s/BY.+postings/BY postings/;
$SQLSelect = "'All months',host,SUM(postings)"; $SQLSelect = "'All months',$OptType,SUM(postings)";
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)' # change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
$SQLOrderClause =~ s/postings/SUM(postings)/; $SQLOrderClause =~ s/postings/SUM(postings)/;
} else { } else {
$SQLSelect = "month,host,postings"; $SQLSelect = "month,$OptType,postings";
}; };
### get length of longest name delivered by query ### get length of longest name delivered by query
### for formatting purposes ### for formatting purposes
my $Field = ($GroupBy eq 'month') ? 'host' : 'month'; my $Field = ($GroupBy eq 'month') ? $OptType : 'month';
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'}, my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
$Field,'postings',$SQLWhereClause, $Field,'postings',$SQLWhereClause,
'',@SQLBindNames); '',@SQLBindNames);
@ -139,8 +155,8 @@ $DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
$SQLOrderClause)); $SQLOrderClause));
# execute query # execute query
$DBQuery->execute(@SQLBindNames) $DBQuery->execute(@SQLBindNames)
or &Bleat(2,sprintf("Can't get host data for %s from %s.%s: %s\n", or &Bleat(2,sprintf("Can't get %s data for %s from %s.%s: %s\n",
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'}, $OptType,$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
$DBI::errstr)); $DBI::errstr));
### output results ### output results
@ -159,7 +175,7 @@ if ($OptCaptions && $OptComments) {
$LeadIn .= sprintf("# ----- Names: %s\n",join(',',split(/:/,$OptNames))) $LeadIn .= sprintf("# ----- Names: %s\n",join(',',split(/:/,$OptNames)))
if $OptNames; if $OptNames;
# print boundaries, if set # print boundaries, if set
my $CaptionBoundary= '(counting only months fulfilling this condition)'; my $CaptionBoundary= '(counting only month fulfilling this condition)';
$LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n", $LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
$LowBound ? $LowBound : '',$LowBound ? '=>' : '', $LowBound ? $LowBound : '',$LowBound ? '=>' : '',
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary) $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
@ -185,11 +201,11 @@ __END__
=head1 NAME =head1 NAME
hoststats - create reports on host usage cliservstats - create reports on host or client usage
=head1 SYNOPSIS =head1 SYNOPSIS
B<hoststats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<server(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>] B<cliservstats> B<-t> I<host|client> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<server(s)|client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
@ -197,8 +213,9 @@ See L<doc/README>.
=head1 DESCRIPTION =head1 DESCRIPTION
This script creates reports on newsgroup usage (number of postings from This script creates reports on newsgroup usage (number of postings from
each host) taken from result tables created by B<gatherstats.pl>. each host or using each client per month) taken from result tables
created by B<gatherstats.pl>.
=head2 Features and options =head2 Features and options
@ -208,9 +225,9 @@ The time period to act on defaults to last month; you can assign another
time period or a single month (or drop all time constraints) via the time period or a single month (or drop all time constraints) via the
B<--month> option (see below). B<--month> option (see below).
B<hoststats> will process all hosts by default; you can limit B<cliservstats> will process all hosts or clients by default; you can
processing to only some hosts by supplying a list of those names by limit processing to only some hosts or clients by supplying a list of
using the B<--names> option (see below). those names by using the B<--names> option (see below).
=head3 Report type =head3 Report type
@ -221,34 +238,33 @@ or all postings summed up; for details, see below.
Furthermore you can set an upper and/or lower boundary to exclude some Furthermore you can set an upper and/or lower boundary to exclude some
results from output via the B<--lower> and B<--upper> options, results from output via the B<--lower> and B<--upper> options,
respectively. By default, all hosts with more and/or less postings respectively. By default, all hosts/clients with more and/or less
per month will be excluded from the result set (i.e. not shown and postings per month will be excluded from the result set (i.e. not
not considered for sum reports). shown and not considered for sum reports).
=head3 Sorting and formatting the output =head3 Sorting and formatting the output
By default, all results are grouped by month; you can group results by By default, all results are grouped by month; you can group results by
hosts instead via the B<--group-by> option. Within those groups, the hosts/clients instead via the B<--group-by> option. Within those
list of hosts (or months) is sorted alphabetically (or chronologically, groups, the list of hosts/clients (or months) is sorted alphabetically
respectively) ascending. You can change that order (and sort by number (or chronologically, respectively) ascending. You can change that order
of postings) with the B<--order-by> option. For details and exceptions, (and sort by number of postings) with the B<--order-by> option. For
please see below. details and exceptions, please see below.
The results will be formatted as a kind of table; you can change the The results will be formatted as a kind of table; you can change the
output format to a simple list or just a list of names and number of output format to a simple list or just a list of names and number of
postings with the B<--format> option. Captions will be added by means postings with the B<--format> option. Captions will be added by means of
of the B<--caption> option; all comments (and captions) can be the B<--caption> option; all comments (and captions) can be suppressed by
suppressed by using B<--nocomments>. using B<--nocomments>.
Last but not least you can redirect all output to a number of files, Last but not least you can redirect all output to a number of files, e.g.
e.g. one for each month, by submitting the B<--filetemplate> option, one for each month, by submitting the B<--filetemplate> option, see below.
see below.
=head2 Configuration =head2 Configuration
B<hoststats> will read its configuration from F<newsstats.conf> B<cliservstats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration which should be present in etc/ via Config::Auto or from a configuration file
file submitted by the B<--conffile> option. submitted by the B<--conffile> option.
See doc/INSTALL for an overview of possible configuration options. See doc/INSTALL for an overview of possible configuration options.
@ -260,22 +276,27 @@ You can override some configuration options via the B<--db> option.
=item B<-V>, B<--version> =item B<-V>, B<--version>
Display version and copyright information and exit. Print out version and copyright information and exit.
=item B<-h>, B<--help> =item B<-h>, B<--help>
Display this man page and exit. Print this man page and exit.
=item B<-t>, B<--type> I<host|client>
Create report for hosts (servers) or clients (newsreaders), using
I<DBTableHosts> or I<DBTableClnts> respectively.
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all> =item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
Set processing period to a single month in YYYY-MM format or to a time Set processing period to a single month in YYYY-MM format or to a time
period between two months in YYYY-MM:YYYY-MM format (two months, separated period between two months in YYYY-MM:YYYY-MM format (two months, separated
by a colon). By using the keyword I<all> instead, you can set no by a colon). By using the keyword I<all> instead, you can set no
processing period to process the whole database. Defaults to last month. processing period to process the whole database.
=item B<-n>, B<--names> I<name(s)> =item B<-n>, B<--names> I<name(s)>
Limit processing to a certain set of hostnames. I<name(s)> Limit processing to a certain set of host or client names. I<name(s)>
can be a single name (eternal-september.org), a group of names can be a single name (eternal-september.org), a group of names
(*.inka.de) or a list of either of these, separated by colons, for (*.inka.de) or a list of either of these, separated by colons, for
example example
@ -285,16 +306,15 @@ example
=item B<-s>, B<--sums|--nosums> (sum per month) =item B<-s>, B<--sums|--nosums> (sum per month)
Include a "virtual" host named "ALL" for every month in output, Include a "virtual" host named "ALL" for every month in output,
containing the sum of all detected hosts for that month. False containing the sum of all detected hosts for that month.
by default.
=item B<-r>, B<--report> I<default|sums> =item B<-r>, B<--report> I<default|sums>
Choose the report type: I<default> or I<sums> Choose the report type: I<default> or I<sums>
By default, B<hoststats> will report the number of postings for each By default, B<cliservstats> will report the number of postings for each
host in each month. But it can also report the total sum of postings host/client in each month. But it can also report the total sum of postings
per host for all months. per host/client for all months.
For report type I<sums>, the B<group-by> option has no meaning and For report type I<sums>, the B<group-by> option has no meaning and
will be silently ignored (see below). will be silently ignored (see below).
@ -307,18 +327,18 @@ Set the lower boundary. See below.
Set the upper boundary. Set the upper boundary.
By default, all hosts with more postings per month than the upper By default, all hosts/clients with more postings per month than the
boundary and/or less postings per month than the lower boundary upper boundary and/or less postings per month than the lower boundary
will be excluded from further processing. For the default report that will be excluded from further processing. For the default report that
means each month only hosts with a number of postings between the means each month only hosts/clients with a number of postings between
boundaries will be displayed. For the sums report, hosts with a number the boundaries will be displayed. For the sums report, hosts/clients
of postings exceeding the boundaries in all (!) months will not be with a number of postings exceeding the boundaries in all (!) months
considered. will not be considered.
=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]> =item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>
By default, all results are grouped by month, sorted chronologically By default, all results are grouped by month, sorted chronologically in
in ascending order, like this: ascending order, like this:
# ----- 2012-01: # ----- 2012-01:
arcor-online.net : 9379 arcor-online.net : 9379
@ -329,7 +349,8 @@ in ascending order, like this:
individual.net : 16768 individual.net : 16768
news.albasani.net: 7879 news.albasani.net: 7879
The results can be grouped by host instead via B<--group-by> I<name>: The results can be grouped by host/client instead via
B<--group-by> I<name>:
----- individual.net ----- individual.net
2012-01: 19525 2012-01: 19525
@ -341,8 +362,8 @@ The results can be grouped by host instead via B<--group-by> I<name>:
2012-01: 9063 2012-01: 9063
2012-02: 7879 2012-02: 7879
By appending I<-desc> to the group-by option parameter, you can By appending I<-desc> to the group-by option parameter, you can reverse
reverse the sort order - e.g. B<--group-by> I<month-desc> will give: the sort order - e.g. B<--group-by> I<month-desc> will give:
# ----- 2012-02: # ----- 2012-02:
arcor-online.net : 8606 arcor-online.net : 8606
@ -358,8 +379,8 @@ will therefore be ignored.
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]> =item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
Within each group (a single month or single host, see above), the Within each group (a single month or single host/client, see above),
report will be sorted by host (or month) in ascending alphabetical the report will be sorted by name (or month) in ascending alphabetical
order by default. You can change the sort order to descending or sort order by default. You can change the sort order to descending or sort
by number of postings instead. by number of postings instead.
@ -405,19 +426,19 @@ False by default.
=item B<--comments|--nocomments> =item B<--comments|--nocomments>
Add comments (group headers) to I<dump> and I<pretty> output. True by Add comments (group headers) to I<dump> and I<pretty> output. True by default
default as long as B<--filetemplate> is not set. as long as B<--filetemplate> is not set.
Use I<--nocomments> to suppress anything except host names or months Use I<--nocomments> to suppress anything except host/client names or months and
and numbers of postings. numbers of postings.
=item B<--filetemplate> I<filename template> =item B<--filetemplate> I<filename template>
Save output to file(s) instead of dumping it to STDOUT. B<hoststats> Save output to file(s) instead of dumping it to STDOUT. B<cliservstats> will
will create one file for each month (or each host, according to the create one file for each month (or each host/client, according to the
setting of B<--group-by>, see above), with filenames composed by adding setting of B<--group-by>, see above), with filenames composed by adding
year and month (or hostnames) to the I<filename template>, for example year and month (or host/client names) to the I<filename template>, for
with B<--filetemplate> I<stats>: example with B<--filetemplate> I<stats>:
stats-2012-01 stats-2012-01
stats-2012-02 stats-2012-02
@ -425,11 +446,11 @@ with B<--filetemplate> I<stats>:
=item B<--db> I<database table> =item B<--db> I<database table>
Override I<DBTableHosts> from F<newsstats.conf>. Override I<DBTableHosts> or I<DBTableClnts> from F<newsstats.conf>.
=item B<--conffile> I<filename> =item B<--conffile> I<filename>
Read configuration from I<filename> instead of F<newsstats.conf>. Load configuration from I<filename> instead of F<newsstats.conf>.
=back =back
@ -439,30 +460,31 @@ See L<doc/INSTALL>.
=head1 EXAMPLES =head1 EXAMPLES
Show number of postings per host for last month in I<pretty> format: Show number of postings per group for last month in I<pretty> format:
hoststats cliservstats --type host
Show that report for January of 2010 and *.inka plus individual.net: Show that report for January of 2010 and *.inka plus individual.net:
hoststats --month 2010-01 --names *.inka:individual.net: cliservstats --type host --month 2010-01 --names *.inka:individual.net:
Only show hosts with 30 postings or less last month, ordered Only show clients with 30 postings or less last month, ordered
by number of postings, descending, in I<pretty> format: by number of postings, descending, in I<pretty> format:
hoststats --upper 30 --order-by postings-desc cliservstats --type client --upper 30 --order-by postings-desc
List number of postings per host for each month of 2010 and redirect List number of postings per host for each month of 2010 and redirect
output to one file for each month, named hosts-2010-01 and so on, in output to one file for each month, named hosts-2010-01 and so on, in
machine-readable form (without formatting): machine-readable form (without formatting):
hoststats -m 2010-01:2010-12 -f dump --filetemplate hosts cliservstats -t host -m 2010-01:2010-12 -f dump --filetemplate hosts
=head1 FILES =head1 FILES
=over 4 =over 4
=item F<bin/hoststats.pl> =item F<bin/cliservstats.pl>
The script itself. The script itself.
@ -491,7 +513,7 @@ L<doc/README>
=item - =item -
L<doc/INSTALL> L<doc/INSTALL>
=item - =item -

View file

@ -46,7 +46,7 @@ my $DBCreate = <<SQLDB;
CREATE DATABASE IF NOT EXISTS `$Conf{'DBDatabase'}` DEFAULT CHARSET=utf8; CREATE DATABASE IF NOT EXISTS `$Conf{'DBDatabase'}` DEFAULT CHARSET=utf8;
SQLDB SQLDB
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS, 'DBTableHosts' => <<HOSTS, 'DBTableClnts' => <<CLIENTS); my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS, 'DBTableHosts' => <<HOSTS);
-- --
-- Table structure for table DBTableRaw -- Table structure for table DBTableRaw
-- --
@ -102,23 +102,6 @@ CREATE TABLE IF NOT EXISTS `$Conf{'DBTableHosts'}` (
KEY `host` (`host`) KEY `host` (`host`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per server'; ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per server';
HOSTS HOSTS
--
-- Table structure for table DBTableClnts
--
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableClnts'}` (
`id` bigint(20) unsigned NOT NULL auto_increment,
`month` varchar(7) character set ascii NOT NULL,
`client` varchar(150) NOT NULL,
`version` varchar(50) NOT NULL,
`postings` int(11) NOT NULL,
`revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `month_client_version` (`month`,`client`,`version`),
KEY `month` (`month`),
KEY `client` (`client`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='Postings per client';
CLIENTS
##### -------------------------------------------------------------------------- ##### --------------------------------------------------------------------------
##### Installation / upgrade instructions ##### Installation / upgrade instructions
@ -218,7 +201,16 @@ if (!$OptUpdate) {
} else { } else {
##### upgrade mode ##### upgrade mode
print "----------\nStarting upgrade process.\n"; print "----------\nStarting upgrade process.\n";
# TBD my $PackageVersion = '0.03';
if ($OptUpdate < $PackageVersion) {
if ($OptUpdate < 0.02) {
# 0.01 -> 0.02
# &DoMySQL('...;');
# print "v0.02: Database upgrades ...\n";
# &PrintInstructions('0.02',<<" INSTRUCTIONS");
# INSTRUCTIONS
};
};
# Display general upgrade instructions # Display general upgrade instructions
print $Upgrade; print $Upgrade;
}; };
@ -298,11 +290,11 @@ See L<doc/INSTALL> for an overview of possible configuration options.
=item B<-V>, B<--version> =item B<-V>, B<--version>
Display version and copyright information and exit. Print out version and copyright information and exit.
=item B<-h>, B<--help> =item B<-h>, B<--help>
Display this man page and exit. Print this man page and exit.
=item B<-u>, B<--update> I<version> =item B<-u>, B<--update> I<version>
@ -310,7 +302,7 @@ Don't do a fresh install, but update from I<version>.
=item B<--conffile> I<filename> =item B<--conffile> I<filename>
Read configuration from I<filename> instead of F<newsstats.conf>. Load configuration from I<filename> instead of F<newsstats.conf>.
=back =back

View file

@ -167,9 +167,8 @@ time.
All reporting is done to I<syslog> via I<news> facility. If B<feedlog> All reporting is done to I<syslog> via I<news> facility. If B<feedlog>
fails to initiate a database connection at startup, it will log to fails to initiate a database connection at startup, it will log to
I<syslog> with I<CRIT> priority and go in an endless loop, trying again I<syslog> with I<CRIT> priority and go in an endless loop, as
to connect every 5 seconds, as terminating would only result in a rapid terminating would only result in a rapid respawn.
respawn.
=head2 Configuration =head2 Configuration
@ -185,15 +184,15 @@ See L<doc/INSTALL> for an overview of possible configuration options.
=item B<-V>, B<--version> =item B<-V>, B<--version>
Display version and copyright information and exit. Print out version and copyright information and exit.
=item B<-h>, B<--help> =item B<-h>, B<--help>
Display this man page and exit. Print this man page and exit.
=item B<-d>, B<--debug> =item B<-d>, B<--debug>
Print debugging information to STDERR while parsing STDIN. You'll Output debugging information to STDERR while parsing STDIN. You'll
find that information most probably in your B<INN> F<errlog> file. find that information most probably in your B<INN> F<errlog> file.
=item B<-q>, B<--quiet> =item B<-q>, B<--quiet>
@ -202,7 +201,7 @@ Suppress logging to syslog.
=item B<--conffile> I<filename> =item B<--conffile> I<filename>
Read configuration from I<filename> instead of F<newsstats.conf>. Load configuration from I<filename> instead of F<newsstats.conf>.
=back =back

View file

@ -23,7 +23,6 @@ use warnings;
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList ParseHeaders); use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList ParseHeaders);
use DBI; use DBI;
use Encode qw(decode encode);
use Getopt::Long qw(GetOptions); use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling'); Getopt::Long::config ('bundling');
@ -32,7 +31,7 @@ Getopt::Long::config ('bundling');
# define types of information that can be gathered # define types of information that can be gathered
# all / groups (/ clients / hosts) # all / groups (/ clients / hosts)
my %LegalStats; my %LegalStats;
@LegalStats{('all','groups','hosts','clients')} = (); @LegalStats{('all','groups','hosts')} = ();
################################# Main program ################################# ################################# Main program #################################
@ -62,8 +61,8 @@ my %Conf = %{ReadConfig($OptConfFile)};
my %ConfOverride; my %ConfOverride;
$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB; $ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; $ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB; $ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
$ConfOverride{'TLH'} = $OptTLH if $OptTLH; $ConfOverride{'TLH'} = $OptTLH if $OptTLH;
&OverrideConfig(\%Conf,\%ConfOverride); &OverrideConfig(\%Conf,\%ConfOverride);
@ -85,21 +84,6 @@ my ($Period) = &GetTimePeriod($OptMonth);
&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ". &Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
"'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time'); "'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
### init database
my $DBHandle = InitDB(\%Conf,1);
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
my $DBHosts = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
my $DBClients = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableClnts'});
### get data for each month
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
foreach my $Month (&ListMonth($Period)) {
print "---------- $Month ----------\n" if $OptDebug;
### GroupStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
### reformat $Conf{'TLH'} ### reformat $Conf{'TLH'}
my $TLH; my $TLH;
if ($Conf{'TLH'}) { if ($Conf{'TLH'}) {
@ -128,6 +112,21 @@ foreach my $Month (&ListMonth($Period)) {
$TLH = '(' . $TLH . ')'; $TLH = '(' . $TLH . ')';
}; };
}; };
### init database
my $DBHandle = InitDB(\%Conf,1);
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
my $DBHosts = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
### get data for each month
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
foreach my $Month (&ListMonth($Period)) {
print "---------- $Month ----------\n" if $OptDebug;
### GroupStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptMID,$OptTest,$OptDebug); &GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptMID,$OptTest,$OptDebug);
}; };
@ -139,16 +138,7 @@ foreach my $Month (&ListMonth($Period)) {
googlegroups.com heirich.name news.neostrada.pl netcologne.de newsdawg.com newscene.com googlegroups.com heirich.name news.neostrada.pl netcologne.de newsdawg.com newscene.com
news-service.com octanews.com readnews.com wieslauf.sub.de highway.telekom.at news-service.com octanews.com readnews.com wieslauf.sub.de highway.telekom.at
united-newsserver.de xennanews.com xlned.com xsnews.nl news.xs4all.nl); united-newsserver.de xennanews.com xlned.com xsnews.nl news.xs4all.nl);
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@KnownHosts); &HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptMID,$OptTest,$OptDebug,@KnownHosts);
};
### ClientStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'clients') {
# define agents/clients that shouldn't be counted
my @DropAgents = qw(debian fedora firefox gecko gentoo lightning mandriva mnenhy mozilla
pclinuxos perl php presto suse suse/opensuse thunderbrowse ubuntu version);
push(@DropAgents, 'red hat');
&ClientStats($DBHandle,$DBRaw,$DBClients,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@DropAgents);
}; };
}; };
@ -164,10 +154,10 @@ sub GroupStats {
### $DBRaw : database table for raw data (to read from) ### $DBRaw : database table for raw data (to read from)
### $DBGrps : database table for groups data (to write to) ### $DBGrps : database table for groups data (to write to)
### $Month : current month to do ### $Month : current month to do
### $MID : specific Message-ID to fetch (testing purposes)
### $TLH : TLHs to collect ### $TLH : TLHs to collect
### $Checkgroupsfile : filename template for checkgroups file ### $Checkgroupsfile : filename template for checkgroups file
### (expanded to $Checkgroupsfile-$Month) ### (expanded to $Checkgroupsfile-$Month)
### $MID : specific Message-ID to fetch (testing purposes)
### $Test : test mode ### $Test : test mode
### $Debug : debug mode ### $Debug : debug mode
### OUT: (nothing) ### OUT: (nothing)
@ -262,24 +252,46 @@ sub HostStats {
### $DBRaw : database table for raw data (to read from) ### $DBRaw : database table for raw data (to read from)
### $DBHosts : database table for hosts data (to write to) ### $DBHosts : database table for hosts data (to write to)
### $Month : current month to do ### $Month : current month to do
### $TLH : TLHs to collect
### $MID : specific Message-ID to fetch (testing purposes) ### $MID : specific Message-ID to fetch (testing purposes)
### $Test : test mode ### $Test : test mode
### $Debug : debug mode ### $Debug : debug mode
### @KnownHosts : list of known hosts with subdomains ### @KnownHosts : list of known hosts with subdomains
### OUT: (nothing) ### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBHosts,$Month,$TLH,$MID,$Test,$Debug,@KnownHosts) = @_; my ($DBHandle,$DBRaw,$DBHosts,$Month,$MID,$Test,$Debug,@KnownHosts) = @_;
my (%Postings,$DBQuery); my (%Postings,$DBQuery);
$DBQuery = GetHeaders($DBHandle,$DBRaw,$Month,$MID); if (!$MID) {
# get raw header data from raw table for given month
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
"$DBI::errstr\n",$MID,$DBRaw));
}
### ---------------------------------------------- ### ----------------------------------------------
print "----- HostStats -----\n" if $Debug; print "----- HostStats -----\n" if $Debug;
### parse headers ### parse headers
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) { while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
### skip postings with wrong TLH ### skip postings with wrong TLH
next if ($TLH && !CheckTLH($Newsgroups,$TLH)); # remove whitespace from contents of Newsgroups:
chomp($Newsgroups);
$Newsgroups =~ s/\s//;
my $GroupCount;
for (split /,/, $Newsgroups) {
# don't count newsgroup/hierarchy in wrong TLH
next if($TLH and !/^$TLH/);
$GroupCount++;
};
next if !$GroupCount;
my $Host; my $Host;
my %Header = ParseHeaders(split(/\n/,$Headers)); my %Header = ParseHeaders(split(/\n/,$Headers));
@ -332,13 +344,15 @@ sub HostStats {
} }
} }
# lowercase
$Host = lc($Host);
# count host # count host
if ($Host) { if ($Host) {
$Host = lc($Host);
$Postings{$Host}++; $Postings{$Host}++;
$Postings{'ALL'}++; $Postings{'ALL'}++;
} else { } else {
&Bleat(1,sprintf("%s FAILED", $Header{'message-id'})) if !$Host; &Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
} }
printf("%s: %s\n", $Header{'message-id'}, $Host) if ($MID or $Debug && $Debug >1); printf("%s: %s\n", $Header{'message-id'}, $Host) if ($MID or $Debug && $Debug >1);
@ -367,363 +381,6 @@ sub HostStats {
}; };
}; };
sub ClientStats {
### ----------------------------------------------------------------------------
### collect number of postings per client (and version)
###
### Reads (newsgroups, headers) rows via GetHeaders(), extracts one or more
### client/version pairs per posting from X-Mailer, X-Newsreader or User-Agent
### and accumulates them in %Postings{client}{version}. The version key 'ALL'
### holds the per-client total; $Postings{'ALL'}{'ALL'} counts every posting
### for which at least one client was found. Unless in test mode, the rows
### for $Month in $DBClients are deleted and rewritten from %Postings.
###
### IN : $DBHandle : database handle
### $DBRaw : database table for raw data (to read from)
### $DBClients : database table for clients data (to write to)
### $Month : current month to do
### $TLH : TLHs to collect
### $MID : specific Message-ID to fetch (testing purposes)
### $Test : test mode
### $Debug : debug mode
### @DropAgents : list of UserAgent "agents" that won't be counted
### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBClients,$Month,$TLH,$MID,$Test,$Debug,@DropAgents) = @_;
my (%Postings,$DBQuery);
# lookup hash for the drop list (compared against lowercased agent names below)
my %DropAgent = map { $_ => 1 } @DropAgents;
$DBQuery = GetHeaders($DBHandle,$DBRaw,$Month,$MID);
### ----------------------------------------------
print "----- ClientStats -----\n" if $Debug;
### parse headers
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
### skip postings with wrong TLH
next if ($TLH && !CheckTLH($Newsgroups,$TLH));
my (@Clients, $Client, $Version);
my %Header = ParseHeaders(split(/\n/,$Headers));
### X-Mailer
if ($Header{'x-mailer'}) {
# transfer to x-newsreader and parse from there
# (this overwrites an X-Newsreader header that may also be present)
$Header{'x-newsreader'} = $Header{'x-mailer'};
}
### X-Newsreader
if ($Header{'x-newsreader'}) {
$Header{'x-newsreader'} = RemoveComments($Header{'x-newsreader'});
# remove 'http://' and 'via' (CrossPoint)
$Header{'x-newsreader'} =~ s/https?:\/\///;
$Header{'x-newsreader'} =~ s/ ?via(.+)?$//;
# parse header
# User-Agent style
if ($Header{'x-newsreader'} =~ /^([^\/ ]+\/[^\/ ]+ ?)+$/) {
# transfer to user-agent and parse from there
$Header{'user-agent'} = $Header{'x-newsreader'};
# "client name version"
} elsif ($Header{'x-newsreader'} =~ / /) {
($Client, $Version) = ParseXNewsreader($Header{'x-newsreader'});
} else {
# single word without slash: the whole header is the client name
$Client = $Header{'x-newsreader'};
$Version = '';
}
if ($Client) {
# special cases
$Client = 'CrossPoint' if $Client =~ /^CrossPoint\//;
$Client = 'Virtual Access' if $Client =~ /^Virtual Access/;
my %UserAgent = (agent => $Client,
version => $Version);
push @Clients, { %UserAgent };
} else {
# no client found: fall through to the User-Agent parser below
$Header{'user-agent'} = $Header{'x-newsreader'};
}
}
### User-Agent
# only tried when X-Newsreader/X-Mailer did not already yield a client
if(!@Clients && $Header{'user-agent'}) {
$Header{'user-agent'} = RemoveComments($Header{'user-agent'});
### well-formed?
if ($Header{'user-agent'} =~ /^([^\/ ]+\/[^\/ ]+ ?)+$/) {
@Clients = ParseUserAgent($Header{'user-agent'});
} else {
# snip and add known well-formed agents from the trailing end
while ($Header{'user-agent'} =~ /(((Hamster)|(Hamster-Pg)|(KorrNews)|(OE-Tools)|(Mime-proxy))(\/[^\/ ]+))$/) {
push @Clients, ParseUserAgent($1);
$Header{'user-agent'} =~ s/ [^\/ ]+\/[^\/ ]+$//;
}
### special cases
# remove 'http://open-news-network.org'
$Header{'user-agent'} =~ s/^https?:\/\/open-news-network.org(\S+)?//;
# Thunderbird
if ($Header{'user-agent'} =~ /((Mozilla[- ])?Thunderbird) ?([0-9.]+)?/) {
$Client = 'Thunderbird';
$Version = $3;
# XP
} elsif ($Header{'user-agent'} =~ /((TrueXP|FreeXP|XP2(\/Agent)?)) \/(.+)$/) {
$Client = $1;
$Version = $4;
$Client = 'XP2' if $Client eq 'XP2/Agent';
### most general case
# client version
# client/version
# client/32 version
# - version may end in one non-numeric character
# - including trailing beta/pre/...
# 1) client: (([^0-9]+)|(\D+\/\d+))
# 2) version: (\S+\d\D?)
# 3) trailing: (( alpha\d?)|( beta\d?)|( rc\d)| pre| trialware)?
} elsif ($Header{'user-agent'} =~ /^(([^0-9]+)|(\D+\/\d+))[\/ ]((\S+\d\D?)(( alpha\d?)|( beta\d?)|( rc\d)| pre| trialware)?)$/) {
$Client = $1;
$Version = $4;
### some very special cases
# SeaMonkey/nn
} elsif ($Header{'user-agent'} =~ /SeaMonkey\/([0-9.]+)/) {
$Client = 'Seamonkey';
$Version = $1;
# Emacs nn/Gnus nn
} elsif ($Header{'user-agent'} =~ /Emacs [0-9.]+\/Gnus ([0-9.]+)/) {
$Client = 'Gnus';
$Version = $1;
# failed to parse
} else {
# the whole remaining header is used as the client name
# NOTE(review): $Version is not assigned in this branch, so it may still
# be undefined here - confirm that this is intended
$Client = $Header{'user-agent'};
}
# count client, if found
if ($Client) {
my %UserAgent = (agent => $Client,
version => $Version);
push @Clients, { %UserAgent };
} else {
# only complain if the trailing-agent loop above found nothing either
&Bleat(1,sprintf("%s FAILED", $Header{'message-id'})) if !@Clients;
}
}
}
if (@Clients) {
# one count per posting, regardless of the number of agents found
$Postings{'ALL'}{'ALL'}++;
foreach (@Clients) {
# filter agents for User-Agent with multiple agents
# ($#Clients is 0, i.e. false, for a single agent, which is always kept)
next if $#Clients && exists($DropAgent{lc($_->{'agent'})});
# remove whitespace
$_->{'agent'} =~ s/^\s+|\s+$//g;
$_->{'version'} =~ s/^\s+|\s+$//g if $_->{'version'};
# encode to utf-8, if necessary
$_->{'agent'} = encode('UTF-8', $_->{'agent'}) if $_->{'agent'} =~ /[\x80-\x{ffff}]/;
$_->{'version'} = encode('UTF-8', $_->{'version'}) if $_->{'version'} and $_->{'version'} =~ /[\x80-\x{ffff}]/;
# truncate overlong clients or versions
# NOTE(review): 150 and 50 presumably mirror the column widths of the
# clients table - verify against the table definition
$_->{'agent'} = substr($_->{'agent'}, 0, 150) if length($_->{'agent'}) > 150;
$_->{'version'} = substr($_->{'version'}, 0, 50) if $_->{'version'} and length($_->{'version'}) > 50;
# special cases
# Mozilla
$_->{'agent'} = 'Mozilla' if $_->{'agent'} eq '•Mozilla';
$_->{'agent'} =~ s/^Mozilla //;
# Forte Agent
$_->{'agent'} = 'Forte Agent' if $_->{'agent'} eq 'ForteAgent';
if ($_->{'agent'} eq 'Forte Agent') {
# NOTE(review): unlike the lines above, these two statements are not
# guarded against an undefined version
$_->{'version'} =~ s/-/\//;
$_->{'version'} = '' if $_->{'version'} eq '32Bit';
}
# count client ('ALL') and client/version (if version is present)
$Postings{$_->{'agent'}}{'ALL'}++;
$Postings{$_->{'agent'}}{$_->{'version'}}++ if $_->{'version'};
printf("%s: %s {%s}\n", $Header{'message-id'}, $_->{'agent'},
$_->{'version'} ? $Postings{$_->{'agent'}}{$_->{'version'}} : '')
if ($MID or $Debug && $Debug >1);
}
}
};
# delete old data for that month
if (!$Test) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
$DBClients),undef,$Month)
or &Bleat(2,sprintf("Can't delete old client data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBClients));
};
# dump and/or save the collected counts, one row per client/version pair
foreach my $Client (sort keys %Postings) {
foreach my $Version (sort keys %{$Postings{$Client}}) {
printf ("%s {%s}: %d\n",$Client,$Version,$Postings{$Client}{$Version}) if $Debug;
if (!$Test) {
# write to database
# NOTE(review): the statement is prepared anew for every row; it does not
# depend on the loop variables
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
"(month,client,version,postings) ".
"VALUES (?, ?, ?, ?)",$DBClients));
# NOTE(review): the message below says "groups data" although client data
# is written - looks like a copy-and-paste leftover
$DBQuery->execute($Month, $Client, $Version, $Postings{$Client}{$Version})
or &Bleat(2,sprintf("Can't write groups data for %s/%s/%s to %s: ".
"$DBI::errstr\n",$Month,$Client,$Version,$DBClients));
$DBQuery->finish;
};
}
};
};
sub GetHeaders {
### ----------------------------------------------------------------------------
### fetch (newsgroups and) raw headers from the raw data table
### IN : $DBHandle: database handle
###      $DBRaw   : database table for raw data (to read from)
###      $Month   : current month to do (used unless $MID is set)
###      $MID     : specific Message-ID to fetch (testing purposes)
### OUT: DBI statement handle, already executed and ready for fetching
  my ($DBHandle,$DBRaw,$Month,$MID) = @_;

  # testing shortcut: fetch just the posting with the given Message-ID
  if ($MID) {
    my $ByMID = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
                                           "WHERE mid = ?", $DBRaw));
    $ByMID->execute($MID)
      or &Bleat(2,sprintf("Can't get header data for %s from %s: ".
                          "$DBI::errstr\n",$MID,$DBRaw));
    return $ByMID;
  }

  # regular case: all postings of the given month that are not disregarded
  my $ByMonth = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
                                           "WHERE day LIKE ? AND NOT disregard",
                                           $DBRaw));
  $ByMonth->execute($Month.'-%')
    or &Bleat(2,sprintf("Can't get header data for %s from %s: ".
                        "$DBI::errstr\n",$Month,$DBRaw));
  return $ByMonth;
}
sub CheckTLH {
### ----------------------------------------------------------------------------
### count newsgroups from legal TLH(s)
### IN : $Newsgroups: comma separated list of newsgroups
###      $TLH       : legal TLH or reference to an array of legal TLHs; each
###                   one is used as a regular expression anchored at the
###                   start of the newsgroup name
### OUT: number of newsgroups from legal TLH(s), 0 if there are none
###      (a newsgroup matching several TLHs is counted once per match)
  my ($Newsgroups,$TLH) = @_;
  my $GroupCount = 0;

  # fill @TLH from $TLH, which can be an array reference or a scalar value
  my @TLH = ref($TLH) eq 'ARRAY' ? @{$TLH} : ($TLH);

  # remove *all* whitespace from contents of Newsgroups:
  # (without /g only the first blank would go, so groups following ", " or
  # a folded line would keep a leading blank and never match /^TLH/)
  chomp($Newsgroups);
  $Newsgroups =~ s/\s//g;

  for my $Newsgroup (split /,/, $Newsgroups) {
    foreach (@TLH) {
      # increment $GroupCount if $Newsgroup starts with $TLH
      $GroupCount++ if $Newsgroup =~ /^$_/;
    }
  };

  return $GroupCount;
}
sub RemoveComments {
### ----------------------------------------------------------------------------
### remove comments and other junk from header
### IN : $Header: a header
### OUT: the header, with comments and other junk removed
###      (an undefined header is returned as is)
  my $Header = shift;
  return $Header if !defined($Header);
  # decode MIME encoded words
  # (the encoding letter may be given in upper or lower case)
  if ($Header =~ /=\?\S+\?[BbQq]\?/) {
    $Header = decode("MIME-Header",$Header);
  }
  # remove (nested) comments, innermost '(...)' first;
  # loop only as long as something was actually removed, so that
  # unbalanced input like '(()' cannot loop forever
  1 while $Header =~ s/\([^()]+?\)//;
  # remove dangling ')'
  $Header =~ s/\S+\)//;
  # remove from dangling '(' to end of header
  $Header =~ s/\(.+$//;
  # remove from '[' to first ']'
  $Header =~ s/\[[^\[\]]+?\]//;
  # remove 'Nr. ... lebt'
  $Header =~ s/Nr\. \d+ lebt//;
  # remove nn:nn:nn
  $Header =~ s/\d\d:\d\d:\d\d//;
  # remove '/mm/...'
  $Header =~ s/\/mm\/\S+//;
  # remove ' DE' / '_DE'
  $Header =~ s/[ _]DE//;
  # remove 'eol' or '-shl' or trailing ml-inews[-sig]
  # NOTE(review): '$' binds to the last alternative only, so the first
  # 'eol' or '-shl' is removed wherever it is found - confirm whether
  # only trailing occurrences were meant
  $Header =~ s/(eol)|(-shl)|(ml-inews(-sig)?)$//;
  # remove from ';' or ',' (CrossPoint)
  # or '&' to end of header
  $Header =~ s/[;,&].+$//;
  # remove from 'by ' or 'unter Windows' or '@ Windows'
  # to end of header
  $Header =~ s/((by )|(unter +Windows)|(@ Windows)).+$//;
  # remove superfluous whitespace in header
  # and whitespace around header
  $Header =~ s/\s+/ /g;
  $Header =~ s/^\s+|\s+$//g;
  return $Header;
}
sub ParseXNewsreader {
### ----------------------------------------------------------------------------
### split a X-Newsreader header into client name and version
### IN : $XNR: a X-Newsreader header
### OUT: client (undefined if the header starts with a version)
###      and version (empty string if none was found)
  my $XNR = shift;
  my (@NameParts, $Client, $Version);
  for my $Word (split(/ /,$XNR)) {
    # the first word that has a digit followed by a digit or a dot - and
    # does not end in '/' plus a single digit - is the version; stop there
    if ($Word =~ /\d[0-9.]/ and $Word !~ /\/\d$/) {
      $Version = $Word;
      last;
    }
    # everything before that belongs to the client name
    push @NameParts, $Word;
  }
  if (@NameParts) {
    # words are separated by single blanks; drop trailing whitespace
    $Client = join(' ',@NameParts) . ' ';
    $Client =~ s/\s+$//;
  }
  # version defaults to an empty string
  $Version ||= '';
  return $Client, $Version;
}
sub ParseUserAgent {
### ----------------------------------------------------------------------------
### parse User-Agent header (agent and version)
### IN : $UserAgent: a User-Agent header
### OUT: array of hashes (agent/version), one for each word of the header;
###      version is an empty string if the word has no '/version' part
  my $UserAgent = shift;
  my @UserAgents;
  return @UserAgents if !defined($UserAgent);
  # a well-formed User-Agent header will contain pairs of
  # client/version, i.e. 'slrn/0.9.7.3'
  foreach my $Product (split(/ /,$UserAgent)) {
    # consecutive blanks yield empty words
    next if $Product eq '';
    my ($Agent,$Version);
    # use the captures only if the match succeeded - otherwise $1 and $2
    # would still hold the values of an earlier successful match
    if ($Product =~ /^(.+)\/(.+)$/) {
      ($Agent,$Version) = ($1,$2);
    } else {
      # no version present: keep the word as agent name
      ($Agent,$Version) = ($Product,'');
    }
    push @UserAgents, { 'agent' => $Agent, 'version' => $Version };
  }
  return @UserAgents;
}
__END__ __END__
################################ Documentation ################################# ################################ Documentation #################################
@ -734,7 +391,7 @@ gatherstats - process statistical data from a raw source
=head1 SYNOPSIS =head1 SYNOPSIS
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--conffile> I<filename>] B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
@ -781,34 +438,23 @@ override that default through the B<--groupsdb> option.
=item B<hosts> (postings from host per month) =item B<hosts> (postings from host per month)
B<gatherstats> will examine Injection-Info:, X-Trace: and Path: B<gatherstats> will examine Injection-Info:, X-Trace: and Path:
headers and try to normalize them. The sum of all detected hosts will headers and try to normalize them. Groups not in I<TLH> will be
also be saved for each month. Groups not in I<TLH> will be ignored. ignored. The sum of all detected hosts will also saved for each month.
Data is written to I<DBTableHosts> (see L<doc/INSTALL>); you can Data is written to I<DBTableHosts> (see L<doc/INSTALL>); you can
override that default through the B<--hostsdb> option. override that default through the B<--hostsdb> option.
=item B<clients> (postings by client per month)
B<gatherstats> will examine User-Agent:, X-Newsreader: and X-Mailer:
headers and try to remove comments and non-standard contents. Clients
and client versions are counted separately. The sum of all detected
clients will also be saved for each month. Groups not in I<TLH> will
be ignored.
Data is written to I<DBTableClnts> (see L<doc/INSTALL>); you can
override that default through the B<--clientsdb> option.
=back =back
=head2 Configuration =head2 Configuration
B<gatherstats> will read its configuration from F<newsstats.conf> B<gatherstats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration which should be present in etc/ via Config::Auto or from a configuration file
file submitted by the B<--conffile> option. submitted by the B<--conffile> option.
See L<doc/INSTALL> for an overview of possible configuration options. See L<doc/INSTALL> for an overview of possible configuration options.
You can override configuration options by using the B<--hierarchy>, You can override configuration options via the B<--hierarchy>,
B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options, B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
respectively. respectively.
@ -818,15 +464,15 @@ respectively.
=item B<-V>, B<--version> =item B<-V>, B<--version>
Display version and copyright information and exit. Print out version and copyright information and exit.
=item B<-h>, B<--help> =item B<-h>, B<--help>
Display this man page and exit. Print this man page and exit.
=item B<-d>, B<--debug> =item B<-d>, B<--debug>
Print debugging information to STDOUT while processing (number of Output debugging information to STDOUT while processing (number of
postings per group). postings per group).
=item B<-t>, B<--test> =item B<-t>, B<--test>
@ -838,17 +484,15 @@ conjunction with B<--test> ... everything else seems a bit pointless.
Set processing period to a single month in YYYY-MM format or to a time Set processing period to a single month in YYYY-MM format or to a time
period between two months in YYYY-MM:YYYY-MM format (two months, separated period between two months in YYYY-MM:YYYY-MM format (two months, separated
by a colon). Defaults to last month. by a colon).
=item B<-s>, B<--stats> I<type> =item B<-s>, B<--stats> I<type>
Set processing type to one of I<all>, I<groups>, I<hosts> or I<clients>. Set processing type to one of I<all>, I<groups> or I<hosts>. Defaults
Defaults to I<all>. to all.
=item B<-c>, B<--checkgroups> I<filename template> =item B<-c>, B<--checkgroups> I<filename template>
Relevant only for newsgroup stats (I<groups>).
Check each group against a list of valid newsgroups read from a file, Check each group against a list of valid newsgroups read from a file,
one group on each line and ignoring everything after the first one group on each line and ignoring everything after the first
whitespace (so you can use a file in checkgroups format or (part of) whitespace (so you can use a file in checkgroups format or (part of)
@ -867,12 +511,10 @@ Newsgroups not found in the checkgroups file will be dropped (and
logged to STDERR), and newsgroups found there but having no postings logged to STDERR), and newsgroups found there but having no postings
will be added with a count of 0 (and logged to STDERR). will be added with a count of 0 (and logged to STDERR).
=item B<--hierarchy> I<TLH> (newsgroup hierarchy/hierarchies) =item B<--hierarchy> I<TLH> (newsgroup hierarchy)
Override I<TLH> from F<newsstats.conf>. Override I<TLH> from F<newsstats.conf>.
I<TLH> can be a single word or a comma-separated list.
=item B<--rawdb> I<table> (raw data table) =item B<--rawdb> I<table> (raw data table)
Override I<DBTableRaw> from F<newsstats.conf>. Override I<DBTableRaw> from F<newsstats.conf>.
@ -881,17 +523,17 @@ Override I<DBTableRaw> from F<newsstats.conf>.
Override I<DBTableGrps> from F<newsstats.conf>. Override I<DBTableGrps> from F<newsstats.conf>.
=item B<--hostsdb> I<table> (host data table)
Override I<DBTableHosts> from F<newsstats.conf>.
=item B<--clientsdb> I<table> (client data table) =item B<--clientsdb> I<table> (client data table)
Override I<DBTableClnts> from F<newsstats.conf>. Override I<DBTableClnts> from F<newsstats.conf>.
=item B<--hostsdb> I<table> (host data table)
Override I<DBTableHosts> from F<newsstats.conf>.
=item B<--conffile> I<filename> =item B<--conffile> I<filename>
Read configuration from I<filename> instead of F<newsstats.conf>. Load configuration from I<filename> instead of F<newsstats.conf>.
=back =back

View file

@ -283,7 +283,7 @@ See L<doc/README>.
=head1 DESCRIPTION =head1 DESCRIPTION
This script creates reports on newsgroup usage (number of postings per This script create reports on newsgroup usage (number of postings per
group per month) taken from result tables created by group per month) taken from result tables created by
B<gatherstats.pl>. B<gatherstats.pl>.
@ -291,16 +291,16 @@ B<gatherstats.pl>.
=head3 Time period and newsgroups =head3 Time period and newsgroups
The time period to act on defaults to last month; you can assign The time period to act on defaults to last month; you can assign another
another time period or a single month (or drop all time constraints) time period or a single month (or drop all time constraints) via the
via the B<--month> option (see below). B<--month> option (see below).
B<groupstats> will process all newsgroups by default; you can limit B<groupstats> will process all newsgroups by default; you can limit
processing to only some newsgroups by supplying a list of those groups processing to only some newsgroups by supplying a list of those groups via
via B<--newsgroups> option (see below). You can include hierarchy B<--newsgroups> option (see below). You can include hierarchy levels in
levels in the output by adding the B<--sums> switch (see below). the output by adding the B<--sums> switch (see below). Optionally
Optionally newsgroups not present in a checkgroups file can be excluded newsgroups not present in a checkgroups file can be excluded from output,
from output, see B<--checkgroups> below. see B<--checkgroups> below.
=head3 Report type =head3 Report type
@ -321,27 +321,26 @@ below.
=head3 Sorting and formatting the output =head3 Sorting and formatting the output
By default, all results are grouped by month; you can group results by By default, all results are grouped by month; you can group results by
newsgroup instead via the B<--group-by> option. Within those groups, newsgroup instead via the B<--group-by> option. Within those groups, the
the list of newsgroups (or months) is sorted alphabetically (or list of newsgroups (or months) is sorted alphabetically (or
chronologically, respectively) ascending. You can change that order chronologically, respectively) ascending. You can change that order (and
(and sort by number of postings) with the B<--order-by> option. For sort by number of postings) with the B<--order-by> option. For details and
details and exceptions, please see below. exceptions, please see below.
The results will be formatted as a kind of table; you can change the The results will be formatted as a kind of table; you can change the
output format to a simple list or just a list of newsgroups and number output format to a simple list or just a list of newsgroups and number of
of postings with the B<--format> option. Captions will be added by means postings with the B<--format> option. Captions will be added by means of
of the B<--caption> option; all comments (and captions) can be suppressed the B<--caption> option; all comments (and captions) can be suppressed by
by using B<--nocomments>. using B<--nocomments>.
Last but not least you can redirect all output to a number of files, Last but not least you can redirect all output to a number of files, e.g.
e.g. one for each month, by submitting the B<--filetemplate> option, one for each month, by submitting the B<--filetemplate> option, see below.
see below.
=head2 Configuration =head2 Configuration
B<groupstats> will read its configuration from F<newsstats.conf> B<groupstats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration which should be present in etc/ via Config::Auto or from a configuration file
file submitted by the B<--conffile> option. submitted by the B<--conffile> option.
See doc/INSTALL for an overview of possible configuration options. See doc/INSTALL for an overview of possible configuration options.
@ -353,18 +352,18 @@ You can override some configuration options via the B<--groupsdb> option.
=item B<-V>, B<--version> =item B<-V>, B<--version>
Display version and copyright information and exit. Print out version and copyright information and exit.
=item B<-h>, B<--help> =item B<-h>, B<--help>
Display this man page and exit. Print this man page and exit.
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all> =item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
Set processing period to a single month in YYYY-MM format or to a time Set processing period to a single month in YYYY-MM format or to a time
period between two month in YYYY-MM:YYYY-MM format (two month, separated period between two month in YYYY-MM:YYYY-MM format (two month, separated
by a colon). By using the keyword I<all> instead, you can set no by a colon). By using the keyword I<all> instead, you can set no
processing period to process the whole database. Defaults to last month. processing period to process the whole database.
=item B<-n>, B<--newsgroups> I<newsgroup(s)> =item B<-n>, B<--newsgroups> I<newsgroup(s)>
@ -389,20 +388,17 @@ See the B<gatherstats> man page for details.
This option does not work together with the B<--checkgroups> option as This option does not work together with the B<--checkgroups> option as
all "virtual" groups will not be present in the checkgroups file. all "virtual" groups will not be present in the checkgroups file.
False by default.
=item B<--checkgroups> I<filename> =item B<--checkgroups> I<filename>
Restrict output to those newgroups present in a file in checkgroups Restrict output to those newgroups present in a file in checkgroups format
format (one newgroup name per line; everything after the first (one newgroup name per line; everything after the first whitespace on each
whitespace on each line is ignored). All other newsgroups will be line is ignored). All other newsgroups will be removed from output.
removed from output.
Contrary to B<gatherstats>, I<filename> is not a template, but refers Contrary to B<gatherstats>, I<filename> is not a template, but refers to
to a single file in checkgroups format. a single file in checkgroups format.
The B<--sums> option will not work together with this option as The B<--sums> option will not work together with this option as "virtual"
"virtual" groups will not be present in the checkgroups file. groups will not be present in the checkgroups file.
=item B<-r>, B<--report> I<default|average|sums> =item B<-r>, B<--report> I<default|average|sums>
@ -410,8 +406,8 @@ Choose the report type: I<default>, I<average> or I<sums>
By default, B<groupstats> will report the number of postings for each By default, B<groupstats> will report the number of postings for each
newsgroup in each month. But it can also report the average number of newsgroup in each month. But it can also report the average number of
postings per group for all months or the total sum of postings per postings per group for all months or the total sum of postings per group
group for all months. for all months.
For report types I<average> and I<sums>, the B<group-by> option has no For report types I<average> and I<sums>, the B<group-by> option has no
meaning and will be silently ignored (see below). meaning and will be silently ignored (see below).
@ -430,13 +426,12 @@ Set the boundary type to one of I<default>, I<level>, I<average> or
I<sums>. I<sums>.
By default, all newsgroups with more postings per month than the upper By default, all newsgroups with more postings per month than the upper
boundary and/or less postings per month than the lower boundary will boundary and/or less postings per month than the lower boundary will be
be excluded from further processing. For the default report that means each
excluded from further processing. For the default report that means month only newsgroups with a number of postings between the boundaries
each month only newsgroups with a number of postings between the will be displayed. For the other report types, newsgroups with a number of
boundaries will be displayed. For the other report types, newsgroups postings exceeding the boundaries in all (!) months will not be
with a number of postings exceeding the boundaries in all (!) months considered.
will not be considered.
For example, lets take a list of newsgroups like this: For example, lets take a list of newsgroups like this:
@ -466,23 +461,22 @@ month. If you want to list all newsgroups with more than 25 postings
I<in total>, you'll have to set the boundary type to I<sum>, see below. I<in total>, you'll have to set the boundary type to I<sum>, see below.
A boundary type of I<level> will show only those newsgroups - at all - A boundary type of I<level> will show only those newsgroups - at all -
that satisfy the boundaries in each and every single month. With the that satisfy the boundaries in each and every single month. With the above
above list of newsgroups and list of newsgroups and
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>, C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>,
you'll get this result: you'll get this result:
----- All months: ----- All months:
de.comp.datenbanken.ms-access 293 de.comp.datenbanken.ms-access 293
de.comp.datenbanken.mysql has not been considered because it had less de.comp.datenbanken.mysql has not been considered because it had less than
than 25 postings in 2012-02 (only). 25 postings in 2012-02 (only).
You can use that to get a list of newsgroups that have more (or less) You can use that to get a list of newsgroups that have more (or less) then
then x postings in every month during the whole reporting period. x postings in every month during the whole reporting period.
A boundary type of I<average> will show only those newsgroups - at A boundary type of I<average> will show only those newsgroups - at all -that
all - that satisfy the boundaries on average. With the above list of satisfy the boundaries on average. With the above list of newsgroups and
newsgroups and
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>, C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>,
you'll get this result: you'll get this result:
@ -497,8 +491,8 @@ The average number of postings in the three groups is:
de.comp.datenbanken.mysql 48.33 de.comp.datenbanken.mysql 48.33
Last but not least, a boundary type of I<sums> will show only those Last but not least, a boundary type of I<sums> will show only those
newsgroups - at all - that satisfy the boundaries with the total sum newsgroups - at all - that satisfy the boundaries with the total sum of
of all postings during the reporting period. With the above list of all postings during the reporting period. With the above list of
newsgroups and newsgroups and
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>, C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>,
you'll finally get this result: you'll finally get this result:
@ -511,8 +505,8 @@ you'll finally get this result:
=item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]> =item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]>
By default, all results are grouped by month, sorted chronologically By default, all results are grouped by month, sorted chronologically in
in ascending order, like this: ascending order, like this:
----- 2012-01: ----- 2012-01:
de.comp.datenbanken.ms-access 84 de.comp.datenbanken.ms-access 84
@ -531,8 +525,8 @@ B<--group-by> I<newsgroup>:
2012-01 88 2012-01 88
2012-02 21 2012-02 21
By appending I<-desc> to the group-by option parameter, you can By appending I<-desc> to the group-by option parameter, you can reverse
reverse the sort order - e.g. B<--group-by> I<month-desc> will give: the sort order - e.g. B<--group-by> I<month-desc> will give:
----- 2012-02: ----- 2012-02:
de.comp.datenbanken.ms-access 126 de.comp.datenbanken.ms-access 126
@ -547,9 +541,9 @@ this option will therefore be ignored.
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]> =item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
Within each group (a single month or single newsgroup, see above), the Within each group (a single month or single newsgroup, see above), the
report will be sorted by newsgroup names in ascending alphabetical report will be sorted by newsgroup names in ascending alphabetical order
order by default. You can change the sort order to descending or sort by default. You can change the sort order to descending or sort by number
by number of postings instead. of postings instead.
=item B<-f>, B<--format> I<pretty|list|dump> =item B<-f>, B<--format> I<pretty|list|dump>
@ -593,19 +587,19 @@ False by default.
=item B<--comments|--nocomments> =item B<--comments|--nocomments>
Add comments (group headers) to I<dump> and I<pretty> output. True by Add comments (group headers) to I<dump> and I<pretty> output. True by default
default as long as B<--filetemplate> is not set. as logn as B<--filetemplate> is not set.
Use I<--nocomments> to suppress anything except newsgroup names/months Use I<--nocomments> to suppress anything except newsgroup names/months and
and numbers of postings. numbers of postings.
=item B<--filetemplate> I<filename template> =item B<--filetemplate> I<filename template>
Save output to file(s) instead of dumping it to STDOUT. B<groupstats> Save output to file(s) instead of dumping it to STDOUT. B<groupstats> will
will create one file for each month (or each newsgroup, according to create one file for each month (or each newsgroup, accordant to the
the setting of B<--group-by>, see above), with filenames composed by setting of B<--group-by>, see above), with filenames composed by adding
adding year and month (or newsgroup names) to the I<filename template>, year and month (or newsgroup names) to the I<filename template>, for
for example with B<--filetemplate> I<stats>: example with B<--filetemplate> I<stats>:
stats-2012-01 stats-2012-01
stats-2012-02 stats-2012-02
@ -617,7 +611,7 @@ Override I<DBTableGrps> from F<newsstats.conf>.
=item B<--conffile> I<filename> =item B<--conffile> I<filename>
Read configuration from I<filename> instead of F<newsstats.conf>. Load configuration from I<filename> instead of F<newsstats.conf>.
=back =back
@ -641,9 +635,9 @@ by number of postings, descending, in I<pretty> format:
groupstats --upper 30 --order-by postings-desc groupstats --upper 30 --order-by postings-desc
Show the total of all postings for the year of 2010 for all groups Show the total of all postings for the year of 2010 for all groups that
that had 30 postings or less in every single month in that year, had 30 postings or less in every single month in that year, ordered by
ordered by number of postings in descending order: number of postings in descending order:
groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc
@ -657,6 +651,7 @@ machine-readable form (without formatting):
groupstats -m 2010-01:2010-12 -f dump --filetemplate stats groupstats -m 2010-01:2010-12 -f dump --filetemplate stats
=head1 FILES =head1 FILES
=over 4 =over 4
@ -690,7 +685,7 @@ L<doc/README>
=item - =item -
L<doc/INSTALL> l>doc/INSTALL>
=item - =item -

165
bin/postingstats.pl Executable file → Normal file
View file

@ -15,8 +15,7 @@
# #
# Usage: # Usage:
# $~ groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups # $~ groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups
# $~ hoststats.pl --nocomments --sums --format dump | postingstats.pl -t hosts # $~ cliservstats.pl -t server --nocomments --sums --format dump | postingstats.pl -t hosts
# $~ clientstats.pl --nocomments --sums --versions --format dump | postingstats.pl -t clients
# #
BEGIN { BEGIN {
@ -54,22 +53,19 @@ if (!$Type) {
$Type = 'GroupStats'; $Type = 'GroupStats';
} elsif ($Type =~ /(host|server)s?/i) { } elsif ($Type =~ /(host|server)s?/i) {
$Type = 'HostStats'; $Type = 'HostStats';
} elsif ($Type =~ /(client|reader)s?/i) {
$Type = 'ClientStats';
}; };
my $Timestamp = time; my $Timestamp = time;
##### ----- configuration -------------------------------------------- ##### ----- configuration --------------------------------------------
my $TLH = 'de'; my $TLH = 'de';
my %Heading = ('GroupStats' => 'Postingstatistik fuer de.* im Monat '.$Month, my %Heading = ('GroupStats' => 'Postingstatistik fuer de.* im Monat '.$Month,
'HostStats' => 'Serverstatistik fuer de.* im Monat '.$Month, 'HostStats' => 'Serverstatistik fuer de.* im Monat '.$Month
'ClientStats' => 'Newsreaderstatistik fuer de.* im Monat '.$Month
); );
my %TH = ('counter' => 'Nr.', my %TH = ('counter' => 'Nr.',
'value' => 'Anzahl', 'value' => 'Anzahl',
'percentage' => 'Prozent' 'percentage' => 'Prozent'
); );
my %LeadIn = ('GroupStats' => <<GROUPSIN, 'HostStats' => <<HOSTSIN, 'ClientStats' => <<CLIENTSIN); my %LeadIn = ('GroupStats' => <<GROUPSIN, 'HostStats' => <<HOSTSIN);
From: Thomas Hochstein <thh\@thh.name> From: Thomas Hochstein <thh\@thh.name>
Newsgroups: local.test Newsgroups: local.test
Subject: Postingstatistik fuer de.* im Monat $Month Subject: Postingstatistik fuer de.* im Monat $Month
@ -92,18 +88,7 @@ Content-Transfer-Encoding: 7bit
User-Agent: postingstats.pl/$VERSION (NewsStats) User-Agent: postingstats.pl/$VERSION (NewsStats)
HOSTSIN HOSTSIN
From: Thomas Hochstein <thh\@thh.name> my %LeadOut = ('GroupStats' => <<GROUPSOUT, 'HostStats' => <<HOSTSOUT);
Newsgroups: local.test
Subject: Newsreaderstatistik fuer de.* im Monat $Month
Message-ID: <destat-clients-$Month.$Timestamp\@mid.news.szaf.org>
Approved: thh\@thh.name
Mime-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
User-Agent: postingstats.pl/$VERSION (NewsStats)
CLIENTSIN
my %LeadOut = ('GroupStats' => <<GROUPSOUT, 'HostStats' => <<HOSTSOUT, 'ClientStats' => <<CLIENTSOUT);
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin- Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach- dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
@ -125,19 +110,6 @@ wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
Loeschnachricht) erreicht haben. Loeschnachricht) erreicht haben.
HOSTSOUT HOSTSOUT
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
richten werden nicht erfasst; Postings, die supersedet oder gecancelt
wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
Loeschnachricht) erreicht haben. Versionsangaben werden nur gezaehlt,
wenn Sie ermittelbar sind; daher kann die Summe der Newsreader-Versionen
kleiner sein als die Postingzahl fuer den Newsreader. Ausserdem koennen
an einem Beitrag mehrere Clients beteiligt sein, bspw. der Newsreader
und ein lokaler Server wie der Hamster. Daher kann die Summe aller
Newsreader groesser sein als die Summe der Postings; auch ergeben die
Prozentzahlen dementsprechend in der Summe mehr als 100%.
CLIENTSOUT
##### ----- subroutines ---------------------------------------------- ##### ----- subroutines ----------------------------------------------
sub Percentage { sub Percentage {
@ -152,76 +124,35 @@ sub Divider {
return ':' . $Symbol x ($MaxLength+TABLEWIDTH) . ":\n"; return ':' . $Symbol x ($MaxLength+TABLEWIDTH) . ":\n";
} }
sub SingleVersion {
my ($LastName,$RSubValue,$RValue,$RMaxLength) = @_;
# get version to add to client name
my ($Version) = keys %{$$RSubValue{$LastName}};
$Version =~ s/^- //;
# add version to client name by creating a new name
# and deleting the old one
my ($NameVersion) = $LastName . ' ' . $Version;
$$RValue{$NameVersion} = $$RValue{$LastName};
delete($$RValue{$LastName});
$$RMaxLength = length($NameVersion) if length($NameVersion) > $$RMaxLength;
# delete single version
delete($$RSubValue{$LastName});
}
##### ----- main loop ------------------------------------------------ ##### ----- main loop ------------------------------------------------
my (%Value, %SubValue, $SubCounter, $LastName, $SumName, $SumTotal, my (%Value, $SumName, $SumTotal, $MaxLength);
$MaxLength); $MaxLength = 0;
if ($Type eq 'GroupStats') { if ($Type eq 'GroupStats') {
$SumName = "$TLH.ALL"; $SumName = "$TLH.ALL";
$TH{'name'} = 'Newsgroup' $TH{'name'} = 'Newsgroup'
} elsif ($Type eq 'HostStats') { } elsif ($Type eq 'HostStats') {
$SumName = 'ALL'; $SumName = 'ALL';
$TH{'name'} = 'Postingserver' $TH{'name'} = 'Server'
} elsif ($Type eq 'ClientStats') {
$SumName = 'ALL';
$TH{'name'} = 'Newsreader / Client'
} }
### read from STDIN # read from STDIN
$MaxLength = 0;
while(<>) { while(<>) {
my ($Name, $Value) = $_ =~ /(.+) (\d+)$/; my ($Name, $Value) = split;
$SumTotal = $Value if $Name eq $SumName; $SumTotal = $Value if $Name eq $SumName;
next if $Name =~ /ALL$/; next if $Name =~ /ALL$/;
# handle client versions
if ($Type eq 'ClientStats' and $Name =~ /^- /) {
$SubValue{$LastName}{$Name} = $Value;
$SubCounter++;
} else {
# clients with just one version
&SingleVersion($LastName,\%SubValue,\%Value,\$MaxLength)
if ($LastName && $SubCounter == 1);
# reset version counter and client name
$SubCounter = 0;
$LastName = $Name;
$Value{$Name} = $Value; $Value{$Name} = $Value;
$MaxLength = length($Name) if length($Name) > $MaxLength; $MaxLength = length($Name) if length($Name) > $MaxLength;
} }
}
# clients with just one version (last iteration)
&SingleVersion($LastName,\%SubValue,\%Value,\$MaxLength)
if ($LastName && $SubCounter == 1);
### print to STDOUT # print to STDOUT
# calculate padding for $Heading
my $PaddingLeft = ' ' x int((($MaxLength+TABLEWIDTH-2-length($Heading{$Type}))/2)); my $PaddingLeft = ' ' x int((($MaxLength+TABLEWIDTH-2-length($Heading{$Type}))/2));
my $PaddingRight = $PaddingLeft; my $PaddingRight = $PaddingLeft;
$PaddingLeft .= ' ' if (length($Heading{$Type}) + (length($PaddingLeft) * 2) +2 < $MaxLength+TABLEWIDTH); $PaddingLeft .= ' ' if (length($Heading{$Type}) + (length($PaddingLeft) * 2) < $MaxLength+TABLEWIDTH);
my $Counter = 0;
print $LeadIn{$Type}; print $LeadIn{$Type};
# print table header
print &Divider('=',$MaxLength); print &Divider('=',$MaxLength);
printf(": %s%s%s :\n",$PaddingLeft,$Heading{$Type},$PaddingRight); printf(": %s%s%s :\n",$PaddingLeft,$Heading{$Type},$PaddingRight);
print &Divider('=',$MaxLength); print &Divider('=',$MaxLength);
@ -232,26 +163,11 @@ printf(": %-3s : %-6s : %-7s : %-*s :\n",
$MaxLength,$TH{'name'}); $MaxLength,$TH{'name'});
print &Divider('-',$MaxLength); print &Divider('-',$MaxLength);
# print table
my $Counter = 0;
foreach my $Name (sort { $Value{$b} <=> $Value {$a}} keys %Value) { foreach my $Name (sort { $Value{$b} <=> $Value {$a}} keys %Value) {
$Counter++; $Counter++;
printf(": %3u. : %6u : %6.2f%% : %-*s :\n", printf(": %3u. : %6u : %6.2f%% : %-*s :\n",$Counter,$Value{$Name},&Percentage($SumTotal,$Value{$Name}),$MaxLength,$Name);
$Counter,$Value{$Name},&Percentage($SumTotal,$Value{$Name}),
$MaxLength,$Name);
# handle client versions
if ($SubValue{$Name}) {
foreach my $SubName (sort { $SubValue{$Name}{$b} <=> $SubValue{$Name}{$a} }
keys %{$SubValue{$Name}}) {
printf(": : %6u : %6.2f%% : %-*s :\n",
$SubValue{$Name}{$SubName},
&Percentage($SumTotal,$SubValue{$Name}{$SubName}),
$MaxLength,$SubName);
}
}
} }
# print table footer
print &Divider('-',$MaxLength); print &Divider('-',$MaxLength);
printf(": : %6u : %s : %-*s :\n",$SumTotal,'100.00%',$MaxLength,''); printf(": : %6u : %s : %-*s :\n",$SumTotal,'100.00%',$MaxLength,'');
print &Divider('=',$MaxLength); print &Divider('=',$MaxLength);
@ -268,7 +184,7 @@ postingstats - format and post reports
=head1 SYNOPSIS =head1 SYNOPSIS
B<postingstats> [B<-Vh>] [B<-t> I<groups|hosts|clients>] [B<-m> I<YYYY-MM>] B<postingstats> B<-t> I<groups|hosts> [B<-Vh> [B<-m> I<YYYY-MM>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
@ -277,21 +193,19 @@ See L<doc/README>.
=head1 DESCRIPTION =head1 DESCRIPTION
This script will re-format reports on newsgroup usage created by This script will re-format reports on newsgroup usage created by
B<groupstats.pl>, B<hoststats.pl> or B<clientstats.pl> and create a B<groupstats.pl> or B<cliservstats.pl> and create a message that can
message that can be posted to Usenet. be posted to Usenet.
=head2 Features and options =head2 Features and options
B<postingstats> will create a table with entries numbered from most B<postingstats> will create a table with entries numbered from most
to least and percentages calculated from the sum total of all values. to least and percentages calculated from the sum total of all values.
It depends on a sorted list on STDIN in I<dump> format with I<sums>; It depends on a sorted list on STDIN in I<dump> format with I<sums>.
I<versions> from B<clientstats.pl> are optional.
B<postingstats> needs a B<--type> and a B<--month> to create a caption B<postingstats> needs a B<--type> and a B<--month> to create a caption
and select matching lead-ins and lead-outs. B<--type> is also needed and select matching lead-ins and lead-outs. B<--type> is also needed
to catch the correct sum total from input which differs between I<groups> to catch the correct sum total from input.
on one hand and I<hosts> or I<clients> on the other hand.
It will default to posting statistics (number of postings per group) It will default to posting statistics (number of postings per group)
and last month. and last month.
@ -310,12 +224,12 @@ C<----- configuration -----> section.
=item C<$TLH> =item C<$TLH>
Top level hierarchy the report was created for. Used for display and Top level hierarchy the report was created for. Used for display and
sum total (only for I<groups>). sum total.
=item C<%Heading> =item C<%Heading>
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>. Hash with keys for I<GroupStats> and I<HostStats>. Used to display a
Used to display a heading. heading.
=item C<%TH> =item C<%TH>
@ -328,14 +242,14 @@ Output will be truncated otherwise.
=item C<%LeadIn> =item C<%LeadIn>
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>. Hash with keys for I<GroupStats> and I<HostStats>. Used to create the
Used to create the headers for the postings. Can contain other text headers for our posting. Can contain other text that will be shown
that will be shown before C<%Heading>. before C<%Heading>.
=item C<%LeadOut> =item C<%LeadOut>
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>. Hash with keys for I<GroupStats> and I<HostStats>. Will be shown at the
Will be shown at the end of the posting. end of our posting.
=back =back
@ -345,20 +259,19 @@ Will be shown at the end of the posting.
=item B<-V>, B<--version> =item B<-V>, B<--version>
Display version and copyright information and exit. Print out version and copyright information and exit.
=item B<-h>, B<--help> =item B<-h>, B<--help>
Display this man page and exit. Print this man page and exit.
=item B<-t>, B<--type> I<groups|hosts|clients> =item B<-t>, B<--type> I<groups|hosts>
Set report type to posting statistics, hosts statistics or client Set report type to posting statistics or hosts statistics accordingly.
statistics accordingly.
=item B<-m>, B<--month> I<YYYY-MM> =item B<-m>, B<--month> I<YYYY-MM>
Set month (for display only). Set month for display.
=back =back
@ -374,15 +287,11 @@ Create a posting from a posting statistics report for last month:
Create a posting from a posting statistics report for 2012-01: Create a posting from a posting statistics report for 2012-01:
groupstats.pl --nocomments --sums --format dump -m 2012-01 | postingstats.pl -t groups -m 2012-01 groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups -m 2012-01
Create a posting from a host statistics report for last month: Create a posting from a host statistics report for last month:
hoststats.pl --nocomments --sums --format dump | postingstats.pl -t hosts cliservstats.pl -t server --nocomments --sums --format dump | postingstats.pl -t hosts
Create a posting from a client statistics report for last month:
clientstats.pl --nocomments --sums --versions --format dump | postingstats.pl -t clients
=head1 FILES =head1 FILES
@ -417,7 +326,7 @@ L<doc/README>
=item - =item -
L<doc/INSTALL> l>doc/INSTALL>
=item - =item -
@ -425,11 +334,7 @@ groupstats -h
=item - =item -
hoststats -h cliservstats -h
=item -
clientstats -h
=back =back

14
contrib/dopostingstats.sh Executable file → Normal file
View file

@ -1,13 +1,9 @@
#!/bin/bash #!/bin/bash
# installation path is /srv/newsstats/, please adjust accordingly # installation path is /srv/newsstats/, please adjust accordingly
if [[ $1 =~ [0-9]{4}-[0-9]{2} ]]; then
# get month /srv/newsstats/bin/groupstats.pl --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
MONTH=$1 /srv/newsstats/bin/cliservstats.pl -t server --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl -t server --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
if ! [[ $1 =~ [0-9]{4}-[0-9]{2} ]]; then else
MONTH=$(date -d "$(date +%Y-%m-15) -1 month" '+%Y-%m') echo 'Input error, please use dopostingstats.sh YYYY-MM'
fi fi
# post stats
/srv/newsstats/bin/groupstats.pl --nocomments --sums --format dump --month $MONTH | /srv/newsstats/bin/postingstats.pl --month $MONTH | /srv/newsstats/contrib/tinews.pl -X -Y
/srv/newsstats/bin/hoststats.pl --nocomments --sums --format dump --month $MONTH | /srv/newsstats/bin/postingstats.pl -t server --month $MONTH | /srv/newsstats/contrib/tinews.pl -X -Y
/srv/newsstats/bin/clientstats.pl --nocomments --sums --versions --format dump --month $MONTH | /srv/newsstats/bin/postingstats.pl -t client --month $MONTH | /srv/newsstats/contrib/tinews.pl -X -Y

0
contrib/tinews.pl Executable file → Normal file
View file

0
contrib/yearstats.sh Executable file → Normal file
View file

View file

@ -1,23 +1,3 @@
NewsStats 0.4.0 (2025-06-02)
* Reformat $Conf{TLH} for GroupStats only.
* Extract TLH check from HostStats to subroutine, fix no-op check.
* Extract getting raw headers from HostStats to subroutine.
* Improve documentation for config file.
* ParseHeader: re-merge continuation lines.
* Add ClientStats to gatherstats.
* Move cliservstats to hoststats.
* Add clientstats (for clients).
* Add ClientStats to postingstats.
* gatherstats: Don't die on parsing errors.
* DBClnts: set version length to 50.
* gatherstats: Truncate overlong clients or versions.
* gatherstats: Remove whitespace from client and version.
* Fix version queries.
* Add ClientStats to dopostingstats.
* Let dopostingstats default to last month.
* Set executable bit for new scripts.
* Update documentation.
NewsStats 0.3.0 (2025-05-18) NewsStats 0.3.0 (2025-05-18)
* Extract GroupStats (in gatherstats) to subroutine. * Extract GroupStats (in gatherstats) to subroutine.
* Add ParseHeader() to library. * Add ParseHeader() to library.

View file

@ -1,12 +1,11 @@
NewsStats (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name> NewsStats (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
NewsStats is a software package that can be used to collect NewsStats is a software package used to gather statistical information
statistical information from a live Usenet feed and then analyze it from a live Usenet feed and for its subsequent examination.
to create statistical reports.
This package is free software; you can redistribute it and/or modify This script package is free software; you can redistribute it and/or
it under the terms of the GNU Public License as published by the Free modify it under the terms of the GNU Public License as published by
Software Foundation. the Free Software Foundation.
--------------------------------------------------------------------- ---------------------------------------------------------------------
@ -17,10 +16,9 @@ INSTALLATION INSTRUCTIONS
* Download the current version of NewsStats from * Download the current version of NewsStats from
<https://th-h.de/net/software/newsstats/>. <https://th-h.de/net/software/newsstats/>.
* Untar it into a directory of your choice, i.e. /srv/newsstats: * Untar it into a directory of your choice:
$ cd /srv # tar -xzf newsstats-nn.tar.gz
$ tar -xzf newsstats-n.n.n.tar.gz
Scripts in this path - at least feedlog.pl - should be executable by the Scripts in this path - at least feedlog.pl - should be executable by the
news user. news user.
@ -30,8 +28,8 @@ INSTALLATION INSTRUCTIONS
* Copy the sample configuration file newsstats.conf.sample to * Copy the sample configuration file newsstats.conf.sample to
newsstats.conf and modify it for your purposes: newsstats.conf and modify it for your purposes:
$ cp etc/newsstats.conf.sample etc/newsstats.conf # cp etc/newsstats.conf.sample etc/newsstats.conf
$ vim etc/newsstats.conf # vim etc/newsstats.conf
a) Mandatory configuration options a) Mandatory configuration options
@ -62,28 +60,22 @@ INSTALLATION INSTRUCTIONS
* DBTableHosts = hosts_de * DBTableHosts = hosts_de
Table holding data on postings per server. Table holding data on postings per server.
* DBTableClnts = clients_de
Table holding data on postings per client.
b) Optional configuration options b) Optional configuration options
* TLH = de.alt,news.admin * TLH = de
Limit examination to that top-level hierarchy/hierarchies. Limit examination to that top-level hierarchy.
Comma-separated list.
3) Database (mysql) setup 3) Database (mysql) setup
* Set up your database server with a username, a password and * Set up your database server with a username, password and
(optionally) a database matching the NewsStats configuration database matching the NewsStats configuration (see 2 a).
(see 2 a).
* Start the database creation script: * Start the database creation script:
$ bin/dbcreate.pl # bin/dbcreate.pl
It will create the database (if not already present), create the It will setup the necessary database tables and display some
necessary database tables and display some information on the information on the next steps.
next steps.
4) Feed (INN) setup 4) Feed (INN) setup
@ -97,39 +89,39 @@ INSTALLATION INSTRUCTIONS
:Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
* You should only feed that hierarchy (those hierarchies ...) to * You should only feed that hierarchy (those hierarchies ...) to
feedlog.pl that you want to cover with your statistical analysis. feedlog.pl you'll want to cover with your statistical
It may be a good idea to setup different feeds (to different examination. It may be a good idea to setup different feeds (to
databases ...) for different hierarchies. different databases ...) for different hierarchies.
* Please double check that your path to feedlog.pl is correct and * Please double check that your path to feedlog.pl is correct and
feedlog.pl can be executed by the news user feedlog.pl can be executed by the news user
* Check your 'newsfeeds' syntax: * Check your 'newsfeeds' syntax:
$ ctlinnd checkfile # ctlinnd checkfile
* Reload 'newsfeeds': * Reload 'newsfeeds':
$ ctlinnd reload newsfeeds 'Adding newsstats! feed' # ctlinnd reload newsfeeds 'Adding newsstats! feed'
* Watch your 'news.notice' and 'errlog' files: * Watch your 'news.notice' and 'errlog' files:
$ tail -f /var/log/news/news.notice # tail -f /var/log/news/news.notice
... ...
$ tail -f /var/log/news/errlog # tail -f /var/log/news/errlog
Everything should be going smoothly now. Everything should be going smoothly now.
* If INN is spewing error messages to 'errlog' or reporting * If INN is spewing error messages to 'errlog' or reporting
continuous respawns of feedlog.pl to 'news.notice', stop your feed: continuous respawns of feedlog.pl to 'news.notice', stop your feed:
$ ctlinnd drop 'newsstats!' # ctlinnd drop 'newsstats!'
and investigate. 'errlog' may be helpful here. and investigate. 'errlog' may be helpful here.
* You can restart the feed with * You can restart the feed with
$ ctlinnd begin 'newsstats!' # ctlinnd begin 'newsstats!'
later. later.

View file

@ -1,21 +1,21 @@
NewsStats (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name> NewsStats (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
NewsStats is a software package for gathering statistical data live NewsStats is a software package for gathering statistical data live
from a Usenet feed and subsequent analysis. from a Usenet feed and subsequent examination.
This package is free software; you can redistribute it and/or modify This script package is free software; you can redistribute it and/or
it under the terms of the GNU Public License as published by the Free modify it under the terms of the GNU Public License as published by
Software Foundation. the Free Software Foundation.
--------------------------------------------------------------------- ---------------------------------------------------------------------
What's that? What's that?
There's a multitude of tools to create statistics about newsgroup There's a multitude of tools for the statistical examination of
usage: number of postings per month or per person, longest threads, newsgroups: number of postings per month or per person, longest
and so on (see <https://th-h.de/net/usenet/stats/> [German language] threads, and so on (see <https://th-h.de/net/usenet/stats/>
for an incomplete list). Most of them use a per-newsgroup approach [German language] for an incomplete list). Most of them use a per-
while NewsStats is hierarchy oriented. newsgroup approach while NewsStats is hierarchy oriented.
NewsStats will accumulate data from a live INN feed, allowing you NewsStats will accumulate data from a live INN feed, allowing you
to process the saved information later on. to process the saved information later on.
@ -40,9 +40,7 @@ Prerequisites
* Perl 5.8.x with standard modules * Perl 5.8.x with standard modules
- Cwd - Cwd
- Encode
- File::Basename - File::Basename
- Getopt::Long
- Sys::Syslog - Sys::Syslog
* Perl modules from CPAN * Perl modules from CPAN
@ -52,7 +50,7 @@ Prerequisites
* mysql 5.0.x * mysql 5.0.x
* a working installation of INN * working installation of INN
Installation instructions Installation instructions
@ -69,52 +67,15 @@ Getting Started
table. See the feedlog.pl man page for more information. table. See the feedlog.pl man page for more information.
You can process that data via 'gatherstats.pl'; currently the You can process that data via 'gatherstats.pl'; currently the
tabulation of postings per group, injection server and posting tabulation of postings per group and injection server per month is
agent (newsreader) per month is supported. See the gatherstats.pl supported. Tabulation of clients (newsreaders) is planned. See
man page for more information. the gatherstats.pl man page for more information.
Example:
bin/gatherstats.pl
will parse raw data from the last month and save the results in
tables for postings per group, server and client, respectively.
Report generation is handled by specialised scripts for each Report generation is handled by specialised scripts for each
report type: 'groupstats.pl' for postings per group report type. Currently reports on the number of postings per group
(s), 'hoststats.pl' for postings per injection server and month and injection server and month are supported; you can
(s) and 'clientstats.pl' for postings per posting agent. See the use 'groupstats.pl' and 'cliservstats.pl' for that. See the
groupstats.pl, hoststats.pl and clientstats.pl man pages for more groupstats.pl and cliservstats.pl man pages for more information.
information.
Example:
bin/groupstats.pl -o postings-desc
bin/hoststats.pl -o postings-desc
bin/clientstats.pl -o postings-desc -v
will show reports for postings per group, per injection server and
per client (with detailed client versions) for the last month,
using the result tables filled by gatherstats.
To post those reports to Usenet, change postingstats.pl according
to your needs (sender, newsgroups and other headers, translation
of table headers and text templates) and display a test posting
by piping report data into postingstats.pl:
bin/groupstats.pl --nocomments -s -f dump | bin/postingstats.pl
If the result is to your liking, add a pipe to an inews
implementation.
Example:
bin/groupstats.pl --nocomments -s -f dump | bin/postingstats.pl | contrib/tinews.pl -X
More information
See the man pages for 'gatherstats' and the report generating
scripts.
Reporting Bugs Reporting Bugs
@ -126,7 +87,7 @@ Reporting Bugs
Development Development
This package is maintained using the Git version control system at This program is maintained using the Git version control system at
<https://code.virtcomm.de/thh/newsstats/>. <https://code.virtcomm.de/thh/newsstats/>.
Related projects Related projects

View file

@ -1,10 +1,12 @@
NewsStats To-Do List NewsStats To-Do List
==================== ====================
This is a list of possible bug fixes, improvements and enhancements for This is a list of planned bug fixes, improvements and enhancements for
NewsStats. NewsStats.
* General * General
- Improve Documentation
The documentation is rather sparse and could use some improvement.
- Add a test suite - Add a test suite
There is currently no kind of test suite or regression tests. Something like There is currently no kind of test suite or regression tests. Something like
that is badly needed. that is badly needed.
@ -25,6 +27,8 @@ NewsStats.
for late creation and deletion), optionally including the previously for late creation and deletion), optionally including the previously
mentioned information; and you should be able to get the history of any mentioned information; and you should be able to get the history of any
group. group.
- Add other reports
NewsStats should include some other kinds of reports (stats on used clients)
- Add tools for database management - Add tools for database management
NewsStats should offer tools e.g. to inject postings into the 'raw' database, NewsStats should offer tools e.g. to inject postings into the 'raw' database,
or to split databases. or to split databases.
@ -49,11 +53,23 @@ NewsStats.
Some other tests - working database connection, valid database and table Some other tests - working database connection, valid database and table
names - would be nice. names - would be nice.
+ gatherstats.pl + install/install.pl
- Use hierarchy information (see GroupInfo above) - Read current version from a file dropped and updated by installer
- Add / enhance / test error handling - Add / enhance / test error handling
- General tests and optimisations
+ groupstats.pl, hoststats.pl, clientstats.pl
- better modularisation (code reuse) + feedlog.pl
- Add / enhance / test error handling
- General tests and optimisations
+ gatherstats.pl
- Use hierarchy information (see GroupInfo above)
- Add gathering of other stats (clients, ...)
- better modularisation (code reuse for other reports!)
- Add / enhance / test error handling
- General tests and optimisations
+ groupstats.pl
- better modularisation (code reuse for other reports!)
- Add / enhance / test error handling - Add / enhance / test error handling
- General tests and optimisations - General tests and optimisations

View file

@ -13,9 +13,7 @@ DBDatabase = newsstats
DBTableRaw = raw_de DBTableRaw = raw_de
DBTableGrps = groups_de DBTableGrps = groups_de
DBTableHosts = hosts_de DBTableHosts = hosts_de
DBTableClnts = clnts_de #DBTableClnts =
### hierarchy configuration ### hierarchy configuration
# comma-separated list of TLHs to parse
# newsgroups not starting with one of those patterns are not counted
TLH = de TLH = de

View file

@ -49,8 +49,9 @@ require Exporter;
Output => [qw(OutputData FormatOutput)], Output => [qw(OutputData FormatOutput)],
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
SQLSetBounds SQLBuildClause GetMaxLength)]); SQLSetBounds SQLBuildClause GetMaxLength)]);
$VERSION = '0.4.0'; $VERSION = '0.3.0';
use Data::Dumper;
use File::Basename; use File::Basename;
use Cwd qw(realpath); use Cwd qw(realpath);
@ -279,8 +280,7 @@ sub ParseHeaders {
} elsif (/^\s/) { } elsif (/^\s/) {
# continuation lines # continuation lines
if ($Label) { if ($Label) {
s/^\s+/ /; $Header{lc($Label)} .= "\n$_";
$Header{lc($Label)} .= $_;
} else { } else {
warn (sprintf("Non-header line: %s\n",$_)); warn (sprintf("Non-header line: %s\n",$_));
} }
@ -439,7 +439,8 @@ sub OutputData {
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $LeadIn, $FileTempl, my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $LeadIn, $FileTempl,
$DBQuery, $PadField, $PadValue) = @_; $DBQuery, $PadField, $PadValue) = @_;
my %ValidKeys = %{$ValidKeys} if $ValidKeys; my %ValidKeys = %{$ValidKeys} if $ValidKeys;
my ($LastIteration, $FileName, $Handle, $OUT); my ($FileName, $Handle, $OUT);
our $LastIteration;
# define output types # define output types
my %LegalOutput; my %LegalOutput;
@ -479,7 +480,7 @@ sub OutputData {
$Handle = $OUT; $Handle = $OUT;
}; };
print $Handle &FormatOutput($Format, $Comments, $LeadIn, $Caption, print $Handle &FormatOutput($Format, $Comments, $LeadIn, $Caption,
$Key, $Value, $Precision, $PadField, $PadValue, $LastIteration); $Key, $Value, $Precision, $PadField, $PadValue);
$LastIteration = $Caption; $LastIteration = $Caption;
}; };
close $OUT if ($FileTempl); close $OUT if ($FileTempl);
@ -499,26 +500,24 @@ sub FormatOutput {
### $PadValue : padding length for value field (optional) for 'pretty' ### $PadValue : padding length for value field (optional) for 'pretty'
### OUT: $Output: formatted output ### OUT: $Output: formatted output
my ($Format, $Comments, $LeadIn, $Caption, $Key, $Value, $Precision, $PadField, my ($Format, $Comments, $LeadIn, $Caption, $Key, $Value, $Precision, $PadField,
$PadValue, $LastIteration) = @_; $PadValue) = @_;
my ($Output); my ($Output);
# keep last caption in mind
our ($LastIteration);
# create one line of output # create one line of output
if ($Format eq 'dump') { if ($Format eq 'dump') {
# output as dump (key value) # output as dump (key value)
$Output = sprintf ("# %s:\n",$Caption) $Output = sprintf ("# %s:\n",$Caption)
if ($Caption and $Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
$Output .= sprintf ("%s %u\n",$Key,$Value); $Output .= sprintf ("%s %u\n",$Key,$Value);
} elsif ($Format eq 'list') { } elsif ($Format eq 'list') {
# output as list (caption key value) # output as list (caption key value)
if ($Caption) {
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
} else {
$Output = sprintf ("%s %u\n",$Key,$Value);
}
} elsif ($Format eq 'pretty') { } elsif ($Format eq 'pretty') {
# output as a table # output as a table
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)) { if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)) {
$Output = $LeadIn; $Output = $LeadIn;
$Output .= sprintf ("# ----- %s:\n",$Caption) if $Caption; $Output .= sprintf ("# ----- %s:\n",$Caption);
} }
# increase $PadValue for numbers with decimal point # increase $PadValue for numbers with decimal point
$PadValue += $Precision+1 if $Precision; $PadValue += $Precision+1 if $Precision;
@ -583,7 +582,7 @@ sub SQLSortOrder {
### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' ### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
### $OrderBy: secondary sort by month/newsgroups (default) ### $OrderBy: secondary sort by month/newsgroups (default)
### or number of 'postings' ### or number of 'postings'
### $Type : newsgroup, host or client+version ### $Type : newsgroup, host, client
### OUT: a SQL ORDER BY clause ### OUT: a SQL ORDER BY clause
my ($GroupBy,$OrderBy,$Type) = @_; my ($GroupBy,$OrderBy,$Type) = @_;
my ($GroupSort,$OrderSort) = ('',''); my ($GroupSort,$OrderSort) = ('','');
@ -641,11 +640,9 @@ sub SQLGroupList {
### OUT: SQL code to become part of a 'WHERE' clause, ### OUT: SQL code to become part of a 'WHERE' clause,
### list of names for SQL bindings ### list of names for SQL bindings
my ($Names,$Type) = @_; my ($Names,$Type) = @_;
my $InvalidCharRegExp;
# substitute '*' wildcard with SQL wildcard character '%' # substitute '*' wildcard with SQL wildcard character '%'
$Names =~ s/\*/%/g; $Names =~ s/\*/%/g;
$InvalidCharRegExp = ',;' if $Type eq 'client'; return (undef,undef) if !CheckValidNames($Names);
return (undef,undef) if !CheckValidNames($Names,$InvalidCharRegExp);
# just one name/newsgroup? # just one name/newsgroup?
return (SQLGroupWildcard($Names,$Type),$Names) if $Names !~ /:/; return (SQLGroupWildcard($Names,$Type),$Names) if $Names !~ /:/;
my ($SQL,@WildcardNames,@NoWildcardNames); my ($SQL,@WildcardNames,@NoWildcardNames);
@ -809,10 +806,9 @@ sub CheckValidNames {
################################################################################ ################################################################################
### syntax check of a list ### syntax check of a list
### IN : $Names: list of names, e.g. newsgroups (group.one.*:group.two:group.three.*) ### IN : $Names: list of names, e.g. newsgroups (group.one.*:group.two:group.three.*)
### InvalidCharRegExp: regular expression for invalid characters
### OUT: boolean ### OUT: boolean
my ($Names,$InvalidCharRegExp) = @_; my ($Names) = @_;
$InvalidCharRegExp = ',; ' if (!$InvalidCharRegExp); my $InvalidCharRegExp = ',; ';
return ($Names =~ /[$InvalidCharRegExp]/) ? 0 : 1; return ($Names =~ /[$InvalidCharRegExp]/) ? 0 : 1;
}; };