Compare commits

...

16 commits

Author SHA1 Message Date
Thomas Hochstein 23a28815b2 gatherstats: Don't die on parsing errors.
Just warn if host or client can't be
identified.

Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-31 00:02:44 +02:00
Thomas Hochstein 39e845d552 Add ClientStats to postingstats.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-31 00:02:44 +02:00
Thomas Hochstein 09a9112679 Fix CheckValidNames().
- Make RegExp configurable.
- Change default for clients
  (client names have spaces).

Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-31 00:02:44 +02:00
Thomas Hochstein 66a175c7f8 Add clientstats (for clients).
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-31 00:02:44 +02:00
Thomas Hochstein 963f07432c Move cliservstats to hoststats.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-31 00:02:44 +02:00
Thomas Hochstein a553b374ce Add ClientStats to gatherstats.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-31 00:02:44 +02:00
Thomas Hochstein 3e73346b20 OutputData(): Change handover of LastIteration.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-30 19:48:35 +02:00
Thomas Hochstein eea296391c ParseHeader will now re-merge continuation lines.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-30 12:54:36 +02:00
Thomas Hochstein d194ef754f Move lc() to counting.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-29 19:04:16 +02:00
Thomas Hochstein c985e29b7e Improve documentation for config file.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-29 18:57:50 +02:00
Thomas Hochstein f78d4c2158 Refactor getting raw headers.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-29 18:57:50 +02:00
Thomas Hochstein 995173456b Refactor and fix TLH check.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-29 18:30:35 +02:00
Thomas Hochstein 3447cdabff Reformat Conf(TLH) for GroupStats only.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-29 18:30:35 +02:00
Thomas Hochstein 671ae67be0 Fix typo.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-29 17:11:49 +02:00
Thomas Hochstein 6122d1a49d Fix POD.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-18 17:20:36 +02:00
Thomas Hochstein 988e7b2f13 Bump version.
Signed-off-by: Thomas Hochstein <thh@thh.name>
2025-05-18 17:17:22 +02:00
12 changed files with 1272 additions and 218 deletions

598
bin/clientstats.pl Normal file
View file

@ -0,0 +1,598 @@
#! /usr/bin/perl
#
# clientstats.pl
#
# This script will get statistical data on newsreader (client) usage
# from a database.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
BEGIN {
use File::Basename;
# we're in .../bin, so our module is in ../lib
push(@INC, dirname($0).'/../lib');
}
use strict;
use warnings;
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
################################# Main program #################################
### read commandline options
my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat,
$OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy,
$OptReportType,$OptSums,$UppBound,$OptVersions,$OptConfFile);
GetOptions ('c|captions!' => \$OptCaptions,
'comments!' => \$OptComments,
'db=s' => \$OptDB,
'filetemplate=s' => \$OptFileTemplate,
'f|format=s' => \$OptFormat,
'g|group-by=s' => \$OptGroupBy,
'l|lower=i' => \$LowBound,
'm|month=s' => \$OptMonth,
'n|names=s' => \$OptNames,
'o|order-by=s' => \$OptOrderBy,
'r|report=s' => \$OptReportType,
's|sums!' => \$OptSums,
'u|upper=i' => \$UppBound,
'v|versions!' => \$OptVersions,
'conffile=s' => \$OptConfFile,
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
# parse parameters
# $OptComments defaults to TRUE if --filetemplate is not used
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
# parse $OptReportType
if ($OptReportType) {
if ($OptReportType =~ /sums?/i) {
$OptReportType = 'sum';
} else {
$OptReportType = 'default';
}
}
### read configuration
my %Conf = %{ReadConfig($OptConfFile)};
### set DBTable
$Conf{'DBTable'} = $Conf{'DBTableClnts'};
$Conf{'DBTable'} = $OptDB if $OptDB;
### init database
my $DBHandle = InitDB(\%Conf,1);
### get time period and names, prepare SQL 'WHERE' clause
# get time period
# and set caption for output and expression for SQL 'WHERE' clause
my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
# bail out if --month is invalid
&Bleat(2,"--month option has an invalid format - ".
"please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
# get list of clients and set expression for SQL 'WHERE' clause
# with placeholders as well as a list of names to bind to them
my ($SQLWhereNames,@SQLBindNames);
if ($OptNames) {
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,'client');
# bail out if --names is invalid
&Bleat(2,"--names option has an invalid format!")
if !$SQLWhereNames;
}
### build SQL WHERE clause
my $ExcludeSums = $OptSums ? '' : "client != 'ALL'";
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
$ExcludeSums,"version = 'ALL'",
&SQLSetBounds('default',$LowBound,$UppBound));
### get sort order and build SQL 'ORDER BY' clause
# force to 'month' for $OptReportType 'sum'
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
# default to 'name' if $OptGroupBy is not set and
# just one name is requested, but more than one month
$OptGroupBy = 'name' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
and $OptNames and $OptNames !~ /[:*%]/);
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'client, version');
# $GroupBy will contain 'month' or 'client, version' (parsed result of $OptGroupBy)
# set it to 'month' or 'key' for OutputData()
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
### get report type and build SQL 'SELECT' query
my $SQLSelect;
my $SQLGroupClause = '';
if ($OptReportType and $OptReportType ne 'default') {
$SQLGroupClause = "GROUP BY client, version";
# change $SQLOrderClause: replace everything before 'postings'
$SQLOrderClause =~ s/BY.+postings/BY postings/;
$SQLSelect = "'All months',LEFT(client,40),SUM(postings)";
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
$SQLOrderClause =~ s/postings/SUM(postings)/;
} else {
$SQLSelect = "month,LEFT(client,40),postings";
};
### get length of longest name delivered by query
### for formatting purposes
my $Field = ($GroupBy eq 'month') ? 'LEFT(client,40)' : 'month';
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
$Field,'postings',$SQLWhereClause,
'',@SQLBindNames);
### build and execute SQL query
my ($DBQuery);
# prepare query
$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
$SQLSelect,
$Conf{'DBDatabase'},$Conf{'DBTable'},
$SQLWhereClause,$SQLGroupClause,
$SQLOrderClause));
# execute query
$DBQuery->execute(@SQLBindNames)
or &Bleat(2,sprintf("Can't get client data for %s from %s.%s: %s\n",
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
$DBI::errstr));
### output results
# set default to 'pretty'
$OptFormat = 'pretty' if !$OptFormat;
# print captions if --caption is set
my $LeadIn;
if ($OptCaptions && $OptComments) {
# print time period with report type
my $CaptionReportType = '(number of postings for each month)';
if ($OptReportType and $OptReportType ne 'default') {
$CaptionReportType = '(number of all postings for that time period)';
}
$LeadIn .= sprintf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
# print name list if --names is set
$LeadIn .= sprintf("# ----- Names: %s\n",join(',',split(/:/,$OptNames)))
if $OptNames;
# print boundaries, if set
my $CaptionBoundary= '(counting only months fulfilling this condition)';
$LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
if ($LowBound or $UppBound);
# print primary and secondary sort order
$LeadIn .= sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
($GroupBy eq 'month') ? 'Months' : 'Names',
($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
}
# output data
# (changed code copy from NewsStats::OutputData)
my ($LastIteration, $FileName, $Handle, $OUT);
# define output types
my %LegalOutput;
@LegalOutput{('dump','list','pretty')} = ();
# bail out if format is unknown
&Bleat(2,"Unknown output type '$OptFormat'!") if !exists($LegalOutput{$OptFormat});
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
# save client for later use
my $Client = $Key;
# care for correct sorting order and abstract from month and keys:
# $Caption will be $Month or $Key, according to sorting order,
# and $Key will be $Key or $Month, respectively
my $Caption;
if ($GroupBy eq 'key') {
$Caption = $Key;
$Key = $Month;
} else {
$Caption = $Month;
}
# set output file handle
if (!$OptFileTemplate) {
$Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
} elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
close $OUT if ($LastIteration);
# safeguards for filename creation:
# replace potential problem characters with '_'
$FileName = sprintf('%s-%s',$OptFileTemplate,$Caption);
$FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
open ($OUT,">$FileName")
or &Bleat(2,sprintf("Cannot open output file '%s': $!",
$FileName));
$Handle = $OUT;
};
print $Handle &FormatOutput($OptFormat, $OptComments, $LeadIn, $Caption,
$Key, $Value, 0, $MaxLength, $MaxValLength, $LastIteration);
# output client versions
if ($OptVersions) {
### get client versions
# $SQLWhereClause without 'ALL' version
$SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
$ExcludeSums,"version != 'ALL'","client = '$Client'",
&SQLSetBounds('default',$LowBound,$UppBound));
# save length of longest client
my $ClientMaxLenght = $MaxLength;
my $ClientMaxValLenght = $MaxValLength;
# get length of longest version delivered by query
# for formatting purposes
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
'version','postings',$SQLWhereClause,
'',@SQLBindNames);
if ($MaxLength) {
# add lenght of '- '
$MaxLength += 2;
# set to length of longest client, if longer
$MaxLength = $ClientMaxLenght if $ClientMaxLenght > $MaxLength;
$MaxValLength = $ClientMaxValLenght if $ClientMaxValLenght > $MaxValLength;
}
# prepare query
my $DBVersQuery = $DBHandle->prepare(sprintf('SELECT version,postings FROM %s.%s %s %s %s',
$Conf{'DBDatabase'},$Conf{'DBTable'},
$SQLWhereClause,$SQLGroupClause,
$SQLOrderClause));
# execute query
$DBVersQuery->execute(@SQLBindNames)
or &Bleat(2,sprintf("Can't get version data for %s from %s.%s: %s\n",
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
$DBI::errstr));
# output versions
while (my ($Version, $Postings) = $DBVersQuery->fetchrow_array) {
$Version = '- ' . $Version;
print $Handle &FormatOutput($OptFormat, $OptComments, $LeadIn, '',
$Version, $Postings, 0, $MaxLength, $MaxValLength,
'');
}
}
$LastIteration = $Caption;
};
close $OUT if ($OptFileTemplate);
### close handles
$DBHandle->disconnect;
__END__
################################ Documentation #################################
=head1 NAME
clientstats - create reports on client usage
=head1 SYNOPSIS
B<clientstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS
See L<doc/README>.
=head1 DESCRIPTION
This script create reports on newsgroup usage (number of postings
using each client per month) taken from result tables created by
B<gatherstats.pl>.
=head2 Features and options
=head3 Time period and names
The time period to act on defaults to last month; you can assign another
time period or a single month (or drop all time constraints) via the
B<--month> option (see below).
B<clientstats> will process all clients by default; you can limit
processing to only some clients by supplying a list of those names by
using the B<--names> option (see below).
=head3 Report type
You can choose between different B<--report> types: postings per month
or all postings summed up; for details, see below.
=head3 Upper and lower boundaries
Furthermore you can set an upper and/or lower boundary to exclude some
results from output via the B<--lower> and B<--upper> options,
respectively. By default, all clients with more and/or less postings
per month will be excluded from the result set (i.e. not shown and
not considered for sum reports).
=head3 Sorting and formatting the output
By default, all results are grouped by month; you can group results by
clients instead via the B<--group-by> option. Within those groups,
the list of clients (or months) is sorted alphabetically
(or chronologically, respectively) ascending. You can change that order
(and sort by number of postings) with the B<--order-by> option. For
details and exceptions, please see below.
The results will be formatted as a kind of table; you can change the
output format to a simple list or just a list of names and number of
postings with the B<--format> option. Captions will be added by means of
the B<--caption> option; all comments (and captions) can be supressed by
using B<--nocomments>.
Last but not least you can redirect all output to a number of files, e.g.
one for each month, by submitting the B<--filetemplate> option, see below.
=head2 Configuration
B<clientstats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration file
submitted by the B<--conffile> option.
See doc/INSTALL for an overview of possible configuration options.
You can override some configuration options via the B<--db> option.
=head1 OPTIONS
=over 3
=item B<-V>, B<--version>
Print out version and copyright information and exit.
=item B<-h>, B<--help>
Print this man page and exit.
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
Set processing period to a single month in YYYY-MM format or to a time
period between two month in YYYY-MM:YYYY-MM format (two month, separated
by a colon). By using the keyword I<all> instead, you can set no
processing period to process the whole database.
=item B<-n>, B<--names> I<name(s)>
Limit processing to a certain set of client names. I<names(s)>
can be a single name (eternal-september.org), a group of names
(*.inka.de) or a list of either of these, separated by colons, for
example
eternal-september.org:solani.org:*.inka.de
=item B<-s>, B<--sums|--nosums> (sum per month)
Include "virtual" clients named "ALL" for every month in output,
containing the sum of all detected clients for that month.
=item B<-r>, B<--report> I<default|sums>
Choose the report type: I<default> or I<sums>
By default, B<clientstats> will report the number of postings for each
client in each month. But it can also report the total sum of postings
per client for all months.
For report type I<sums>, the B<group-by> option has no meaning and
will be silently ignored (see below).
=item B<-l>, B<--lower> I<lower boundary>
Set the lower boundary. See below.
=item B<-l>, B<--upper> I<upper boundary>
Set the upper boundary.
By default, all clients with more postings per month than the
upper boundary and/or less postings per month than the lower boundary
will be excluded from further processing. For the default report that
means each month only /clients with a number of postings between
the boundaries will be displayed. For the sums report, /clients
with a number of postings exceeding the boundaries in all (!) months
will not be considered.
=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>
By default, all results are grouped by month, sorted chronologically in
ascending order, like this:
# ----- 2012-01:
arcor-online.net : 9379
individual.net : 19525
news.albasani.net: 9063
# ----- 2012-02:
arcor-online.net : 8606
individual.net : 16768
news.albasani.net: 7879
The results can be grouped by client instead via
B<--group-by> I<name>:
----- individual.net
2012-01: 19525
2012-02: 16768
----- arcor-online.net
2012-01: 9379
2012-02: 8606
----- news.albasani.net
2012-01: 9063
2012-02: 7879
By appending I<-desc> to the group-by option parameter, you can reverse
the sort order - e.g. B<--group-by> I<month-desc> will give:
# ----- 2012-02:
arcor-online.net : 8606
individual.net : 16768
news.albasani.net: 7879
# ----- 2012-01:
arcor-online.net : 9379
individual.net : 19525
news.albasani.net: 9063
Sums reports (see above) will always be grouped by months; this option
will therefore be ignored.
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
Within each group (a single month or single client, see above),
the report will be sorted by name (or month) in ascending alphabetical
order by default. You can change the sort order to descending or sort
by number of postings instead.
=item B<-f>, B<--format> I<pretty|list|dump>
Select the output format, I<pretty> being the default:
# ----- 2012-01:
arcor-online.net : 9379
individual.net : 19525
# ----- 2012-02:
arcor-online.net : 8606
individual.net : 16768
I<list> format looks like this:
2012-01 arcor-online.net 9379
2012-01 individual.net 19525
2012-02 arcor-online.net 8606
2012-02 individual.net 16768
And I<dump> format looks like this:
# 2012-01:
arcor-online.net 9379
individual.net 19525
# 2012-02:
arcor-online.net 8606
individual.net 16768
You can remove the comments by using B<--nocomments>, see below.
=item B<-c>, B<--captions|--nocaptions>
Add captions to output, like this:
----- Report for 2012-01 to 2012-02 (number of postings for each month)
----- Names: individual.net
----- Threshold: 8000 => x (counting only month fulfilling this condition)
----- Grouped by Month (ascending), sorted by number of postings descending
False by default.
=item B<--comments|--nocomments>
Add comments (group headers) to I<dump> and I<pretty> output. True by default
as logn as B<--filetemplate> is not set.
Use I<--nocomments> to suppress anything except client names or months and
numbers of postings.
=item B<--filetemplate> I<filename template>
Save output to file(s) instead of dumping it to STDOUT. B<clientstats>
will create one file for each month (or each client, according to the
setting of B<--group-by>, see above), with filenames composed by adding
year and month (or client names) to the I<filename template>, for
example with B<--filetemplate> I<stats>:
stats-2012-01
stats-2012-02
... and so on
=item B<--db> I<database table>
Override I<DBTableClnts> or I<DBTableClnts> from F<newsstats.conf>.
=item B<--conffile> I<filename>
Load configuration from I<filename> instead of F<newsstats.conf>.
=back
=head1 INSTALLATION
See L<doc/INSTALL>.
=head1 EXAMPLES
Show number of postings per group for lasth month in I<pretty> format:
clientstats
Show that report for January of 2010 and *.inka plus individual.net:
clientstats --month 2010-01 --names *.inka:individual.net:
Only show clients with 30 postings or less last month, ordered
by number of postings, descending, in I<pretty> format:
clientstats --upper 30 --order-by postings-desc
List number of postings per host for each month of 2010 and redirect
output to one file for each month, named hosts-2010-01 and so on, in
machine-readable form (without formatting):
clientstats -m 2010-01:2010-12 -f dump --filetemplate hosts
=head1 FILES
=over 4
=item F<bin/clientstats.pl>
The script itself.
=item F<lib/NewsStats.pm>
Library functions for the NewsStats package.
=item F<etc/newsstats.conf>
Runtime configuration file.
=back
=head1 BUGS
Please report any bugs or feature requests to the author or use the
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
=head1 SEE ALSO
=over 2
=item -
L<doc/README>
=item -
L<doc/INSTALL>
=item -
gatherstats -h
=back
This script is part of the B<NewsStats> package.
=head1 AUTHOR
Thomas Hochstein <thh@thh.name>
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.
=cut

View file

@ -46,7 +46,7 @@ my $DBCreate = <<SQLDB;
CREATE DATABASE IF NOT EXISTS `$Conf{'DBDatabase'}` DEFAULT CHARSET=utf8;
SQLDB
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS, 'DBTableHosts' => <<HOSTS);
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS, 'DBTableHosts' => <<HOSTS, 'DBTableClnts' => <<CLIENTS);
--
-- Table structure for table DBTableRaw
--
@ -102,6 +102,23 @@ CREATE TABLE IF NOT EXISTS `$Conf{'DBTableHosts'}` (
KEY `host` (`host`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per server';
HOSTS
--
-- Table structure for table DBTableClnts
--
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableClnts'}` (
`id` bigint(20) unsigned NOT NULL auto_increment,
`month` varchar(7) character set ascii NOT NULL,
`client` varchar(150) NOT NULL,
`version` varchar(20) NOT NULL,
`postings` int(11) NOT NULL,
`revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `month_client_version` (`month`,`client`,`version`),
KEY `month` (`month`),
KEY `client` (`client`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='Postings per client';
CLIENTS
##### --------------------------------------------------------------------------
##### Installation / upgrade instructions

View file

@ -23,6 +23,8 @@ use warnings;
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList ParseHeaders);
use DBI;
use Data::Dumper;
use Encode qw(decode encode);
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
@ -31,7 +33,7 @@ Getopt::Long::config ('bundling');
# define types of information that can be gathered
# all / groups (/ clients / hosts)
my %LegalStats;
@LegalStats{('all','groups','hosts')} = ();
@LegalStats{('all','groups','hosts','clients')} = ();
################################# Main program #################################
@ -61,8 +63,8 @@ my %Conf = %{ReadConfig($OptConfFile)};
my %ConfOverride;
$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
$ConfOverride{'TLH'} = $OptTLH if $OptTLH;
&OverrideConfig(\%Conf,\%ConfOverride);
@ -84,40 +86,12 @@ my ($Period) = &GetTimePeriod($OptMonth);
&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
"'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
### reformat $Conf{'TLH'}
my $TLH;
if ($Conf{'TLH'}) {
# $Conf{'TLH'} is parsed as an array by Config::Auto;
# make a flat list again, separated by :
if (ref($Conf{'TLH'}) eq 'ARRAY') {
$TLH = join(':',@{$Conf{'TLH'}});
} else {
$TLH = $Conf{'TLH'};
}
# strip whitespace
$TLH =~ s/\s//g;
# add trailing dots if none are present yet
# (using negative look-behind assertions)
$TLH =~ s/(?<!\.):/.:/g;
$TLH =~ s/(?<!\.)$/./;
# check for illegal characters
&Bleat(2,'Config error - illegal characters in TLH definition!')
if ($TLH !~ /^[a-zA-Z0-9:+.-]+$/);
# escape dots
$TLH =~ s/\./\\./g;
if ($TLH =~ /:/) {
# reformat $TLH from a:b to (a)|(b),
# e.g. replace ':' by ')|('
$TLH =~ s/:/)|(/g;
$TLH = '(' . $TLH . ')';
};
};
### init database
my $DBHandle = InitDB(\%Conf,1);
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
my $DBHosts = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
my $DBHandle = InitDB(\%Conf,1);
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
my $DBHosts = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
my $DBClients = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableClnts'});
### get data for each month
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
@ -127,6 +101,34 @@ foreach my $Month (&ListMonth($Period)) {
### GroupStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
### reformat $Conf{'TLH'}
my $TLH;
if ($Conf{'TLH'}) {
# $Conf{'TLH'} is parsed as an array by Config::Auto;
# make a flat list again, separated by :
if (ref($Conf{'TLH'}) eq 'ARRAY') {
$TLH = join(':',@{$Conf{'TLH'}});
} else {
$TLH = $Conf{'TLH'};
}
# strip whitespace
$TLH =~ s/\s//g;
# add trailing dots if none are present yet
# (using negative look-behind assertions)
$TLH =~ s/(?<!\.):/.:/g;
$TLH =~ s/(?<!\.)$/./;
# check for illegal characters
&Bleat(2,'Config error - illegal characters in TLH definition!')
if ($TLH !~ /^[a-zA-Z0-9:+.-]+$/);
# escape dots
$TLH =~ s/\./\\./g;
if ($TLH =~ /:/) {
# reformat $TLH from a:b to (a)|(b),
# e.g. replace ':' by ')|('
$TLH =~ s/:/)|(/g;
$TLH = '(' . $TLH . ')';
};
};
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptMID,$OptTest,$OptDebug);
};
@ -138,7 +140,16 @@ foreach my $Month (&ListMonth($Period)) {
googlegroups.com heirich.name news.neostrada.pl netcologne.de newsdawg.com newscene.com
news-service.com octanews.com readnews.com wieslauf.sub.de highway.telekom.at
united-newsserver.de xennanews.com xlned.com xsnews.nl news.xs4all.nl);
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptMID,$OptTest,$OptDebug,@KnownHosts);
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@KnownHosts);
};
### ClientStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'clients') {
# define agents/clients that shouldn't be counted
my @DropAgents = qw(debian fedora firefox gecko gentoo lightning mandriva mnenhy mozilla
pclinuxos perl php presto suse suse/opensuse thunderbrowse ubuntu version);
push(@DropAgents, 'red hat');
&ClientStats($DBHandle,$DBRaw,$DBClients,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@DropAgents);
};
};
@ -154,10 +165,10 @@ sub GroupStats {
### $DBRaw : database table for raw data (to read from)
### $DBGrps : database table for groups data (to write to)
### $Month : current month to do
### $MID : specific Message-ID to fetch (testing purposes)
### $TLH : TLHs to collect
### $Checkgroupsfile : filename template for checkgroups file
### (expanded to $Checkgroupsfile-$Month)
### $MID : specific Message-ID to fetch (testing purposes)
### $Test : test mode
### $Debug : debug mode
### OUT: (nothing)
@ -252,46 +263,24 @@ sub HostStats {
### $DBRaw : database table for raw data (to read from)
### $DBHosts : database table for hosts data (to write to)
### $Month : current month to do
### $TLH : TLHs to collect
### $MID : specific Message-ID to fetch (testing purposes)
### $Test : test mode
### $Debug : debug mode
### @KnownHosts : list of known hosts with subdomains
### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBHosts,$Month,$MID,$Test,$Debug,@KnownHosts) = @_;
my ($DBHandle,$DBRaw,$DBHosts,$Month,$TLH,$MID,$Test,$Debug,@KnownHosts) = @_;
my (%Postings,$DBQuery);
if (!$MID) {
# get raw header data from raw table for given month
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
"$DBI::errstr\n",$MID,$DBRaw));
}
$DBQuery = GetHeaders($DBHandle,$DBRaw,$Month,$MID);
### ----------------------------------------------
print "----- HostStats -----\n" if $Debug;
### parse headers
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
### skip postings with wrong TLH
# remove whitespace from contents of Newsgroups:
chomp($Newsgroups);
$Newsgroups =~ s/\s//;
my $GroupCount;
for (split /,/, $Newsgroups) {
# don't count newsgroup/hierarchy in wrong TLH
next if($TLH and !/^$TLH/);
$GroupCount++;
};
next if !$GroupCount;
next if ($TLH && !CheckTLH($Newsgroups,$TLH));
my $Host;
my %Header = ParseHeaders(split(/\n/,$Headers));
@ -344,15 +333,13 @@ sub HostStats {
}
}
# lowercase
$Host = lc($Host);
# count host
if ($Host) {
$Host = lc($Host);
$Postings{$Host}++;
$Postings{'ALL'}++;
} else {
&Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
&Bleat(1,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
}
printf("%s: %s\n", $Header{'message-id'}, $Host) if ($MID or $Debug && $Debug >1);
@ -381,6 +368,357 @@ sub HostStats {
};
};
sub ClientStats {
### ----------------------------------------------------------------------------
### collect number of postings per client (and version)
### IN : $DBHandle : database handle
### $DBRaw : database table for raw data (to read from)
### $DBClients : database table for clients data (to write to)
### $Month : current month to do
### $TLH : TLHs to collect
### $MID : specific Message-ID to fetch (testing purposes)
### $Test : test mode
### $Debug : debug mode
### @DropAgents : list of UserAgent "agents" that won't be counted
### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBClients,$Month,$TLH,$MID,$Test,$Debug,@DropAgents) = @_;
my (%Postings,$DBQuery);
my %DropAgent = map { $_ => 1 } @DropAgents;
$DBQuery = GetHeaders($DBHandle,$DBRaw,$Month,$MID);
### ----------------------------------------------
print "----- ClientStats -----\n" if $Debug;
### parse headers
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
### skip postings with wrong TLH
next if ($TLH && !CheckTLH($Newsgroups,$TLH));
my (@Clients, $Client, $Version);
my %Header = ParseHeaders(split(/\n/,$Headers));
### X-Mailer
if ($Header{'x-mailer'}) {
# transfer to x-newsreader and parse from there
$Header{'x-newsreader'} = $Header{'x-mailer'};
}
### X-Newsreader
if ($Header{'x-newsreader'}) {
$Header{'x-newsreader'} = RemoveComments($Header{'x-newsreader'});
# remove 'http://' and 'via' (CrossPoint)
$Header{'x-newsreader'} =~ s/https?:\/\///;
$Header{'x-newsreader'} =~ s/ ?via(.+)?$//;
# parse header
# User-Agent style
if ($Header{'x-newsreader'} =~ /^([^\/ ]+\/[^\/ ]+ ?)+$/) {
# transfer to user-agent and parse from there
$Header{'user-agent'} = $Header{'x-newsreader'};
# "client name version"
} elsif ($Header{'x-newsreader'} =~ / /) {
($Client, $Version) = ParseXNewsreader($Header{'x-newsreader'});
} else {
$Client = $Header{'x-newsreader'};
$Version = '';
}
if ($Client) {
# special cases
$Client = 'CrossPoint' if $Client =~ /^CrossPoint\//;
$Client = 'Virtual Access' if $Client =~ /^Virtual Access/;
my %UserAgent = (agent => $Client,
version => $Version);
push @Clients, { %UserAgent };
} else {
$Header{'user-agent'} = $Header{'x-newsreader'};
}
}
### User-Agent
if(!@Clients && $Header{'user-agent'}) {
$Header{'user-agent'} = RemoveComments($Header{'user-agent'});
### well-formed?
if ($Header{'user-agent'} =~ /^([^\/ ]+\/[^\/ ]+ ?)+$/) {
@Clients = ParseUserAgent($Header{'user-agent'});
} else {
# snip and add known well-formed agents from the trailing end
while ($Header{'user-agent'} =~ /(((Hamster)|(Hamster-Pg)|(KorrNews)|(OE-Tools)|(Mime-proxy))(\/[^\/ ]+))$/) {
push @Clients, ParseUserAgent($1);
$Header{'user-agent'} =~ s/ [^\/ ]+\/[^\/ ]+$//;
}
### special cases
# remove 'http://open-news-network.org'
$Header{'user-agent'} =~ s/^https?:\/\/open-news-network.org(\S+)?//;
# Thunderbird
if ($Header{'user-agent'} =~ /((Mozilla[- ])?Thunderbird) ?([0-9.]+)?/) {
$Client = 'Thunderbird';
$Version = $3;
# XP
} elsif ($Header{'user-agent'} =~ /((TrueXP|FreeXP|XP2(\/Agent)?)) \/(.+)$/) {
$Client = $1;
$Version = $4;
$Client = 'XP2' if $Client eq 'XP2/Agent';
### most general case
# client version
# client/version
# client/32 version
# - version may end in one non-numeric character
# - including trailing beta/pre/...
# 1) client: (([^0-9]+)|(\D+\/\d+))
# 2) version: (\S+\d\D?)
# 3) trailing: (( alpha\d?)|( beta\d?)|( rc\d)| pre| trialware)?
} elsif ($Header{'user-agent'} =~ /^(([^0-9]+)|(\D+\/\d+))[\/ ]((\S+\d\D?)(( alpha\d?)|( beta\d?)|( rc\d)| pre| trialware)?)$/) {
$Client = $1;
$Version = $4;
### some very special cases
# SeaMonkey/nn
} elsif ($Header{'user-agent'} =~ /SeaMonkey\/([0-9.]+)/) {
$Client = 'Seamonkey';
$Version = $1;
# Emacs nn/Gnus nn
} elsif ($Header{'user-agent'} =~ /Emacs [0-9.]+\/Gnus ([0-9.]+)/) {
$Client = 'Gnus';
$Version = $1;
# failed to parse
} else {
$Client = $Header{'user-agent'};
}
# count client, if found
if ($Client) {
my %UserAgent = (agent => $Client,
version => $Version);
push @Clients, { %UserAgent };
} else {
&Bleat(1,sprintf("%s FAILED", $Header{'message-id'})) if !@Clients;
}
}
}
if (@Clients) {
$Postings{'ALL'}{'ALL'}++;
foreach (@Clients) {
# filter agents for User-Agent with multiple agents
next if $#Clients && exists($DropAgent{lc($_->{'agent'})});
# encode to utf-8, if necessary
$_->{'agent'} = encode('UTF-8', $_->{'agent'}) if $_->{'agent'} =~ /[\x80-\x{ffff}]/;
$_->{'version'} = encode('UTF-8', $_->{'version'}) if $_->{'version'} and $_->{'version'} =~ /[\x80-\x{ffff}]/;
# special cases
# Mozilla
$_->{'agent'} = 'Mozilla' if $_->{'agent'} eq '•Mozilla';
$_->{'agent'} =~ s/^Mozilla //;
# Forte Agent
$_->{'agent'} = 'Forte Agent' if $_->{'agent'} eq 'ForteAgent';
if ($_->{'agent'} eq 'Forte Agent') {
$_->{'version'} =~ s/-/\//;
$_->{'version'} = '' if $_->{'version'} eq '32Bit';
}
# count client ('ALL') and client/version (if version is present)
$Postings{$_->{'agent'}}{'ALL'}++;
$Postings{$_->{'agent'}}{$_->{'version'}}++ if $_->{'version'};
printf("%s: %s {%s}\n", $Header{'message-id'}, $_->{'agent'},
$_->{'version'} ? $Postings{$_->{'agent'}}{$_->{'version'}} : '')
if ($MID or $Debug && $Debug >1);
}
}
};
# delete old data for that month
if (!$Test) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
$DBClients),undef,$Month)
or &Bleat(2,sprintf("Can't delete old client data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBClients));
};
foreach my $Client (sort keys %Postings) {
foreach my $Version (sort keys %{$Postings{$Client}}) {
printf ("%s {%s}: %d\n",$Client,$Version,$Postings{$Client}{$Version}) if $Debug;
if (!$Test) {
# write to database
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
"(month,client,version,postings) ".
"VALUES (?, ?, ?, ?)",$DBClients));
$DBQuery->execute($Month, $Client, $Version, $Postings{$Client}{$Version})
or &Bleat(2,sprintf("Can't write groups data for %s/%s/%s to %s: ".
"$DBI::errstr\n",$Month,$Client,$Version,$DBClients));
$DBQuery->finish;
};
}
};
};
sub GetHeaders {
### ----------------------------------------------------------------------------
### get (newsgroups and) raw headers from database
### IN : $DBHandle: database handle
### $DBRaw : database table for raw data (to read from)
### $Month : current month to do
### $MID : specific Message-ID to fetch (testing purposes)
### OUT: DBI statement handle
my ($DBHandle,$DBRaw,$Month,$MID) = @_;
my $DBQuery;
if (!$MID) {
# get raw header data from raw table for given month
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get header data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get header data for %s from %s: ".
"$DBI::errstr\n",$MID,$DBRaw));
}
return $DBQuery;
}
sub CheckTLH {
### ----------------------------------------------------------------------------
### count newsgroups from legal TLH(s)
### IN : $Newsgroups: comma separated list of newsgroups
### $TLH : (reference to an array of) legal TLH(s)
### OUT: number of newsgroups from legal TLH(s)
my ($Newsgroups,$TLH) = @_;
my (@TLH,$GroupCount);
# fill @TLH from $TLH, which can be an array reference or a scalar value
if (ref($TLH) eq 'ARRAY') {
@TLH = @{$TLH};
} else {
push @TLH, $TLH;
}
# remove whitespace from contents of Newsgroups:
chomp($Newsgroups);
$Newsgroups =~ s/\s//;
for (split /,/, $Newsgroups) {
my $Newsgroup = $_;
foreach (@TLH) {
# increment $GroupCount if $Newsgroup starts with $TLH
$GroupCount++ if $Newsgroup =~ /^$_/;
}
};
return $GroupCount;
}
sub RemoveComments {
### ----------------------------------------------------------------------------
### remove comments and other junk from header
### IN : $Header: a header
### OUT: the header, with comments and other junk removed
my $Header = shift;
# decode MIME encoded words
if ($Header =~ /=\?\S+\?[BQ]\?/) {
$Header = decode("MIME-Header",$Header);
}
# remove nested comments from '(' to first ')'
while ($Header =~ /\([^)]+\)/) {
$Header =~ s/\([^()]+?\)//;
}
# remove dangling ')'
$Header =~ s/\S+\)//;
# remove from dangling '(' to end of header
$Header =~ s/\(.+$//;
# remove from '[' to first ']'
$Header =~ s/\[[^\[\]]+?\]//;
# remove 'Nr. ... lebt'
$Header =~ s/Nr\. \d+ lebt//;
# remove nn:nn:nn
$Header =~ s/\d\d:\d\d:\d\d//;
# remove 'mm/... '
$Header =~ s/\/mm\/\S+//;
# remove ' DE' / _DE'
$Header =~ s/[ _]DE//;
# remove trailing 'eol' or '-shl'
$Header =~ s/(eol)|(-shl)$//;
# remove from ';' or ',' (CrossPoint)
# or '&' to end of header
$Header =~ s/[;,&].+$//;
# remove from 'by ' or 'unter Windows' or '@ Windows'
# to end of header
$Header =~ s/((by )|(unter +Windows)|(@ Windows)).+$//;
# remove superfluous whitespace in header
# and whitespace around header
$Header =~ s/\s+/ /g;
$Header =~ s/^\s+//;
$Header =~ s/\s+$//;
return $Header;
}
sub ParseXNewsreader {
### ----------------------------------------------------------------------------
### parse X-Newsreader header (client and version, if present)
### IN : $XNR: a X-Newsreader header
### OUT: client and version, if present
my $XNR = shift;
my ($Client, $Version);
foreach (split(/ /,$XNR)) {
# add to client name if no digit present
if (!/\d[0-9.]/ or /\/\d$/) {
$Client .= $_ . ' ' ;
# otherwise, use as version and terminate parsing
} else {
$Version = $_;
last;
}
}
# remove trailing whitespace
$Client =~ s/\s+$// if $Client;
# set $Version
$Version = '' if !$Version;
return $Client, $Version;
}
sub ParseUserAgent {
### ----------------------------------------------------------------------------
### parse User-Agent header (agent and version)
### IN : $UserAgent: a User-Agent header
### OUT: array of hashes (agent/version)
my $UserAgent = shift;
my @UserAgents;
# a well-formed User-Agent header will contain pairs of
# client/version, i.e. 'slrn/0.9.7.3'
foreach (split(/ /,$UserAgent)) {
my %UserAgent;
/^(.+)\/(.+)$/;
$UserAgent{'agent'} = $1;
$UserAgent{'version'} = $2;
push @UserAgents, { %UserAgent };
}
return @UserAgents;
}
__END__
################################ Documentation #################################
@ -391,7 +729,7 @@ gatherstats - process statistical data from a raw source
=head1 SYNOPSIS
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--conffile> I<filename>]
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS
@ -438,12 +776,23 @@ override that default through the B<--groupsdb> option.
=item B<hosts> (postings from host per month)
B<gatherstats> will examine Injection-Info:, X-Trace: and Path:
headers and try to normalize them. Groups not in I<TLH> will be
ignored. The sum of all detected hosts will also saved for each month.
headers and try to normalize them. The sum of all detected hosts will
also be saved for each month. Groups not in I<TLH> will be ignored.
Data is written to I<DBTableHosts> (see L<doc/INSTALL>); you can
override that default through the B<--hostsdb> option.
=item B<clients> (postings by client per month)
B<gatherstats> will examine User-Agent:, X-Newsreader: and X-Mailer:
headers and try to remove comments and non-standard contents. Clients
and client versions are counted separately. The sum of all detected
clients will also be saved for each month. Groups not in I<TLH> will
be ignored.
Data is written to I<DBTableClnts> (see L<doc/INSTALL>); you can
override that default through the B<--clientsdb> option.
=back
=head2 Configuration
@ -511,10 +860,12 @@ Newsgroups not found in the checkgroups file will be dropped (and
logged to STDERR), and newsgroups found there but having no postings
will be added with a count of 0 (and logged to STDERR).
=item B<--hierarchy> I<TLH> (newsgroup hierarchy)
=item B<--hierarchy> I<TLH> (newsgroup hierarchy/hierarchies)
Override I<TLH> from F<newsstats.conf>.
I<TLH> can be a single word or a comma-separated list.
=item B<--rawdb> I<table> (raw data table)
Override I<DBTableRaw> from F<newsstats.conf>.
@ -523,14 +874,14 @@ Override I<DBTableRaw> from F<newsstats.conf>.
Override I<DBTableGrps> from F<newsstats.conf>.
=item B<--clientsdb> I<table> (client data table)
Override I<DBTableClnts> from F<newsstats.conf>.
=item B<--hostsdb> I<table> (host data table)
Override I<DBTableHosts> from F<newsstats.conf>.
=item B<--clientsdb> I<table> (client data table)
Override I<DBTableClnts> from F<newsstats.conf>.
=item B<--conffile> I<filename>
Load configuration from I<filename> instead of F<newsstats.conf>.

View file

@ -685,7 +685,7 @@ L<doc/README>
=item -
l>doc/INSTALL>
L<doc/INSTALL>
=item -

View file

@ -1,9 +1,9 @@
#! /usr/bin/perl
#
# cliservstats.pl
# hoststats.pl
#
# This script will get statistical data on client (newsreader) and
# server (host) usage from a database.
# This script will get statistical data on server (host) usage
# from a database.
#
# It is part of the NewsStats package.
#
@ -31,7 +31,7 @@ Getopt::Long::config ('bundling');
### read commandline options
my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat,
$OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy,
$OptReportType,$OptSums,$OptType,$UppBound,$OptConfFile);
$OptReportType,$OptSums,$UppBound,$OptConfFile);
GetOptions ('c|captions!' => \$OptCaptions,
'comments!' => \$OptComments,
'db=s' => \$OptDB,
@ -44,7 +44,6 @@ GetOptions ('c|captions!' => \$OptCaptions,
'o|order-by=s' => \$OptOrderBy,
'r|report=s' => \$OptReportType,
's|sums!' => \$OptSums,
't|type=s' => \$OptType,
'u|upper=i' => \$UppBound,
'conffile=s' => \$OptConfFile,
'h|help' => \&ShowPOD,
@ -52,15 +51,6 @@ GetOptions ('c|captions!' => \$OptCaptions,
# parse parameters
# $OptComments defaults to TRUE if --filetemplate is not used
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
# parse $OptType
if ($OptType) {
if ($OptType =~ /(host|server)s?/i) {
$OptType = 'host';
} elsif ($OptType =~ /(newsreader|client)s?/i) {
$OptType = 'client';
}
}
&Bleat(2, "Please use '--type server' or '-type newsreader'.") if !$OptType;
# parse $OptReportType
if ($OptReportType) {
if ($OptReportType =~ /sums?/i) {
@ -74,14 +64,8 @@ if ($OptReportType) {
my %Conf = %{ReadConfig($OptConfFile)};
### set DBTable
if ($OptDB) {
$Conf{'DBTable'} = $OptDB;
}
elsif ($OptType eq 'host') {
$Conf{'DBTable'} = $Conf{'DBTableHosts'};
} else {
$Conf{'DBTable'} = $Conf{'DBTableClnts'};
}
$Conf{'DBTable'} = $Conf{'DBTableHosts'};
$Conf{'DBTable'} = $OptDB if $OptDB;
### init database
my $DBHandle = InitDB(\%Conf,1);
@ -97,14 +81,14 @@ my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
# with placeholders as well as a list of names to bind to them
my ($SQLWhereNames,@SQLBindNames);
if ($OptNames) {
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,$OptType);
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,'host');
# bail out if --names is invalid
&Bleat(2,"--names option has an invalid format!")
if !$SQLWhereNames;
}
### build SQL WHERE clause
my $ExcludeSums = $OptSums ? '' : sprintf("%s != 'ALL'",$OptType);
my $ExcludeSums = $OptSums ? '' : sprintf("%s != 'ALL'",'host');
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
$ExcludeSums,
&SQLSetBounds('default',$LowBound,$UppBound));
@ -118,8 +102,8 @@ $OptGroupBy = 'name' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
and $OptNames and $OptNames !~ /[:*%]/);
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, $OptType);
# $GroupBy will contain 'month' or 'host'/'client' (parsed result of $OptGroupBy)
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'host');
# $GroupBy will contain 'month' or 'host' (parsed result of $OptGroupBy)
# set it to 'month' or 'key' for OutputData()
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
@ -128,19 +112,19 @@ my $SQLSelect;
my $SQLGroupClause = '';
my $Precision = 0; # number of digits right of decimal point for output
if ($OptReportType and $OptReportType ne 'default') {
$SQLGroupClause = "GROUP BY $OptType";
$SQLGroupClause = "GROUP BY host";
# change $SQLOrderClause: replace everything before 'postings'
$SQLOrderClause =~ s/BY.+postings/BY postings/;
$SQLSelect = "'All months',$OptType,SUM(postings)";
$SQLSelect = "'All months',host,SUM(postings)";
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
$SQLOrderClause =~ s/postings/SUM(postings)/;
} else {
$SQLSelect = "month,$OptType,postings";
$SQLSelect = "month,host,postings";
};
### get length of longest name delivered by query
### for formatting purposes
my $Field = ($GroupBy eq 'month') ? $OptType : 'month';
my $Field = ($GroupBy eq 'month') ? 'host' : 'month';
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
$Field,'postings',$SQLWhereClause,
'',@SQLBindNames);
@ -155,8 +139,8 @@ $DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
$SQLOrderClause));
# execute query
$DBQuery->execute(@SQLBindNames)
or &Bleat(2,sprintf("Can't get %s data for %s from %s.%s: %s\n",
$OptType,$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
or &Bleat(2,sprintf("Can't get host data for %s from %s.%s: %s\n",
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
$DBI::errstr));
### output results
@ -175,7 +159,7 @@ if ($OptCaptions && $OptComments) {
$LeadIn .= sprintf("# ----- Names: %s\n",join(',',split(/:/,$OptNames)))
if $OptNames;
# print boundaries, if set
my $CaptionBoundary= '(counting only month fulfilling this condition)';
my $CaptionBoundary= '(counting only months fulfilling this condition)';
$LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
@ -201,11 +185,11 @@ __END__
=head1 NAME
cliservstats - create reports on host or client usage
hoststats - create reports on host usage
=head1 SYNOPSIS
B<cliservstats> B<-t> I<host|client> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<server(s)|client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
B<hoststats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<server(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS
@ -214,8 +198,7 @@ See L<doc/README>.
=head1 DESCRIPTION
This script create reports on newsgroup usage (number of postings from
each host or using each client per month) taken from result tables
created by B<gatherstats.pl>.
each host) taken from result tables created by B<gatherstats.pl>.
=head2 Features and options
@ -225,9 +208,9 @@ The time period to act on defaults to last month; you can assign another
time period or a single month (or drop all time constraints) via the
B<--month> option (see below).
B<cliservstats> will process all hosts or clients by default; you can
limit processing to only some hosts or clients by supplying a list of
those names by using the B<--names> option (see below).
B<hoststats> will process all hosts by default; you can limit
processing to only some hosts by supplying a list of those names by
using the B<--names> option (see below).
=head3 Report type
@ -238,18 +221,18 @@ or all postings summed up; for details, see below.
Furthermore you can set an upper and/or lower boundary to exclude some
results from output via the B<--lower> and B<--upper> options,
respectively. By default, all hosts/clients with more and/or less
postings per month will be excluded from the result set (i.e. not
shown and not considered forsum reports).
respectively. By default, all hosts with more and/or less postings
per month will be excluded from the result set (i.e. not shown and
not considered for sum reports).
=head3 Sorting and formatting the output
By default, all results are grouped by month; you can group results by
hosts/clients instead via the B<--group-by> option. Within those
groups, the list of hosts/clients (or months) is sorted alphabetically
(or chronologically, respectively) ascending. You can change that order
(and sort by number of postings) with the B<--order-by> option. For
details and exceptions, please see below.
hosts instead via the B<--group-by> option. Within those groups, the
list of hosts (or months) is sorted alphabetically (or chronologically,
respectively) ascending. You can change that order (and sort by number
of postings) with the B<--order-by> option. For details and exceptions,
please see below.
The results will be formatted as a kind of table; you can change the
output format to a simple list or just a list of names and number of
@ -262,7 +245,7 @@ one for each month, by submitting the B<--filetemplate> option, see below.
=head2 Configuration
B<cliservstats> will read its configuration from F<newsstats.conf>
B<hoststats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration file
submitted by the B<--conffile> option.
@ -282,11 +265,6 @@ Print out version and copyright information and exit.
Print this man page and exit.
=item B<-t>, B<--type> I<host|client>
Create report for hosts (servers) or clients (newsreaders), using
I<DBTableHosts> or I<DBTableClnts> respectively.
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
Set processing period to a single month in YYYY-MM format or to a time
@ -296,7 +274,7 @@ processing period to process the whole database.
=item B<-n>, B<--names> I<name(s)>
Limit processing to a certain set of host or client names. I<names(s)>
Limit processing to a certain set of hostnames. I<names(s)>
can be a single name (eternal-september.org), a group of names
(*.inka.de) or a list of either of these, separated by colons, for
example
@ -312,9 +290,9 @@ containing the sum of all detected hosts for that month.
Choose the report type: I<default> or I<sums>
By default, B<cliservstats> will report the number of postings for each
host/client in each month. But it can also report the total sum of postings
per host/client for all months.
By default, B<hoststats> will report the number of postings for each
host in each month. But it can also report the total sum of postings
per host for all months.
For report type I<sums>, the B<group-by> option has no meaning and
will be silently ignored (see below).
@ -327,13 +305,13 @@ Set the lower boundary. See below.
Set the upper boundary.
By default, all hosts/clients with more postings per month than the
upper boundary and/or less postings per month than the lower boundary
By default, all hosts with more postings per month than the upper
boundary and/or less postings per month than the lower boundary
will be excluded from further processing. For the default report that
means each month only hosts/clients with a number of postings between
the boundaries will be displayed. For the sums report, hosts/clients
with a number of postings exceeding the boundaries in all (!) months
will not be considered.
means each month only hosts with a number of postings between the
boundaries will be displayed. For the sums report, hosts with a number
of postings exceeding the boundaries in all (!) months will not be
considered.
=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>
@ -349,8 +327,7 @@ ascending order, like this:
individual.net : 16768
news.albasani.net: 7879
The results can be grouped by host/client instead via
B<--group-by> I<name>:
The results can be grouped by host instead via B<--group-by> I<name>:
----- individual.net
2012-01: 19525
@ -379,8 +356,8 @@ will therefore be ignored.
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
Within each group (a single month or single host/client, see above),
the report will be sorted by name (or month) in ascending alphabetical
Within each group (a single month or single host, see above), the
report will be sorted by host (or month) in ascending alphabetical
order by default. You can change the sort order to descending or sort
by number of postings instead.
@ -426,19 +403,19 @@ False by default.
=item B<--comments|--nocomments>
Add comments (group headers) to I<dump> and I<pretty> output. True by default
as logn as B<--filetemplate> is not set.
Add comments (group headers) to I<dump> and I<pretty> output. True by
default as long as B<--filetemplate> is not set.
Use I<--nocomments> to suppress anything except host/client names or months and
numbers of postings.
Use I<--nocomments> to suppress anything except host names or months
and numbers of postings.
=item B<--filetemplate> I<filename template>
Save output to file(s) instead of dumping it to STDOUT. B<cliservstats> will
create one file for each month (or each host/client, accordant to the
Save output to file(s) instead of dumping it to STDOUT. B<hoststats>
will create one file for each month (or each host, according to the
setting of B<--group-by>, see above), with filenames composed by adding
year and month (or host/client names) to the I<filename template>, for
example with B<--filetemplate> I<stats>:
year and month (or hostnames) to the I<filename template>, for example
with B<--filetemplate> I<stats>:
stats-2012-01
stats-2012-02
@ -446,7 +423,7 @@ example with B<--filetemplate> I<stats>:
=item B<--db> I<database table>
Override I<DBTableHosts> or I<DBTableClnts> from F<newsstats.conf>.
Override I<DBTableHosts> from F<newsstats.conf>.
=item B<--conffile> I<filename>
@ -462,29 +439,28 @@ See L<doc/INSTALL>.
Show number of postings per group for lasth month in I<pretty> format:
cliservstats --type host
hoststats
Show that report for January of 2010 and *.inka plus individual.net:
cliservstats --type host --month 2010-01 --names *.inka:individual.net:
hoststats --month 2010-01 --names *.inka:individual.net:
Only show clients with 30 postings or less last month, ordered
Only show hosts with 30 postings or less last month, ordered
by number of postings, descending, in I<pretty> format:
cliservstats --type client --upper 30 --order-by postings-desc
hoststats --upper 30 --order-by postings-desc
List number of postings per host for each month of 2010 and redirect
output to one file for each month, named hosts-2010-01 and so on, in
machine-readable form (without formatting):
cliservstats -t host -m 2010-01:2010-12 -f dump --filetemplate hosts
hoststats -m 2010-01:2010-12 -f dump --filetemplate hosts
=head1 FILES
=over 4
=item F<bin/cliservstats.pl>
=item F<bin/hoststats.pl>
The script itself.
@ -513,7 +489,7 @@ L<doc/README>
=item -
l>doc/INSTALL>
L<doc/INSTALL>
=item -

View file

@ -15,7 +15,8 @@
#
# Usage:
# $~ groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups
# $~ cliservstats.pl -t server --nocomments --sums --format dump | postingstats.pl -t hosts
# $~ hoststats.pl --nocomments --sums --format dump | postingstats.pl -t hosts
# $~ clientstats.pl --nocomments --sums --versions --format dump | postingstats.pl -t clients
#
BEGIN {
@ -53,19 +54,22 @@ if (!$Type) {
$Type = 'GroupStats';
} elsif ($Type =~ /(host|server)s?/i) {
$Type = 'HostStats';
} elsif ($Type =~ /(client|reader)s?/i) {
$Type = 'ClientStats';
};
my $Timestamp = time;
##### ----- configuration --------------------------------------------
my $TLH = 'de';
my %Heading = ('GroupStats' => 'Postingstatistik fuer de.* im Monat '.$Month,
'HostStats' => 'Serverstatistik fuer de.* im Monat '.$Month
my %Heading = ('GroupStats' => 'Postingstatistik fuer de.* im Monat '.$Month,
'HostStats' => 'Serverstatistik fuer de.* im Monat '.$Month,
'ClientStats' => 'Newsreaderstatistik fuer de.* im Monat '.$Month
);
my %TH = ('counter' => 'Nr.',
'value' => 'Anzahl',
'percentage' => 'Prozent'
);
my %LeadIn = ('GroupStats' => <<GROUPSIN, 'HostStats' => <<HOSTSIN);
my %LeadIn = ('GroupStats' => <<GROUPSIN, 'HostStats' => <<HOSTSIN, 'ClientStats' => <<CLIENTSIN);
From: Thomas Hochstein <thh\@thh.name>
Newsgroups: local.test
Subject: Postingstatistik fuer de.* im Monat $Month
@ -88,7 +92,18 @@ Content-Transfer-Encoding: 7bit
User-Agent: postingstats.pl/$VERSION (NewsStats)
HOSTSIN
my %LeadOut = ('GroupStats' => <<GROUPSOUT, 'HostStats' => <<HOSTSOUT);
From: Thomas Hochstein <thh\@thh.name>
Newsgroups: local.test
Subject: Newsreaderstatistik fuer de.* im Monat $Month
Message-ID: <destat-clients-$Month.$Timestamp\@mid.news.szaf.org>
Approved: thh\@thh.name
Mime-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
User-Agent: postingstats.pl/$VERSION (NewsStats)
CLIENTSIN
my %LeadOut = ('GroupStats' => <<GROUPSOUT, 'HostStats' => <<HOSTSOUT, 'ClientStats' => <<CLIENTSOUT);
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
@ -110,6 +125,19 @@ wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
Loeschnachricht) erreicht haben.
HOSTSOUT
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
richten werden nicht erfasst; Postings, die supersedet oder gecancelt
wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
Loeschnachricht) erreicht haben. Versionsangaben werden nur gezaehlt,
wenn Sie ermittelbar sind; daher kann die Summe der Newsreader-Versionen
kleiner sein als die Postingzahl fuer den Newsreader. Ausserdem koennen
an einem Beitrag mehrere Clients beteiligt sein, bspw. der Newsreader
und ein lokaler Server wie der Hamster. Daher kann die Summe aller
Newsreader groesser sein als die Summer der Postings; auch ergeben die
Prozentzahlen dementsprechend in der Summe mehr als 100%.
CLIENTSOUT
##### ----- subroutines ----------------------------------------------
sub Percentage {
@ -124,35 +152,76 @@ sub Divider {
return ':' . $Symbol x ($MaxLength+TABLEWIDTH) . ":\n";
}
sub SingleVersion {
my ($LastName,$RSubValue,$RValue,$RMaxLength) = @_;
# get version to add to client name
my ($Version) = keys %{$$RSubValue{$LastName}};
$Version =~ s/^- //;
# add version to client name by creating a new name
# and deleting the old one
my ($NameVersion) = $LastName . ' ' . $Version;
$$RValue{$NameVersion} = $$RValue{$LastName};
delete($$RValue{$LastName});
$$RMaxLength = length($NameVersion) if length($NameVersion) > $$RMaxLength;
# delete single version
delete($$RSubValue{$LastName});
}
##### ----- main loop ------------------------------------------------
my (%Value, $SumName, $SumTotal, $MaxLength);
$MaxLength = 0;
my (%Value, %SubValue, $SubCounter, $LastName, $SumName, $SumTotal,
$MaxLength);
if ($Type eq 'GroupStats') {
$SumName = "$TLH.ALL";
$TH{'name'} = 'Newsgroup'
} elsif ($Type eq 'HostStats') {
$SumName = 'ALL';
$TH{'name'} = 'Server'
$TH{'name'} = 'Postingserver'
} elsif ($Type eq 'ClientStats') {
$SumName = 'ALL';
$TH{'name'} = 'Newsreader / Client'
}
# read from STDIN
### read from STDIN
$MaxLength = 0;
while(<>) {
my ($Name, $Value) = split;
my ($Name, $Value) = $_ =~ /(.+) (\d+)$/;
$SumTotal = $Value if $Name eq $SumName;
next if $Name =~ /ALL$/;
$Value{$Name} = $Value;
$MaxLength = length($Name) if length($Name) > $MaxLength;
}
# print to STDOUT
# handle client versions
if ($Type eq 'ClientStats' and $Name =~ /^- /) {
$SubValue{$LastName}{$Name} = $Value;
$SubCounter++;
} else {
# clients with just one version
&SingleVersion($LastName,\%SubValue,\%Value,\$MaxLength)
if ($LastName && $SubCounter == 1);
# reset version counter and client name
$SubCounter = 0;
$LastName = $Name;
$Value{$Name} = $Value;
$MaxLength = length($Name) if length($Name) > $MaxLength;
}
}
# clients with just one version (last iteration)
&SingleVersion($LastName,\%SubValue,\%Value,\$MaxLength)
if ($LastName && $SubCounter == 1);
### print to STDOUT
# calculate padding for $Heading
my $PaddingLeft = ' ' x int((($MaxLength+TABLEWIDTH-2-length($Heading{$Type}))/2));
my $PaddingRight = $PaddingLeft;
$PaddingLeft .= ' ' if (length($Heading{$Type}) + (length($PaddingLeft) * 2) < $MaxLength+TABLEWIDTH);
my $Counter = 0;
$PaddingLeft .= ' ' if (length($Heading{$Type}) + (length($PaddingLeft) * 2) +2 < $MaxLength+TABLEWIDTH);
print $LeadIn{$Type};
# print table header
print &Divider('=',$MaxLength);
printf(": %s%s%s :\n",$PaddingLeft,$Heading{$Type},$PaddingRight);
print &Divider('=',$MaxLength);
@ -163,11 +232,26 @@ printf(": %-3s : %-6s : %-7s : %-*s :\n",
$MaxLength,$TH{'name'});
print &Divider('-',$MaxLength);
foreach my $Name (sort { $Value{$b} <=> $Value {$a}} keys %Value) {
# print table
my $Counter = 0;
foreach my $Name (sort { $Value{$b} <=> $Value{$a} } keys %Value) {
$Counter++;
printf(": %3u. : %6u : %6.2f%% : %-*s :\n",$Counter,$Value{$Name},&Percentage($SumTotal,$Value{$Name}),$MaxLength,$Name);
printf(": %3u. : %6u : %6.2f%% : %-*s :\n",
$Counter,$Value{$Name},&Percentage($SumTotal,$Value{$Name}),
$MaxLength,$Name);
# handle client versions
if ($SubValue{$Name}) {
foreach my $SubName (sort { $SubValue{$Name}{$b} <=> $SubValue{$Name}{$a} }
keys %{$SubValue{$Name}}) {
printf(": : %6u : %6.2f%% : %-*s :\n",
$SubValue{$Name}{$SubName},
&Percentage($SumTotal,$SubValue{$Name}{$SubName}),
$MaxLength,$SubName);
}
}
}
# print table footer
print &Divider('-',$MaxLength);
printf(": : %6u : %s : %-*s :\n",$SumTotal,'100.00%',$MaxLength,'');
print &Divider('=',$MaxLength);
@ -184,7 +268,7 @@ postingstats - format and post reports
=head1 SYNOPSIS
B<postingstats> B<-t> I<groups|hosts> [B<-Vh> [B<-m> I<YYYY-MM>]
B<postingstats> B<-t> I<groups|hosts|clients> [B<-Vh> [B<-m> I<YYYY-MM>]
=head1 REQUIREMENTS
@ -193,8 +277,8 @@ See L<doc/README>.
=head1 DESCRIPTION
This script will re-format reports on newsgroup usage created by
B<groupstats.pl> or B<cliservstats.pl> and create a message that can
be posted to Usenet.
B<groupstats.pl>, B<hoststats.pl> or B<clientstats.pl> and create a
message that can be posted to Usenet.
=head2 Features and options
@ -228,8 +312,8 @@ sum total.
=item C<%Heading>
Hash with keys for I<GroupStats> and I<HostStats>. Used to display a
heading.
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>.
Used to display a heading.
=item C<%TH>
@ -242,14 +326,14 @@ Output will be truncated otherwise.
=item C<%LeadIn>
Hash with keys for I<GroupStats> and I<HostStats>. Used to create the
headers for our posting. Can contain other text that will be shown
before C<%Heading>.
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>.
Used to create the headers for our posting. Can contain other text
that will be shown before C<%Heading>.
=item C<%LeadOut>
Hash with keys for I<GroupStats> and I<HostStats>. Will be shown at the
end of our posting.
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>.
Will be shown at the end of our posting.
=back
@ -265,9 +349,10 @@ Print out version and copyright information and exit.
Print this man page and exit.
=item B<-t>, B<--type> I<groups|hosts>
=item B<-t>, B<--type> I<groups|hosts|clients>
Set report type to posting statistics or hosts statistics accordingly.
Set report type to posting statistics, hosts statistics or client
statistics accordingly.
=item B<-m>, B<--month> I<YYYY-MM>
@ -291,7 +376,11 @@ Create a posting from a posting statistics report for 2012-01:
Create a posting from a host statistics report for last month:
cliservstats.pl -t server --nocomments --sums --format dump | postingstats.pl -t hosts
hoststats.pl --nocomments --sums --format dump | postingstats.pl -t hosts
Create a posting from a client statistics report for last month:
clientstats.pl --nocomments --sums --versions --format dump | postingstats.pl -t clients
=head1 FILES
@ -326,7 +415,7 @@ L<doc/README>
=item -
l>doc/INSTALL>
L<doc/INSTALL>
=item -
@ -334,7 +423,11 @@ groupstats -h
=item -
cliservstats -h
hoststats -h
=item -
clientstats -h
=back

View file

@ -2,7 +2,7 @@
# installation path is /srv/newsstats/, please adjust accordingly
if [[ $1 =~ [0-9]{4}-[0-9]{2} ]]; then
/srv/newsstats/bin/groupstats.pl --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
/srv/newsstats/bin/cliservstats.pl -t server --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl -t server --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
/srv/newsstats/bin/hoststats.pl --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl -t server --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
else
echo 'Input error, please use dopostingstats.sh YYYY-MM'
fi

View file

@ -1,3 +1,14 @@
NewsStats 0.4.0 (unreleased)
* Reformat $Conf{TLH} for GroupStats only.
* Extract TLH check from HostStats to subroutine, fix no-op check.
* Extract getting raw headers from HostStats to subroutine.
* Improve documentation for config file.
* ParseHeader: re-merge continuation lines.
* Add ClientStats to gatherstats.
* Move cliservstats to hoststats.
* Add clientstats (for clients).
* Add ClientStats to postingstats.
NewsStats 0.3.0 (2025-05-18)
* Extract GroupStats (in gatherstats) to subroutine.
* Add ParseHeader() to library.

View file

@ -62,8 +62,9 @@ INSTALLATION INSTRUCTIONS
b) Optional configuration options
* TLH = de
Limit examination to that top-level hierarchy.
* TLH = de.alt,news.admin
Limit examination to that top-level hierarchy/hierarchies.
Comma-separated list.
3) Database (mysql) setup

View file

@ -74,8 +74,8 @@ Getting Started
Report generation is handled by specialised scripts for each
report type. Currently reports on the number of postings per group
and month and injection server and month are supported; you can
use 'groupstats.pl' and 'cliservstats.pl' for that. See the
groupstats.pl and cliservstats.pl man pages for more information.
use 'groupstats.pl' and 'hoststats.pl' for that. See the
groupstats.pl and hoststats.pl man pages for more information.
Reporting Bugs

View file

@ -13,7 +13,9 @@ DBDatabase = newsstats
DBTableRaw = raw_de
DBTableGrps = groups_de
DBTableHosts = hosts_de
#DBTableClnts =
DBTableClnts = clnts_de
### hierarchy configuration
# comma-separated list of TLHs to parse
# newsgroups not starting with one of those patterns are not counted
TLH = de

View file

@ -49,7 +49,7 @@ require Exporter;
Output => [qw(OutputData FormatOutput)],
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
SQLSetBounds SQLBuildClause GetMaxLength)]);
$VERSION = '0.3.0';
$VERSION = '0.4.0';
use Data::Dumper;
use File::Basename;
@ -280,7 +280,8 @@ sub ParseHeaders {
} elsif (/^\s/) {
# continuation lines
if ($Label) {
$Header{lc($Label)} .= "\n$_";
s/^\s+/ /;
$Header{lc($Label)} .= $_;
} else {
warn (sprintf("Non-header line: %s\n",$_));
}
@ -433,14 +434,13 @@ sub OutputData {
### $LeadIn : print at start of output
### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
### $DBQuery : database query handle with executed query,
### containing $Month, $Key, $Value
### containing $Month, $Key, $Value
### $PadField : padding length for key field (optional) for 'pretty'
### $PadValue : padding length for value field (optional) for 'pretty'
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $LeadIn, $FileTempl,
$DBQuery, $PadField, $PadValue) = @_;
my %ValidKeys = %{$ValidKeys} if $ValidKeys;
my ($FileName, $Handle, $OUT);
our $LastIteration;
my ($LastIteration, $FileName, $Handle, $OUT);
# define output types
my %LegalOutput;
@ -480,7 +480,7 @@ sub OutputData {
$Handle = $OUT;
};
print $Handle &FormatOutput($Format, $Comments, $LeadIn, $Caption,
$Key, $Value, $Precision, $PadField, $PadValue);
$Key, $Value, $Precision, $PadField, $PadValue, $LastIteration);
$LastIteration = $Caption;
};
close $OUT if ($FileTempl);
@ -500,24 +500,26 @@ sub FormatOutput {
### $PadValue : padding length for value field (optional) for 'pretty'
### OUT: $Output: formatted output
my ($Format, $Comments, $LeadIn, $Caption, $Key, $Value, $Precision, $PadField,
$PadValue) = @_;
$PadValue, $LastIteration) = @_;
my ($Output);
# keep last caption in mind
our ($LastIteration);
# create one line of output
if ($Format eq 'dump') {
# output as dump (key value)
$Output = sprintf ("# %s:\n",$Caption)
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
if ($Caption and $Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
$Output .= sprintf ("%s %u\n",$Key,$Value);
} elsif ($Format eq 'list') {
# output as list (caption key value)
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
if ($Caption) {
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
} else {
$Output = sprintf ("%s %u\n",$Key,$Value);
}
} elsif ($Format eq 'pretty') {
# output as a table
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)) {
$Output = $LeadIn;
$Output .= sprintf ("# ----- %s:\n",$Caption);
$Output .= sprintf ("# ----- %s:\n",$Caption) if $Caption;
}
# increase $PadValue for numbers with decimal point
$PadValue += $Precision+1 if $Precision;
@ -582,7 +584,7 @@ sub SQLSortOrder {
### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
### $OrderBy: secondary sort by month/newsgroups (default)
### or number of 'postings'
### $Type : newsgroup, host, client
### $Type : newsgroup, host or client+version
### OUT: a SQL ORDER BY clause
my ($GroupBy,$OrderBy,$Type) = @_;
my ($GroupSort,$OrderSort) = ('','');
@ -640,9 +642,11 @@ sub SQLGroupList {
### OUT: SQL code to become part of a 'WHERE' clause,
### list of names for SQL bindings
my ($Names,$Type) = @_;
my $InvalidCharRegExp;
# substitute '*' wildcard with SQL wildcard character '%'
$Names =~ s/\*/%/g;
return (undef,undef) if !CheckValidNames($Names);
$InvalidCharRegExp = ',;' if $Type eq 'client';
return (undef,undef) if !CheckValidNames($Names,$InvalidCharRegExp);
# just one name/newsgroup?
return (SQLGroupWildcard($Names,$Type),$Names) if $Names !~ /:/;
my ($SQL,@WildcardNames,@NoWildcardNames);
@ -805,10 +809,11 @@ sub SQLBuildClause {
sub CheckValidNames {
################################################################################
### syntax check of a list
### IN : $Names: list of names, e.g. newsgroups (group.one.*:group.two:group.three.*)
### IN : $Names : list of names, e.g. newsgroups (group.one.*:group.two:group.three.*)
### InvalidCharRegExp: regular expression for invalid characters
### OUT: boolean
my ($Names) = @_;
my $InvalidCharRegExp = ',; ';
my ($Names,$InvalidCharRegExp) = @_;
$InvalidCharRegExp = ',; ' if (!$InvalidCharRegExp);
return ($Names =~ /[$InvalidCharRegExp]/) ? 0 : 1;
};