Compare commits
16 commits
5a6a3e58bf
...
23a28815b2
Author | SHA1 | Date | |
---|---|---|---|
|
23a28815b2 | ||
|
39e845d552 | ||
|
09a9112679 | ||
|
66a175c7f8 | ||
|
963f07432c | ||
|
a553b374ce | ||
|
3e73346b20 | ||
|
eea296391c | ||
|
d194ef754f | ||
|
c985e29b7e | ||
|
f78d4c2158 | ||
|
995173456b | ||
|
3447cdabff | ||
|
671ae67be0 | ||
|
6122d1a49d | ||
|
988e7b2f13 |
598
bin/clientstats.pl
Normal file
598
bin/clientstats.pl
Normal file
|
@ -0,0 +1,598 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# clientstats.pl
|
||||
#
|
||||
# This script will get statistical data on newsreader (client) usage
|
||||
# from a database.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
use File::Basename;
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
|
||||
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat,
|
||||
$OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy,
|
||||
$OptReportType,$OptSums,$UppBound,$OptVersions,$OptConfFile);
|
||||
GetOptions ('c|captions!' => \$OptCaptions,
|
||||
'comments!' => \$OptComments,
|
||||
'db=s' => \$OptDB,
|
||||
'filetemplate=s' => \$OptFileTemplate,
|
||||
'f|format=s' => \$OptFormat,
|
||||
'g|group-by=s' => \$OptGroupBy,
|
||||
'l|lower=i' => \$LowBound,
|
||||
'm|month=s' => \$OptMonth,
|
||||
'n|names=s' => \$OptNames,
|
||||
'o|order-by=s' => \$OptOrderBy,
|
||||
'r|report=s' => \$OptReportType,
|
||||
's|sums!' => \$OptSums,
|
||||
'u|upper=i' => \$UppBound,
|
||||
'v|versions!' => \$OptVersions,
|
||||
'conffile=s' => \$OptConfFile,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
# parse parameters
|
||||
# $OptComments defaults to TRUE if --filetemplate is not used
|
||||
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
|
||||
# parse $OptReportType
|
||||
if ($OptReportType) {
|
||||
if ($OptReportType =~ /sums?/i) {
|
||||
$OptReportType = 'sum';
|
||||
} else {
|
||||
$OptReportType = 'default';
|
||||
}
|
||||
}
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig($OptConfFile)};
|
||||
|
||||
### set DBTable
|
||||
$Conf{'DBTable'} = $Conf{'DBTableClnts'};
|
||||
$Conf{'DBTable'} = $OptDB if $OptDB;
|
||||
|
||||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
|
||||
### get time period and names, prepare SQL 'WHERE' clause
|
||||
# get time period
|
||||
# and set caption for output and expression for SQL 'WHERE' clause
|
||||
my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
|
||||
# bail out if --month is invalid
|
||||
&Bleat(2,"--month option has an invalid format - ".
|
||||
"please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
|
||||
# get list of clients and set expression for SQL 'WHERE' clause
|
||||
# with placeholders as well as a list of names to bind to them
|
||||
my ($SQLWhereNames,@SQLBindNames);
|
||||
if ($OptNames) {
|
||||
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,'client');
|
||||
# bail out if --names is invalid
|
||||
&Bleat(2,"--names option has an invalid format!")
|
||||
if !$SQLWhereNames;
|
||||
}
|
||||
|
||||
### build SQL WHERE clause
|
||||
my $ExcludeSums = $OptSums ? '' : "client != 'ALL'";
|
||||
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
|
||||
$ExcludeSums,"version = 'ALL'",
|
||||
&SQLSetBounds('default',$LowBound,$UppBound));
|
||||
|
||||
### get sort order and build SQL 'ORDER BY' clause
|
||||
# force to 'month' for $OptReportType 'sum'
|
||||
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
|
||||
# default to 'name' if $OptGroupBy is not set and
|
||||
# just one name is requested, but more than one month
|
||||
$OptGroupBy = 'name' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
|
||||
and $OptNames and $OptNames !~ /[:*%]/);
|
||||
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
|
||||
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
|
||||
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'client, version');
|
||||
# $GroupBy will contain 'month' or 'client, version' (parsed result of $OptGroupBy)
|
||||
# set it to 'month' or 'key' for OutputData()
|
||||
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
|
||||
|
||||
### get report type and build SQL 'SELECT' query
|
||||
my $SQLSelect;
|
||||
my $SQLGroupClause = '';
|
||||
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$SQLGroupClause = "GROUP BY client, version";
|
||||
# change $SQLOrderClause: replace everything before 'postings'
|
||||
$SQLOrderClause =~ s/BY.+postings/BY postings/;
|
||||
$SQLSelect = "'All months',LEFT(client,40),SUM(postings)";
|
||||
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
|
||||
$SQLOrderClause =~ s/postings/SUM(postings)/;
|
||||
} else {
|
||||
$SQLSelect = "month,LEFT(client,40),postings";
|
||||
};
|
||||
|
||||
### get length of longest name delivered by query
|
||||
### for formatting purposes
|
||||
my $Field = ($GroupBy eq 'month') ? 'LEFT(client,40)' : 'month';
|
||||
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
|
||||
$Field,'postings',$SQLWhereClause,
|
||||
'',@SQLBindNames);
|
||||
|
||||
### build and execute SQL query
|
||||
my ($DBQuery);
|
||||
# prepare query
|
||||
$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
|
||||
$SQLSelect,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
$SQLWhereClause,$SQLGroupClause,
|
||||
$SQLOrderClause));
|
||||
# execute query
|
||||
$DBQuery->execute(@SQLBindNames)
|
||||
or &Bleat(2,sprintf("Can't get client data for %s from %s.%s: %s\n",
|
||||
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
$DBI::errstr));
|
||||
|
||||
### output results
|
||||
# set default to 'pretty'
|
||||
$OptFormat = 'pretty' if !$OptFormat;
|
||||
# print captions if --caption is set
|
||||
my $LeadIn;
|
||||
if ($OptCaptions && $OptComments) {
|
||||
# print time period with report type
|
||||
my $CaptionReportType = '(number of postings for each month)';
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$CaptionReportType = '(number of all postings for that time period)';
|
||||
}
|
||||
$LeadIn .= sprintf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
|
||||
# print name list if --names is set
|
||||
$LeadIn .= sprintf("# ----- Names: %s\n",join(',',split(/:/,$OptNames)))
|
||||
if $OptNames;
|
||||
# print boundaries, if set
|
||||
my $CaptionBoundary= '(counting only months fulfilling this condition)';
|
||||
$LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
|
||||
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
|
||||
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
|
||||
if ($LowBound or $UppBound);
|
||||
# print primary and secondary sort order
|
||||
$LeadIn .= sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
|
||||
($GroupBy eq 'month') ? 'Months' : 'Names',
|
||||
($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
|
||||
($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
|
||||
($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
|
||||
}
|
||||
|
||||
# output data
|
||||
# (changed code copy from NewsStats::OutputData)
|
||||
my ($LastIteration, $FileName, $Handle, $OUT);
|
||||
|
||||
# define output types
|
||||
my %LegalOutput;
|
||||
@LegalOutput{('dump','list','pretty')} = ();
|
||||
# bail out if format is unknown
|
||||
&Bleat(2,"Unknown output type '$OptFormat'!") if !exists($LegalOutput{$OptFormat});
|
||||
|
||||
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
|
||||
# save client for later use
|
||||
my $Client = $Key;
|
||||
# care for correct sorting order and abstract from month and keys:
|
||||
# $Caption will be $Month or $Key, according to sorting order,
|
||||
# and $Key will be $Key or $Month, respectively
|
||||
my $Caption;
|
||||
if ($GroupBy eq 'key') {
|
||||
$Caption = $Key;
|
||||
$Key = $Month;
|
||||
} else {
|
||||
$Caption = $Month;
|
||||
}
|
||||
# set output file handle
|
||||
if (!$OptFileTemplate) {
|
||||
$Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
|
||||
} elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
|
||||
close $OUT if ($LastIteration);
|
||||
# safeguards for filename creation:
|
||||
# replace potential problem characters with '_'
|
||||
$FileName = sprintf('%s-%s',$OptFileTemplate,$Caption);
|
||||
$FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
|
||||
open ($OUT,">$FileName")
|
||||
or &Bleat(2,sprintf("Cannot open output file '%s': $!",
|
||||
$FileName));
|
||||
$Handle = $OUT;
|
||||
};
|
||||
print $Handle &FormatOutput($OptFormat, $OptComments, $LeadIn, $Caption,
|
||||
$Key, $Value, 0, $MaxLength, $MaxValLength, $LastIteration);
|
||||
# output client versions
|
||||
if ($OptVersions) {
|
||||
### get client versions
|
||||
# $SQLWhereClause without 'ALL' version
|
||||
$SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
|
||||
$ExcludeSums,"version != 'ALL'","client = '$Client'",
|
||||
&SQLSetBounds('default',$LowBound,$UppBound));
|
||||
|
||||
# save length of longest client
|
||||
my $ClientMaxLenght = $MaxLength;
|
||||
my $ClientMaxValLenght = $MaxValLength;
|
||||
# get length of longest version delivered by query
|
||||
# for formatting purposes
|
||||
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
|
||||
'version','postings',$SQLWhereClause,
|
||||
'',@SQLBindNames);
|
||||
if ($MaxLength) {
|
||||
# add lenght of '- '
|
||||
$MaxLength += 2;
|
||||
# set to length of longest client, if longer
|
||||
$MaxLength = $ClientMaxLenght if $ClientMaxLenght > $MaxLength;
|
||||
$MaxValLength = $ClientMaxValLenght if $ClientMaxValLenght > $MaxValLength;
|
||||
}
|
||||
|
||||
# prepare query
|
||||
my $DBVersQuery = $DBHandle->prepare(sprintf('SELECT version,postings FROM %s.%s %s %s %s',
|
||||
$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
$SQLWhereClause,$SQLGroupClause,
|
||||
$SQLOrderClause));
|
||||
# execute query
|
||||
$DBVersQuery->execute(@SQLBindNames)
|
||||
or &Bleat(2,sprintf("Can't get version data for %s from %s.%s: %s\n",
|
||||
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
$DBI::errstr));
|
||||
# output versions
|
||||
while (my ($Version, $Postings) = $DBVersQuery->fetchrow_array) {
|
||||
$Version = '- ' . $Version;
|
||||
print $Handle &FormatOutput($OptFormat, $OptComments, $LeadIn, '',
|
||||
$Version, $Postings, 0, $MaxLength, $MaxValLength,
|
||||
'');
|
||||
}
|
||||
}
|
||||
$LastIteration = $Caption;
|
||||
};
|
||||
close $OUT if ($OptFileTemplate);
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
clientstats - create reports on client usage
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<clientstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script create reports on newsgroup usage (number of postings
|
||||
using each client per month) taken from result tables created by
|
||||
B<gatherstats.pl>.
|
||||
|
||||
=head2 Features and options
|
||||
|
||||
=head3 Time period and names
|
||||
|
||||
The time period to act on defaults to last month; you can assign another
|
||||
time period or a single month (or drop all time constraints) via the
|
||||
B<--month> option (see below).
|
||||
|
||||
B<clientstats> will process all clients by default; you can limit
|
||||
processing to only some clients by supplying a list of those names by
|
||||
using the B<--names> option (see below).
|
||||
|
||||
=head3 Report type
|
||||
|
||||
You can choose between different B<--report> types: postings per month
|
||||
or all postings summed up; for details, see below.
|
||||
|
||||
=head3 Upper and lower boundaries
|
||||
|
||||
Furthermore you can set an upper and/or lower boundary to exclude some
|
||||
results from output via the B<--lower> and B<--upper> options,
|
||||
respectively. By default, all clients with more and/or less postings
|
||||
per month will be excluded from the result set (i.e. not shown and
|
||||
not considered for sum reports).
|
||||
|
||||
=head3 Sorting and formatting the output
|
||||
|
||||
By default, all results are grouped by month; you can group results by
|
||||
clients instead via the B<--group-by> option. Within those groups,
|
||||
the list of clients (or months) is sorted alphabetically
|
||||
(or chronologically, respectively) ascending. You can change that order
|
||||
(and sort by number of postings) with the B<--order-by> option. For
|
||||
details and exceptions, please see below.
|
||||
|
||||
The results will be formatted as a kind of table; you can change the
|
||||
output format to a simple list or just a list of names and number of
|
||||
postings with the B<--format> option. Captions will be added by means of
|
||||
the B<--caption> option; all comments (and captions) can be supressed by
|
||||
using B<--nocomments>.
|
||||
|
||||
Last but not least you can redirect all output to a number of files, e.g.
|
||||
one for each month, by submitting the B<--filetemplate> option, see below.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<clientstats> will read its configuration from F<newsstats.conf>
|
||||
which should be present in etc/ via Config::Auto or from a configuration file
|
||||
submitted by the B<--conffile> option.
|
||||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
You can override some configuration options via the B<--db> option.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Print out version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
|
||||
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
period between two month in YYYY-MM:YYYY-MM format (two month, separated
|
||||
by a colon). By using the keyword I<all> instead, you can set no
|
||||
processing period to process the whole database.
|
||||
|
||||
=item B<-n>, B<--names> I<name(s)>
|
||||
|
||||
Limit processing to a certain set of client names. I<names(s)>
|
||||
can be a single name (eternal-september.org), a group of names
|
||||
(*.inka.de) or a list of either of these, separated by colons, for
|
||||
example
|
||||
|
||||
eternal-september.org:solani.org:*.inka.de
|
||||
|
||||
=item B<-s>, B<--sums|--nosums> (sum per month)
|
||||
|
||||
Include "virtual" clients named "ALL" for every month in output,
|
||||
containing the sum of all detected clients for that month.
|
||||
|
||||
=item B<-r>, B<--report> I<default|sums>
|
||||
|
||||
Choose the report type: I<default> or I<sums>
|
||||
|
||||
By default, B<clientstats> will report the number of postings for each
|
||||
client in each month. But it can also report the total sum of postings
|
||||
per client for all months.
|
||||
|
||||
For report type I<sums>, the B<group-by> option has no meaning and
|
||||
will be silently ignored (see below).
|
||||
|
||||
=item B<-l>, B<--lower> I<lower boundary>
|
||||
|
||||
Set the lower boundary. See below.
|
||||
|
||||
=item B<-l>, B<--upper> I<upper boundary>
|
||||
|
||||
Set the upper boundary.
|
||||
|
||||
By default, all clients with more postings per month than the
|
||||
upper boundary and/or less postings per month than the lower boundary
|
||||
will be excluded from further processing. For the default report that
|
||||
means each month only /clients with a number of postings between
|
||||
the boundaries will be displayed. For the sums report, /clients
|
||||
with a number of postings exceeding the boundaries in all (!) months
|
||||
will not be considered.
|
||||
|
||||
=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>
|
||||
|
||||
By default, all results are grouped by month, sorted chronologically in
|
||||
ascending order, like this:
|
||||
|
||||
# ----- 2012-01:
|
||||
arcor-online.net : 9379
|
||||
individual.net : 19525
|
||||
news.albasani.net: 9063
|
||||
# ----- 2012-02:
|
||||
arcor-online.net : 8606
|
||||
individual.net : 16768
|
||||
news.albasani.net: 7879
|
||||
|
||||
The results can be grouped by client instead via
|
||||
B<--group-by> I<name>:
|
||||
|
||||
----- individual.net
|
||||
2012-01: 19525
|
||||
2012-02: 16768
|
||||
----- arcor-online.net
|
||||
2012-01: 9379
|
||||
2012-02: 8606
|
||||
----- news.albasani.net
|
||||
2012-01: 9063
|
||||
2012-02: 7879
|
||||
|
||||
By appending I<-desc> to the group-by option parameter, you can reverse
|
||||
the sort order - e.g. B<--group-by> I<month-desc> will give:
|
||||
|
||||
# ----- 2012-02:
|
||||
arcor-online.net : 8606
|
||||
individual.net : 16768
|
||||
news.albasani.net: 7879
|
||||
# ----- 2012-01:
|
||||
arcor-online.net : 9379
|
||||
individual.net : 19525
|
||||
news.albasani.net: 9063
|
||||
|
||||
Sums reports (see above) will always be grouped by months; this option
|
||||
will therefore be ignored.
|
||||
|
||||
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
|
||||
|
||||
Within each group (a single month or single client, see above),
|
||||
the report will be sorted by name (or month) in ascending alphabetical
|
||||
order by default. You can change the sort order to descending or sort
|
||||
by number of postings instead.
|
||||
|
||||
=item B<-f>, B<--format> I<pretty|list|dump>
|
||||
|
||||
Select the output format, I<pretty> being the default:
|
||||
|
||||
# ----- 2012-01:
|
||||
arcor-online.net : 9379
|
||||
individual.net : 19525
|
||||
# ----- 2012-02:
|
||||
arcor-online.net : 8606
|
||||
individual.net : 16768
|
||||
|
||||
I<list> format looks like this:
|
||||
|
||||
2012-01 arcor-online.net 9379
|
||||
2012-01 individual.net 19525
|
||||
2012-02 arcor-online.net 8606
|
||||
2012-02 individual.net 16768
|
||||
|
||||
And I<dump> format looks like this:
|
||||
|
||||
# 2012-01:
|
||||
arcor-online.net 9379
|
||||
individual.net 19525
|
||||
# 2012-02:
|
||||
arcor-online.net 8606
|
||||
individual.net 16768
|
||||
|
||||
You can remove the comments by using B<--nocomments>, see below.
|
||||
|
||||
=item B<-c>, B<--captions|--nocaptions>
|
||||
|
||||
Add captions to output, like this:
|
||||
|
||||
----- Report for 2012-01 to 2012-02 (number of postings for each month)
|
||||
----- Names: individual.net
|
||||
----- Threshold: 8000 => x (counting only month fulfilling this condition)
|
||||
----- Grouped by Month (ascending), sorted by number of postings descending
|
||||
|
||||
False by default.
|
||||
|
||||
=item B<--comments|--nocomments>
|
||||
|
||||
Add comments (group headers) to I<dump> and I<pretty> output. True by default
|
||||
as logn as B<--filetemplate> is not set.
|
||||
|
||||
Use I<--nocomments> to suppress anything except client names or months and
|
||||
numbers of postings.
|
||||
|
||||
=item B<--filetemplate> I<filename template>
|
||||
|
||||
Save output to file(s) instead of dumping it to STDOUT. B<clientstats>
|
||||
will create one file for each month (or each client, according to the
|
||||
setting of B<--group-by>, see above), with filenames composed by adding
|
||||
year and month (or client names) to the I<filename template>, for
|
||||
example with B<--filetemplate> I<stats>:
|
||||
|
||||
stats-2012-01
|
||||
stats-2012-02
|
||||
... and so on
|
||||
|
||||
=item B<--db> I<database table>
|
||||
|
||||
Override I<DBTableClnts> or I<DBTableClnts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--conffile> I<filename>
|
||||
|
||||
Load configuration from I<filename> instead of F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Show number of postings per group for lasth month in I<pretty> format:
|
||||
|
||||
clientstats
|
||||
|
||||
Show that report for January of 2010 and *.inka plus individual.net:
|
||||
|
||||
clientstats --month 2010-01 --names *.inka:individual.net:
|
||||
|
||||
Only show clients with 30 postings or less last month, ordered
|
||||
by number of postings, descending, in I<pretty> format:
|
||||
|
||||
clientstats --upper 30 --order-by postings-desc
|
||||
|
||||
List number of postings per host for each month of 2010 and redirect
|
||||
output to one file for each month, named hosts-2010-01 and so on, in
|
||||
machine-readable form (without formatting):
|
||||
|
||||
clientstats -m 2010-01:2010-12 -f dump --filetemplate hosts
|
||||
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<bin/clientstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
||||
gatherstats -h
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@thh.name>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
|
@ -46,7 +46,7 @@ my $DBCreate = <<SQLDB;
|
|||
CREATE DATABASE IF NOT EXISTS `$Conf{'DBDatabase'}` DEFAULT CHARSET=utf8;
|
||||
SQLDB
|
||||
|
||||
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS, 'DBTableHosts' => <<HOSTS);
|
||||
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS, 'DBTableHosts' => <<HOSTS, 'DBTableClnts' => <<CLIENTS);
|
||||
--
|
||||
-- Table structure for table DBTableRaw
|
||||
--
|
||||
|
@ -102,6 +102,23 @@ CREATE TABLE IF NOT EXISTS `$Conf{'DBTableHosts'}` (
|
|||
KEY `host` (`host`)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per server';
|
||||
HOSTS
|
||||
--
|
||||
-- Table structure for table DBTableClnts
|
||||
--
|
||||
|
||||
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableClnts'}` (
|
||||
`id` bigint(20) unsigned NOT NULL auto_increment,
|
||||
`month` varchar(7) character set ascii NOT NULL,
|
||||
`client` varchar(150) NOT NULL,
|
||||
`version` varchar(20) NOT NULL,
|
||||
`postings` int(11) NOT NULL,
|
||||
`revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `month_client_version` (`month`,`client`,`version`),
|
||||
KEY `month` (`month`),
|
||||
KEY `client` (`client`)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='Postings per client';
|
||||
CLIENTS
|
||||
|
||||
##### --------------------------------------------------------------------------
|
||||
##### Installation / upgrade instructions
|
||||
|
|
|
@ -23,6 +23,8 @@ use warnings;
|
|||
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList ParseHeaders);
|
||||
|
||||
use DBI;
|
||||
use Data::Dumper;
|
||||
use Encode qw(decode encode);
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
|
@ -31,7 +33,7 @@ Getopt::Long::config ('bundling');
|
|||
# define types of information that can be gathered
|
||||
# all / groups (/ clients / hosts)
|
||||
my %LegalStats;
|
||||
@LegalStats{('all','groups','hosts')} = ();
|
||||
@LegalStats{('all','groups','hosts','clients')} = ();
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
|
@ -61,8 +63,8 @@ my %Conf = %{ReadConfig($OptConfFile)};
|
|||
my %ConfOverride;
|
||||
$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
|
||||
$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
|
||||
$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
|
||||
$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
|
||||
$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
|
||||
$ConfOverride{'TLH'} = $OptTLH if $OptTLH;
|
||||
&OverrideConfig(\%Conf,\%ConfOverride);
|
||||
|
||||
|
@ -84,40 +86,12 @@ my ($Period) = &GetTimePeriod($OptMonth);
|
|||
&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
|
||||
"'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
|
||||
|
||||
### reformat $Conf{'TLH'}
|
||||
my $TLH;
|
||||
if ($Conf{'TLH'}) {
|
||||
# $Conf{'TLH'} is parsed as an array by Config::Auto;
|
||||
# make a flat list again, separated by :
|
||||
if (ref($Conf{'TLH'}) eq 'ARRAY') {
|
||||
$TLH = join(':',@{$Conf{'TLH'}});
|
||||
} else {
|
||||
$TLH = $Conf{'TLH'};
|
||||
}
|
||||
# strip whitespace
|
||||
$TLH =~ s/\s//g;
|
||||
# add trailing dots if none are present yet
|
||||
# (using negative look-behind assertions)
|
||||
$TLH =~ s/(?<!\.):/.:/g;
|
||||
$TLH =~ s/(?<!\.)$/./;
|
||||
# check for illegal characters
|
||||
&Bleat(2,'Config error - illegal characters in TLH definition!')
|
||||
if ($TLH !~ /^[a-zA-Z0-9:+.-]+$/);
|
||||
# escape dots
|
||||
$TLH =~ s/\./\\./g;
|
||||
if ($TLH =~ /:/) {
|
||||
# reformat $TLH from a:b to (a)|(b),
|
||||
# e.g. replace ':' by ')|('
|
||||
$TLH =~ s/:/)|(/g;
|
||||
$TLH = '(' . $TLH . ')';
|
||||
};
|
||||
};
|
||||
|
||||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
|
||||
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
|
||||
my $DBHosts = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
|
||||
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
|
||||
my $DBHosts = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
|
||||
my $DBClients = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableClnts'});
|
||||
|
||||
### get data for each month
|
||||
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
|
||||
|
@ -127,6 +101,34 @@ foreach my $Month (&ListMonth($Period)) {
|
|||
|
||||
### GroupStats
|
||||
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
|
||||
### reformat $Conf{'TLH'}
|
||||
my $TLH;
|
||||
if ($Conf{'TLH'}) {
|
||||
# $Conf{'TLH'} is parsed as an array by Config::Auto;
|
||||
# make a flat list again, separated by :
|
||||
if (ref($Conf{'TLH'}) eq 'ARRAY') {
|
||||
$TLH = join(':',@{$Conf{'TLH'}});
|
||||
} else {
|
||||
$TLH = $Conf{'TLH'};
|
||||
}
|
||||
# strip whitespace
|
||||
$TLH =~ s/\s//g;
|
||||
# add trailing dots if none are present yet
|
||||
# (using negative look-behind assertions)
|
||||
$TLH =~ s/(?<!\.):/.:/g;
|
||||
$TLH =~ s/(?<!\.)$/./;
|
||||
# check for illegal characters
|
||||
&Bleat(2,'Config error - illegal characters in TLH definition!')
|
||||
if ($TLH !~ /^[a-zA-Z0-9:+.-]+$/);
|
||||
# escape dots
|
||||
$TLH =~ s/\./\\./g;
|
||||
if ($TLH =~ /:/) {
|
||||
# reformat $TLH from a:b to (a)|(b),
|
||||
# e.g. replace ':' by ')|('
|
||||
$TLH =~ s/:/)|(/g;
|
||||
$TLH = '(' . $TLH . ')';
|
||||
};
|
||||
};
|
||||
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptMID,$OptTest,$OptDebug);
|
||||
};
|
||||
|
||||
|
@ -138,7 +140,16 @@ foreach my $Month (&ListMonth($Period)) {
|
|||
googlegroups.com heirich.name news.neostrada.pl netcologne.de newsdawg.com newscene.com
|
||||
news-service.com octanews.com readnews.com wieslauf.sub.de highway.telekom.at
|
||||
united-newsserver.de xennanews.com xlned.com xsnews.nl news.xs4all.nl);
|
||||
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptMID,$OptTest,$OptDebug,@KnownHosts);
|
||||
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@KnownHosts);
|
||||
};
|
||||
|
||||
### ClientStats
|
||||
if ($OptStatsType eq 'all' or $OptStatsType eq 'clients') {
|
||||
# define agents/clients that shouldn't be counted
|
||||
my @DropAgents = qw(debian fedora firefox gecko gentoo lightning mandriva mnenhy mozilla
|
||||
pclinuxos perl php presto suse suse/opensuse thunderbrowse ubuntu version);
|
||||
push(@DropAgents, 'red hat');
|
||||
&ClientStats($DBHandle,$DBRaw,$DBClients,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@DropAgents);
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -154,10 +165,10 @@ sub GroupStats {
|
|||
### $DBRaw : database table for raw data (to read from)
|
||||
### $DBGrps : database table for groups data (to write to)
|
||||
### $Month : current month to do
|
||||
### $MID : specific Message-ID to fetch (testing purposes)
|
||||
### $TLH : TLHs to collect
|
||||
### $Checkgroupsfile : filename template for checkgroups file
|
||||
### (expanded to $Checkgroupsfile-$Month)
|
||||
### $MID : specific Message-ID to fetch (testing purposes)
|
||||
### $Test : test mode
|
||||
### $Debug : debug mode
|
||||
### OUT: (nothing)
|
||||
|
@ -252,46 +263,24 @@ sub HostStats {
|
|||
### $DBRaw : database table for raw data (to read from)
|
||||
### $DBHosts : database table for hosts data (to write to)
|
||||
### $Month : current month to do
|
||||
### $TLH : TLHs to collect
|
||||
### $MID : specific Message-ID to fetch (testing purposes)
|
||||
### $Test : test mode
|
||||
### $Debug : debug mode
|
||||
### @KnownHosts : list of known hosts with subdomains
|
||||
### OUT: (nothing)
|
||||
my ($DBHandle,$DBRaw,$DBHosts,$Month,$MID,$Test,$Debug,@KnownHosts) = @_;
|
||||
my ($DBHandle,$DBRaw,$DBHosts,$Month,$TLH,$MID,$Test,$Debug,@KnownHosts) = @_;
|
||||
|
||||
my (%Postings,$DBQuery);
|
||||
|
||||
if (!$MID) {
|
||||
# get raw header data from raw table for given month
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
|
||||
"WHERE day LIKE ? AND NOT disregard",
|
||||
$DBRaw));
|
||||
$DBQuery->execute($Month.'-%')
|
||||
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
|
||||
"$DBI::errstr\n",$Month,$DBRaw));
|
||||
} else {
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
|
||||
"WHERE mid = ?", $DBRaw));
|
||||
$DBQuery->execute($MID)
|
||||
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
|
||||
"$DBI::errstr\n",$MID,$DBRaw));
|
||||
}
|
||||
$DBQuery = GetHeaders($DBHandle,$DBRaw,$Month,$MID);
|
||||
|
||||
### ----------------------------------------------
|
||||
print "----- HostStats -----\n" if $Debug;
|
||||
### parse headers
|
||||
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
|
||||
### skip postings with wrong TLH
|
||||
# remove whitespace from contents of Newsgroups:
|
||||
chomp($Newsgroups);
|
||||
$Newsgroups =~ s/\s//;
|
||||
my $GroupCount;
|
||||
for (split /,/, $Newsgroups) {
|
||||
# don't count newsgroup/hierarchy in wrong TLH
|
||||
next if($TLH and !/^$TLH/);
|
||||
$GroupCount++;
|
||||
};
|
||||
next if !$GroupCount;
|
||||
next if ($TLH && !CheckTLH($Newsgroups,$TLH));
|
||||
|
||||
my $Host;
|
||||
my %Header = ParseHeaders(split(/\n/,$Headers));
|
||||
|
@ -344,15 +333,13 @@ sub HostStats {
|
|||
}
|
||||
}
|
||||
|
||||
# lowercase
|
||||
$Host = lc($Host);
|
||||
|
||||
# count host
|
||||
if ($Host) {
|
||||
$Host = lc($Host);
|
||||
$Postings{$Host}++;
|
||||
$Postings{'ALL'}++;
|
||||
} else {
|
||||
&Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
|
||||
&Bleat(1,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
|
||||
}
|
||||
|
||||
printf("%s: %s\n", $Header{'message-id'}, $Host) if ($MID or $Debug && $Debug >1);
|
||||
|
@ -381,6 +368,357 @@ sub HostStats {
|
|||
};
|
||||
};
|
||||
|
||||
sub ClientStats {
|
||||
### ----------------------------------------------------------------------------
|
||||
### collect number of postings per client (and version)
|
||||
### IN : $DBHandle : database handle
|
||||
### $DBRaw : database table for raw data (to read from)
|
||||
### $DBClients : database table for clients data (to write to)
|
||||
### $Month : current month to do
|
||||
### $TLH : TLHs to collect
|
||||
### $MID : specific Message-ID to fetch (testing purposes)
|
||||
### $Test : test mode
|
||||
### $Debug : debug mode
|
||||
### @DropAgents : list of UserAgent "agents" that won't be counted
|
||||
### OUT: (nothing)
|
||||
my ($DBHandle,$DBRaw,$DBClients,$Month,$TLH,$MID,$Test,$Debug,@DropAgents) = @_;
|
||||
|
||||
my (%Postings,$DBQuery);
|
||||
my %DropAgent = map { $_ => 1 } @DropAgents;
|
||||
|
||||
$DBQuery = GetHeaders($DBHandle,$DBRaw,$Month,$MID);
|
||||
|
||||
### ----------------------------------------------
|
||||
print "----- ClientStats -----\n" if $Debug;
|
||||
### parse headers
|
||||
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
|
||||
### skip postings with wrong TLH
|
||||
next if ($TLH && !CheckTLH($Newsgroups,$TLH));
|
||||
|
||||
my (@Clients, $Client, $Version);
|
||||
my %Header = ParseHeaders(split(/\n/,$Headers));
|
||||
|
||||
### X-Mailer
|
||||
if ($Header{'x-mailer'}) {
|
||||
# transfer to x-newsreader and parse from there
|
||||
$Header{'x-newsreader'} = $Header{'x-mailer'};
|
||||
}
|
||||
### X-Newsreader
|
||||
if ($Header{'x-newsreader'}) {
|
||||
$Header{'x-newsreader'} = RemoveComments($Header{'x-newsreader'});
|
||||
# remove 'http://' and 'via' (CrossPoint)
|
||||
$Header{'x-newsreader'} =~ s/https?:\/\///;
|
||||
$Header{'x-newsreader'} =~ s/ ?via(.+)?$//;
|
||||
# parse header
|
||||
# User-Agent style
|
||||
if ($Header{'x-newsreader'} =~ /^([^\/ ]+\/[^\/ ]+ ?)+$/) {
|
||||
# transfer to user-agent and parse from there
|
||||
$Header{'user-agent'} = $Header{'x-newsreader'};
|
||||
# "client name version"
|
||||
} elsif ($Header{'x-newsreader'} =~ / /) {
|
||||
($Client, $Version) = ParseXNewsreader($Header{'x-newsreader'});
|
||||
} else {
|
||||
$Client = $Header{'x-newsreader'};
|
||||
$Version = '';
|
||||
}
|
||||
if ($Client) {
|
||||
# special cases
|
||||
$Client = 'CrossPoint' if $Client =~ /^CrossPoint\//;
|
||||
$Client = 'Virtual Access' if $Client =~ /^Virtual Access/;
|
||||
my %UserAgent = (agent => $Client,
|
||||
version => $Version);
|
||||
push @Clients, { %UserAgent };
|
||||
} else {
|
||||
$Header{'user-agent'} = $Header{'x-newsreader'};
|
||||
}
|
||||
}
|
||||
### User-Agent
|
||||
if(!@Clients && $Header{'user-agent'}) {
|
||||
$Header{'user-agent'} = RemoveComments($Header{'user-agent'});
|
||||
### well-formed?
|
||||
if ($Header{'user-agent'} =~ /^([^\/ ]+\/[^\/ ]+ ?)+$/) {
|
||||
@Clients = ParseUserAgent($Header{'user-agent'});
|
||||
} else {
|
||||
# snip and add known well-formed agents from the trailing end
|
||||
while ($Header{'user-agent'} =~ /(((Hamster)|(Hamster-Pg)|(KorrNews)|(OE-Tools)|(Mime-proxy))(\/[^\/ ]+))$/) {
|
||||
push @Clients, ParseUserAgent($1);
|
||||
$Header{'user-agent'} =~ s/ [^\/ ]+\/[^\/ ]+$//;
|
||||
}
|
||||
### special cases
|
||||
# remove 'http://open-news-network.org'
|
||||
$Header{'user-agent'} =~ s/^https?:\/\/open-news-network.org(\S+)?//;
|
||||
# Thunderbird
|
||||
if ($Header{'user-agent'} =~ /((Mozilla[- ])?Thunderbird) ?([0-9.]+)?/) {
|
||||
$Client = 'Thunderbird';
|
||||
$Version = $3;
|
||||
# XP
|
||||
} elsif ($Header{'user-agent'} =~ /((TrueXP|FreeXP|XP2(\/Agent)?)) \/(.+)$/) {
|
||||
$Client = $1;
|
||||
$Version = $4;
|
||||
$Client = 'XP2' if $Client eq 'XP2/Agent';
|
||||
### most general case
|
||||
# client version
|
||||
# client/version
|
||||
# client/32 version
|
||||
# - version may end in one non-numeric character
|
||||
# - including trailing beta/pre/...
|
||||
# 1) client: (([^0-9]+)|(\D+\/\d+))
|
||||
# 2) version: (\S+\d\D?)
|
||||
# 3) trailing: (( alpha\d?)|( beta\d?)|( rc\d)| pre| trialware)?
|
||||
} elsif ($Header{'user-agent'} =~ /^(([^0-9]+)|(\D+\/\d+))[\/ ]((\S+\d\D?)(( alpha\d?)|( beta\d?)|( rc\d)| pre| trialware)?)$/) {
|
||||
$Client = $1;
|
||||
$Version = $4;
|
||||
### some very special cases
|
||||
# SeaMonkey/nn
|
||||
} elsif ($Header{'user-agent'} =~ /SeaMonkey\/([0-9.]+)/) {
|
||||
$Client = 'Seamonkey';
|
||||
$Version = $1;
|
||||
# Emacs nn/Gnus nn
|
||||
} elsif ($Header{'user-agent'} =~ /Emacs [0-9.]+\/Gnus ([0-9.]+)/) {
|
||||
$Client = 'Gnus';
|
||||
$Version = $1;
|
||||
# failed to parse
|
||||
} else {
|
||||
$Client = $Header{'user-agent'};
|
||||
}
|
||||
# count client, if found
|
||||
if ($Client) {
|
||||
my %UserAgent = (agent => $Client,
|
||||
version => $Version);
|
||||
push @Clients, { %UserAgent };
|
||||
} else {
|
||||
&Bleat(1,sprintf("%s FAILED", $Header{'message-id'})) if !@Clients;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (@Clients) {
|
||||
$Postings{'ALL'}{'ALL'}++;
|
||||
foreach (@Clients) {
|
||||
# filter agents for User-Agent with multiple agents
|
||||
next if $#Clients && exists($DropAgent{lc($_->{'agent'})});
|
||||
# encode to utf-8, if necessary
|
||||
$_->{'agent'} = encode('UTF-8', $_->{'agent'}) if $_->{'agent'} =~ /[\x80-\x{ffff}]/;
|
||||
$_->{'version'} = encode('UTF-8', $_->{'version'}) if $_->{'version'} and $_->{'version'} =~ /[\x80-\x{ffff}]/;
|
||||
# special cases
|
||||
# Mozilla
|
||||
$_->{'agent'} = 'Mozilla' if $_->{'agent'} eq '•Mozilla';
|
||||
$_->{'agent'} =~ s/^Mozilla //;
|
||||
# Forte Agent
|
||||
$_->{'agent'} = 'Forte Agent' if $_->{'agent'} eq 'ForteAgent';
|
||||
if ($_->{'agent'} eq 'Forte Agent') {
|
||||
$_->{'version'} =~ s/-/\//;
|
||||
$_->{'version'} = '' if $_->{'version'} eq '32Bit';
|
||||
}
|
||||
# count client ('ALL') and client/version (if version is present)
|
||||
$Postings{$_->{'agent'}}{'ALL'}++;
|
||||
$Postings{$_->{'agent'}}{$_->{'version'}}++ if $_->{'version'};
|
||||
|
||||
printf("%s: %s {%s}\n", $Header{'message-id'}, $_->{'agent'},
|
||||
$_->{'version'} ? $Postings{$_->{'agent'}}{$_->{'version'}} : '')
|
||||
if ($MID or $Debug && $Debug >1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
# delete old data for that month
|
||||
if (!$Test) {
|
||||
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
|
||||
$DBClients),undef,$Month)
|
||||
or &Bleat(2,sprintf("Can't delete old client data for %s from %s: ".
|
||||
"$DBI::errstr\n",$Month,$DBClients));
|
||||
};
|
||||
|
||||
foreach my $Client (sort keys %Postings) {
|
||||
foreach my $Version (sort keys %{$Postings{$Client}}) {
|
||||
printf ("%s {%s}: %d\n",$Client,$Version,$Postings{$Client}{$Version}) if $Debug;
|
||||
|
||||
if (!$Test) {
|
||||
# write to database
|
||||
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
|
||||
"(month,client,version,postings) ".
|
||||
"VALUES (?, ?, ?, ?)",$DBClients));
|
||||
$DBQuery->execute($Month, $Client, $Version, $Postings{$Client}{$Version})
|
||||
or &Bleat(2,sprintf("Can't write groups data for %s/%s/%s to %s: ".
|
||||
"$DBI::errstr\n",$Month,$Client,$Version,$DBClients));
|
||||
$DBQuery->finish;
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
sub GetHeaders {
|
||||
### ----------------------------------------------------------------------------
|
||||
### get (newsgroups and) raw headers from database
|
||||
### IN : $DBHandle: database handle
|
||||
### $DBRaw : database table for raw data (to read from)
|
||||
### $Month : current month to do
|
||||
### $MID : specific Message-ID to fetch (testing purposes)
|
||||
### OUT: DBI statement handle
|
||||
my ($DBHandle,$DBRaw,$Month,$MID) = @_;
|
||||
|
||||
my $DBQuery;
|
||||
|
||||
if (!$MID) {
|
||||
# get raw header data from raw table for given month
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
|
||||
"WHERE day LIKE ? AND NOT disregard",
|
||||
$DBRaw));
|
||||
$DBQuery->execute($Month.'-%')
|
||||
or &Bleat(2,sprintf("Can't get header data for %s from %s: ".
|
||||
"$DBI::errstr\n",$Month,$DBRaw));
|
||||
} else {
|
||||
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
|
||||
"WHERE mid = ?", $DBRaw));
|
||||
$DBQuery->execute($MID)
|
||||
or &Bleat(2,sprintf("Can't get header data for %s from %s: ".
|
||||
"$DBI::errstr\n",$MID,$DBRaw));
|
||||
}
|
||||
return $DBQuery;
|
||||
}
|
||||
|
||||
sub CheckTLH {
|
||||
### ----------------------------------------------------------------------------
|
||||
### count newsgroups from legal TLH(s)
|
||||
### IN : $Newsgroups: comma separated list of newsgroups
|
||||
### $TLH : (reference to an array of) legal TLH(s)
|
||||
### OUT: number of newsgroups from legal TLH(s)
|
||||
my ($Newsgroups,$TLH) = @_;
|
||||
|
||||
my (@TLH,$GroupCount);
|
||||
|
||||
# fill @TLH from $TLH, which can be an array reference or a scalar value
|
||||
if (ref($TLH) eq 'ARRAY') {
|
||||
@TLH = @{$TLH};
|
||||
} else {
|
||||
push @TLH, $TLH;
|
||||
}
|
||||
|
||||
# remove whitespace from contents of Newsgroups:
|
||||
chomp($Newsgroups);
|
||||
$Newsgroups =~ s/\s//;
|
||||
for (split /,/, $Newsgroups) {
|
||||
my $Newsgroup = $_;
|
||||
foreach (@TLH) {
|
||||
# increment $GroupCount if $Newsgroup starts with $TLH
|
||||
$GroupCount++ if $Newsgroup =~ /^$_/;
|
||||
}
|
||||
};
|
||||
|
||||
return $GroupCount;
|
||||
}
|
||||
|
||||
sub RemoveComments {
|
||||
### ----------------------------------------------------------------------------
|
||||
### remove comments and other junk from header
|
||||
### IN : $Header: a header
|
||||
### OUT: the header, with comments and other junk removed
|
||||
my $Header = shift;
|
||||
|
||||
# decode MIME encoded words
|
||||
if ($Header =~ /=\?\S+\?[BQ]\?/) {
|
||||
$Header = decode("MIME-Header",$Header);
|
||||
}
|
||||
|
||||
# remove nested comments from '(' to first ')'
|
||||
while ($Header =~ /\([^)]+\)/) {
|
||||
$Header =~ s/\([^()]+?\)//;
|
||||
}
|
||||
|
||||
# remove dangling ')'
|
||||
$Header =~ s/\S+\)//;
|
||||
|
||||
# remove from dangling '(' to end of header
|
||||
$Header =~ s/\(.+$//;
|
||||
|
||||
# remove from '[' to first ']'
|
||||
$Header =~ s/\[[^\[\]]+?\]//;
|
||||
|
||||
# remove 'Nr. ... lebt'
|
||||
$Header =~ s/Nr\. \d+ lebt//;
|
||||
|
||||
# remove nn:nn:nn
|
||||
$Header =~ s/\d\d:\d\d:\d\d//;
|
||||
|
||||
# remove 'mm/... '
|
||||
$Header =~ s/\/mm\/\S+//;
|
||||
|
||||
# remove ' DE' / _DE'
|
||||
$Header =~ s/[ _]DE//;
|
||||
|
||||
# remove trailing 'eol' or '-shl'
|
||||
$Header =~ s/(eol)|(-shl)$//;
|
||||
|
||||
# remove from ';' or ',' (CrossPoint)
|
||||
# or '&' to end of header
|
||||
$Header =~ s/[;,&].+$//;
|
||||
|
||||
# remove from 'by ' or 'unter Windows' or '@ Windows'
|
||||
# to end of header
|
||||
$Header =~ s/((by )|(unter +Windows)|(@ Windows)).+$//;
|
||||
|
||||
# remove superfluous whitespace in header
|
||||
# and whitespace around header
|
||||
$Header =~ s/\s+/ /g;
|
||||
$Header =~ s/^\s+//;
|
||||
$Header =~ s/\s+$//;
|
||||
|
||||
return $Header;
|
||||
}
|
||||
|
||||
sub ParseXNewsreader {
|
||||
### ----------------------------------------------------------------------------
|
||||
### parse X-Newsreader header (client and version, if present)
|
||||
### IN : $XNR: a X-Newsreader header
|
||||
### OUT: client and version, if present
|
||||
my $XNR = shift;
|
||||
|
||||
my ($Client, $Version);
|
||||
|
||||
foreach (split(/ /,$XNR)) {
|
||||
# add to client name if no digit present
|
||||
if (!/\d[0-9.]/ or /\/\d$/) {
|
||||
$Client .= $_ . ' ' ;
|
||||
# otherwise, use as version and terminate parsing
|
||||
} else {
|
||||
$Version = $_;
|
||||
last;
|
||||
}
|
||||
}
|
||||
|
||||
# remove trailing whitespace
|
||||
$Client =~ s/\s+$// if $Client;
|
||||
|
||||
# set $Version
|
||||
$Version = '' if !$Version;
|
||||
|
||||
return $Client, $Version;
|
||||
}
|
||||
|
||||
|
||||
sub ParseUserAgent {
|
||||
### ----------------------------------------------------------------------------
|
||||
### parse User-Agent header (agent and version)
|
||||
### IN : $UserAgent: a User-Agent header
|
||||
### OUT: array of hashes (agent/version)
|
||||
my $UserAgent = shift;
|
||||
|
||||
my @UserAgents;
|
||||
|
||||
# a well-formed User-Agent header will contain pairs of
|
||||
# client/version, i.e. 'slrn/0.9.7.3'
|
||||
foreach (split(/ /,$UserAgent)) {
|
||||
my %UserAgent;
|
||||
/^(.+)\/(.+)$/;
|
||||
$UserAgent{'agent'} = $1;
|
||||
$UserAgent{'version'} = $2;
|
||||
push @UserAgents, { %UserAgent };
|
||||
}
|
||||
|
||||
return @UserAgents;
|
||||
}
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
@ -391,7 +729,7 @@ gatherstats - process statistical data from a raw source
|
|||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--conffile> I<filename>]
|
||||
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--conffile> I<filename>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
|
@ -438,12 +776,23 @@ override that default through the B<--groupsdb> option.
|
|||
=item B<hosts> (postings from host per month)
|
||||
|
||||
B<gatherstats> will examine Injection-Info:, X-Trace: and Path:
|
||||
headers and try to normalize them. Groups not in I<TLH> will be
|
||||
ignored. The sum of all detected hosts will also saved for each month.
|
||||
headers and try to normalize them. The sum of all detected hosts will
|
||||
also be saved for each month. Groups not in I<TLH> will be ignored.
|
||||
|
||||
Data is written to I<DBTableHosts> (see L<doc/INSTALL>); you can
|
||||
override that default through the B<--hostsdb> option.
|
||||
|
||||
=item B<clients> (postings by client per month)
|
||||
|
||||
B<gatherstats> will examine User-Agent:, X-Newsreader: and X-Mailer:
|
||||
headers and try to remove comments and non-standard contents. Clients
|
||||
and client versions are counted separately. The sum of all detected
|
||||
clients will also be saved for each month. Groups not in I<TLH> will
|
||||
be ignored.
|
||||
|
||||
Data is written to I<DBTableClnts> (see L<doc/INSTALL>); you can
|
||||
override that default through the B<--clientsdb> option.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Configuration
|
||||
|
@ -511,10 +860,12 @@ Newsgroups not found in the checkgroups file will be dropped (and
|
|||
logged to STDERR), and newsgroups found there but having no postings
|
||||
will be added with a count of 0 (and logged to STDERR).
|
||||
|
||||
=item B<--hierarchy> I<TLH> (newsgroup hierarchy)
|
||||
=item B<--hierarchy> I<TLH> (newsgroup hierarchy/hierarchies)
|
||||
|
||||
Override I<TLH> from F<newsstats.conf>.
|
||||
|
||||
I<TLH> can be a single word or a comma-separated list.
|
||||
|
||||
=item B<--rawdb> I<table> (raw data table)
|
||||
|
||||
Override I<DBTableRaw> from F<newsstats.conf>.
|
||||
|
@ -523,14 +874,14 @@ Override I<DBTableRaw> from F<newsstats.conf>.
|
|||
|
||||
Override I<DBTableGrps> from F<newsstats.conf>.
|
||||
|
||||
=item B<--clientsdb> I<table> (client data table)
|
||||
|
||||
Override I<DBTableClnts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--hostsdb> I<table> (host data table)
|
||||
|
||||
Override I<DBTableHosts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--clientsdb> I<table> (client data table)
|
||||
|
||||
Override I<DBTableClnts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--conffile> I<filename>
|
||||
|
||||
Load configuration from I<filename> instead of F<newsstats.conf>.
|
||||
|
|
|
@ -685,7 +685,7 @@ L<doc/README>
|
|||
|
||||
=item -
|
||||
|
||||
l>doc/INSTALL>
|
||||
L<doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# cliservstats.pl
|
||||
# hoststats.pl
|
||||
#
|
||||
# This script will get statistical data on client (newsreader) and
|
||||
# server (host) usage from a database.
|
||||
# This script will get statistical data on server (host) usage
|
||||
# from a database.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
|
@ -31,7 +31,7 @@ Getopt::Long::config ('bundling');
|
|||
### read commandline options
|
||||
my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat,
|
||||
$OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy,
|
||||
$OptReportType,$OptSums,$OptType,$UppBound,$OptConfFile);
|
||||
$OptReportType,$OptSums,$UppBound,$OptConfFile);
|
||||
GetOptions ('c|captions!' => \$OptCaptions,
|
||||
'comments!' => \$OptComments,
|
||||
'db=s' => \$OptDB,
|
||||
|
@ -44,7 +44,6 @@ GetOptions ('c|captions!' => \$OptCaptions,
|
|||
'o|order-by=s' => \$OptOrderBy,
|
||||
'r|report=s' => \$OptReportType,
|
||||
's|sums!' => \$OptSums,
|
||||
't|type=s' => \$OptType,
|
||||
'u|upper=i' => \$UppBound,
|
||||
'conffile=s' => \$OptConfFile,
|
||||
'h|help' => \&ShowPOD,
|
||||
|
@ -52,15 +51,6 @@ GetOptions ('c|captions!' => \$OptCaptions,
|
|||
# parse parameters
|
||||
# $OptComments defaults to TRUE if --filetemplate is not used
|
||||
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
|
||||
# parse $OptType
|
||||
if ($OptType) {
|
||||
if ($OptType =~ /(host|server)s?/i) {
|
||||
$OptType = 'host';
|
||||
} elsif ($OptType =~ /(newsreader|client)s?/i) {
|
||||
$OptType = 'client';
|
||||
}
|
||||
}
|
||||
&Bleat(2, "Please use '--type server' or '-type newsreader'.") if !$OptType;
|
||||
# parse $OptReportType
|
||||
if ($OptReportType) {
|
||||
if ($OptReportType =~ /sums?/i) {
|
||||
|
@ -74,14 +64,8 @@ if ($OptReportType) {
|
|||
my %Conf = %{ReadConfig($OptConfFile)};
|
||||
|
||||
### set DBTable
|
||||
if ($OptDB) {
|
||||
$Conf{'DBTable'} = $OptDB;
|
||||
}
|
||||
elsif ($OptType eq 'host') {
|
||||
$Conf{'DBTable'} = $Conf{'DBTableHosts'};
|
||||
} else {
|
||||
$Conf{'DBTable'} = $Conf{'DBTableClnts'};
|
||||
}
|
||||
$Conf{'DBTable'} = $Conf{'DBTableHosts'};
|
||||
$Conf{'DBTable'} = $OptDB if $OptDB;
|
||||
|
||||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
|
@ -97,14 +81,14 @@ my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
|
|||
# with placeholders as well as a list of names to bind to them
|
||||
my ($SQLWhereNames,@SQLBindNames);
|
||||
if ($OptNames) {
|
||||
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,$OptType);
|
||||
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,'host');
|
||||
# bail out if --names is invalid
|
||||
&Bleat(2,"--names option has an invalid format!")
|
||||
if !$SQLWhereNames;
|
||||
}
|
||||
|
||||
### build SQL WHERE clause
|
||||
my $ExcludeSums = $OptSums ? '' : sprintf("%s != 'ALL'",$OptType);
|
||||
my $ExcludeSums = $OptSums ? '' : sprintf("%s != 'ALL'",'host');
|
||||
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
|
||||
$ExcludeSums,
|
||||
&SQLSetBounds('default',$LowBound,$UppBound));
|
||||
|
@ -118,8 +102,8 @@ $OptGroupBy = 'name' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
|
|||
and $OptNames and $OptNames !~ /[:*%]/);
|
||||
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
|
||||
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
|
||||
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, $OptType);
|
||||
# $GroupBy will contain 'month' or 'host'/'client' (parsed result of $OptGroupBy)
|
||||
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'host');
|
||||
# $GroupBy will contain 'month' or 'host' (parsed result of $OptGroupBy)
|
||||
# set it to 'month' or 'key' for OutputData()
|
||||
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
|
||||
|
||||
|
@ -128,19 +112,19 @@ my $SQLSelect;
|
|||
my $SQLGroupClause = '';
|
||||
my $Precision = 0; # number of digits right of decimal point for output
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$SQLGroupClause = "GROUP BY $OptType";
|
||||
$SQLGroupClause = "GROUP BY host";
|
||||
# change $SQLOrderClause: replace everything before 'postings'
|
||||
$SQLOrderClause =~ s/BY.+postings/BY postings/;
|
||||
$SQLSelect = "'All months',$OptType,SUM(postings)";
|
||||
$SQLSelect = "'All months',host,SUM(postings)";
|
||||
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
|
||||
$SQLOrderClause =~ s/postings/SUM(postings)/;
|
||||
} else {
|
||||
$SQLSelect = "month,$OptType,postings";
|
||||
$SQLSelect = "month,host,postings";
|
||||
};
|
||||
|
||||
### get length of longest name delivered by query
|
||||
### for formatting purposes
|
||||
my $Field = ($GroupBy eq 'month') ? $OptType : 'month';
|
||||
my $Field = ($GroupBy eq 'month') ? 'host' : 'month';
|
||||
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
|
||||
$Field,'postings',$SQLWhereClause,
|
||||
'',@SQLBindNames);
|
||||
|
@ -155,8 +139,8 @@ $DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
|
|||
$SQLOrderClause));
|
||||
# execute query
|
||||
$DBQuery->execute(@SQLBindNames)
|
||||
or &Bleat(2,sprintf("Can't get %s data for %s from %s.%s: %s\n",
|
||||
$OptType,$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
or &Bleat(2,sprintf("Can't get host data for %s from %s.%s: %s\n",
|
||||
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
$DBI::errstr));
|
||||
|
||||
### output results
|
||||
|
@ -175,7 +159,7 @@ if ($OptCaptions && $OptComments) {
|
|||
$LeadIn .= sprintf("# ----- Names: %s\n",join(',',split(/:/,$OptNames)))
|
||||
if $OptNames;
|
||||
# print boundaries, if set
|
||||
my $CaptionBoundary= '(counting only month fulfilling this condition)';
|
||||
my $CaptionBoundary= '(counting only months fulfilling this condition)';
|
||||
$LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
|
||||
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
|
||||
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
|
||||
|
@ -201,11 +185,11 @@ __END__
|
|||
|
||||
=head1 NAME
|
||||
|
||||
cliservstats - create reports on host or client usage
|
||||
hoststats - create reports on host usage
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<cliservstats> B<-t> I<host|client> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<server(s)|client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
|
||||
B<hoststats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<server(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
|
@ -214,8 +198,7 @@ See L<doc/README>.
|
|||
=head1 DESCRIPTION
|
||||
|
||||
This script create reports on newsgroup usage (number of postings from
|
||||
each host or using each client per month) taken from result tables
|
||||
created by B<gatherstats.pl>.
|
||||
each host) taken from result tables created by B<gatherstats.pl>.
|
||||
|
||||
=head2 Features and options
|
||||
|
||||
|
@ -225,9 +208,9 @@ The time period to act on defaults to last month; you can assign another
|
|||
time period or a single month (or drop all time constraints) via the
|
||||
B<--month> option (see below).
|
||||
|
||||
B<cliservstats> will process all hosts or clients by default; you can
|
||||
limit processing to only some hosts or clients by supplying a list of
|
||||
those names by using the B<--names> option (see below).
|
||||
B<hoststats> will process all hosts by default; you can limit
|
||||
processing to only some hosts by supplying a list of those names by
|
||||
using the B<--names> option (see below).
|
||||
|
||||
=head3 Report type
|
||||
|
||||
|
@ -238,18 +221,18 @@ or all postings summed up; for details, see below.
|
|||
|
||||
Furthermore you can set an upper and/or lower boundary to exclude some
|
||||
results from output via the B<--lower> and B<--upper> options,
|
||||
respectively. By default, all hosts/clients with more and/or less
|
||||
postings per month will be excluded from the result set (i.e. not
|
||||
shown and not considered forsum reports).
|
||||
respectively. By default, all hosts with more and/or less postings
|
||||
per month will be excluded from the result set (i.e. not shown and
|
||||
not considered for sum reports).
|
||||
|
||||
=head3 Sorting and formatting the output
|
||||
|
||||
By default, all results are grouped by month; you can group results by
|
||||
hosts/clients instead via the B<--group-by> option. Within those
|
||||
groups, the list of hosts/clients (or months) is sorted alphabetically
|
||||
(or chronologically, respectively) ascending. You can change that order
|
||||
(and sort by number of postings) with the B<--order-by> option. For
|
||||
details and exceptions, please see below.
|
||||
hosts instead via the B<--group-by> option. Within those groups, the
|
||||
list of hosts (or months) is sorted alphabetically (or chronologically,
|
||||
respectively) ascending. You can change that order (and sort by number
|
||||
of postings) with the B<--order-by> option. For details and exceptions,
|
||||
please see below.
|
||||
|
||||
The results will be formatted as a kind of table; you can change the
|
||||
output format to a simple list or just a list of names and number of
|
||||
|
@ -262,7 +245,7 @@ one for each month, by submitting the B<--filetemplate> option, see below.
|
|||
|
||||
=head2 Configuration
|
||||
|
||||
B<cliservstats> will read its configuration from F<newsstats.conf>
|
||||
B<hoststats> will read its configuration from F<newsstats.conf>
|
||||
which should be present in etc/ via Config::Auto or from a configuration file
|
||||
submitted by the B<--conffile> option.
|
||||
|
||||
|
@ -282,11 +265,6 @@ Print out version and copyright information and exit.
|
|||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-t>, B<--type> I<host|client>
|
||||
|
||||
Create report for hosts (servers) or clients (newsreaders), using
|
||||
I<DBTableHosts> or I<DBTableClnts> respectively.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
|
||||
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
|
@ -296,7 +274,7 @@ processing period to process the whole database.
|
|||
|
||||
=item B<-n>, B<--names> I<name(s)>
|
||||
|
||||
Limit processing to a certain set of host or client names. I<names(s)>
|
||||
Limit processing to a certain set of hostnames. I<names(s)>
|
||||
can be a single name (eternal-september.org), a group of names
|
||||
(*.inka.de) or a list of either of these, separated by colons, for
|
||||
example
|
||||
|
@ -312,9 +290,9 @@ containing the sum of all detected hosts for that month.
|
|||
|
||||
Choose the report type: I<default> or I<sums>
|
||||
|
||||
By default, B<cliservstats> will report the number of postings for each
|
||||
host/client in each month. But it can also report the total sum of postings
|
||||
per host/client for all months.
|
||||
By default, B<hoststats> will report the number of postings for each
|
||||
host in each month. But it can also report the total sum of postings
|
||||
per host for all months.
|
||||
|
||||
For report type I<sums>, the B<group-by> option has no meaning and
|
||||
will be silently ignored (see below).
|
||||
|
@ -327,13 +305,13 @@ Set the lower boundary. See below.
|
|||
|
||||
Set the upper boundary.
|
||||
|
||||
By default, all hosts/clients with more postings per month than the
|
||||
upper boundary and/or less postings per month than the lower boundary
|
||||
By default, all hosts with more postings per month than the upper
|
||||
boundary and/or less postings per month than the lower boundary
|
||||
will be excluded from further processing. For the default report that
|
||||
means each month only hosts/clients with a number of postings between
|
||||
the boundaries will be displayed. For the sums report, hosts/clients
|
||||
with a number of postings exceeding the boundaries in all (!) months
|
||||
will not be considered.
|
||||
means each month only hosts with a number of postings between the
|
||||
boundaries will be displayed. For the sums report, hosts with a number
|
||||
of postings exceeding the boundaries in all (!) months will not be
|
||||
considered.
|
||||
|
||||
=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>
|
||||
|
||||
|
@ -349,8 +327,7 @@ ascending order, like this:
|
|||
individual.net : 16768
|
||||
news.albasani.net: 7879
|
||||
|
||||
The results can be grouped by host/client instead via
|
||||
B<--group-by> I<name>:
|
||||
The results can be grouped by host instead via B<--group-by> I<name>:
|
||||
|
||||
----- individual.net
|
||||
2012-01: 19525
|
||||
|
@ -379,8 +356,8 @@ will therefore be ignored.
|
|||
|
||||
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
|
||||
|
||||
Within each group (a single month or single host/client, see above),
|
||||
the report will be sorted by name (or month) in ascending alphabetical
|
||||
Within each group (a single month or single host, see above), the
|
||||
report will be sorted by host (or month) in ascending alphabetical
|
||||
order by default. You can change the sort order to descending or sort
|
||||
by number of postings instead.
|
||||
|
||||
|
@ -426,19 +403,19 @@ False by default.
|
|||
|
||||
=item B<--comments|--nocomments>
|
||||
|
||||
Add comments (group headers) to I<dump> and I<pretty> output. True by default
|
||||
as logn as B<--filetemplate> is not set.
|
||||
Add comments (group headers) to I<dump> and I<pretty> output. True by
|
||||
default as long as B<--filetemplate> is not set.
|
||||
|
||||
Use I<--nocomments> to suppress anything except host/client names or months and
|
||||
numbers of postings.
|
||||
Use I<--nocomments> to suppress anything except host names or months
|
||||
and numbers of postings.
|
||||
|
||||
=item B<--filetemplate> I<filename template>
|
||||
|
||||
Save output to file(s) instead of dumping it to STDOUT. B<cliservstats> will
|
||||
create one file for each month (or each host/client, accordant to the
|
||||
Save output to file(s) instead of dumping it to STDOUT. B<hoststats>
|
||||
will create one file for each month (or each host, according to the
|
||||
setting of B<--group-by>, see above), with filenames composed by adding
|
||||
year and month (or host/client names) to the I<filename template>, for
|
||||
example with B<--filetemplate> I<stats>:
|
||||
year and month (or hostnames) to the I<filename template>, for example
|
||||
with B<--filetemplate> I<stats>:
|
||||
|
||||
stats-2012-01
|
||||
stats-2012-02
|
||||
|
@ -446,7 +423,7 @@ example with B<--filetemplate> I<stats>:
|
|||
|
||||
=item B<--db> I<database table>
|
||||
|
||||
Override I<DBTableHosts> or I<DBTableClnts> from F<newsstats.conf>.
|
||||
Override I<DBTableHosts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--conffile> I<filename>
|
||||
|
||||
|
@ -462,29 +439,28 @@ See L<doc/INSTALL>.
|
|||
|
||||
Show number of postings per group for lasth month in I<pretty> format:
|
||||
|
||||
cliservstats --type host
|
||||
hoststats
|
||||
|
||||
Show that report for January of 2010 and *.inka plus individual.net:
|
||||
|
||||
cliservstats --type host --month 2010-01 --names *.inka:individual.net:
|
||||
hoststats --month 2010-01 --names *.inka:individual.net:
|
||||
|
||||
Only show clients with 30 postings or less last month, ordered
|
||||
Only show hosts with 30 postings or less last month, ordered
|
||||
by number of postings, descending, in I<pretty> format:
|
||||
|
||||
cliservstats --type client --upper 30 --order-by postings-desc
|
||||
hoststats --upper 30 --order-by postings-desc
|
||||
|
||||
List number of postings per host for each month of 2010 and redirect
|
||||
output to one file for each month, named hosts-2010-01 and so on, in
|
||||
machine-readable form (without formatting):
|
||||
|
||||
cliservstats -t host -m 2010-01:2010-12 -f dump --filetemplate hosts
|
||||
|
||||
hoststats -m 2010-01:2010-12 -f dump --filetemplate hosts
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<bin/cliservstats.pl>
|
||||
=item F<bin/hoststats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
|
@ -513,7 +489,7 @@ L<doc/README>
|
|||
|
||||
=item -
|
||||
|
||||
l>doc/INSTALL>
|
||||
L<doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
|
@ -15,7 +15,8 @@
|
|||
#
|
||||
# Usage:
|
||||
# $~ groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups
|
||||
# $~ cliservstats.pl -t server --nocomments --sums --format dump | postingstats.pl -t hosts
|
||||
# $~ hoststats.pl --nocomments --sums --format dump | postingstats.pl -t hosts
|
||||
# $~ clientstats.pl --nocomments --sums --versions --format dump | postingstats.pl -t clients
|
||||
#
|
||||
|
||||
BEGIN {
|
||||
|
@ -53,19 +54,22 @@ if (!$Type) {
|
|||
$Type = 'GroupStats';
|
||||
} elsif ($Type =~ /(host|server)s?/i) {
|
||||
$Type = 'HostStats';
|
||||
} elsif ($Type =~ /(client|reader)s?/i) {
|
||||
$Type = 'ClientStats';
|
||||
};
|
||||
my $Timestamp = time;
|
||||
|
||||
##### ----- configuration --------------------------------------------
|
||||
my $TLH = 'de';
|
||||
my %Heading = ('GroupStats' => 'Postingstatistik fuer de.* im Monat '.$Month,
|
||||
'HostStats' => 'Serverstatistik fuer de.* im Monat '.$Month
|
||||
my %Heading = ('GroupStats' => 'Postingstatistik fuer de.* im Monat '.$Month,
|
||||
'HostStats' => 'Serverstatistik fuer de.* im Monat '.$Month,
|
||||
'ClientStats' => 'Newsreaderstatistik fuer de.* im Monat '.$Month
|
||||
);
|
||||
my %TH = ('counter' => 'Nr.',
|
||||
'value' => 'Anzahl',
|
||||
'percentage' => 'Prozent'
|
||||
);
|
||||
my %LeadIn = ('GroupStats' => <<GROUPSIN, 'HostStats' => <<HOSTSIN);
|
||||
my %LeadIn = ('GroupStats' => <<GROUPSIN, 'HostStats' => <<HOSTSIN, 'ClientStats' => <<CLIENTSIN);
|
||||
From: Thomas Hochstein <thh\@thh.name>
|
||||
Newsgroups: local.test
|
||||
Subject: Postingstatistik fuer de.* im Monat $Month
|
||||
|
@ -88,7 +92,18 @@ Content-Transfer-Encoding: 7bit
|
|||
User-Agent: postingstats.pl/$VERSION (NewsStats)
|
||||
|
||||
HOSTSIN
|
||||
my %LeadOut = ('GroupStats' => <<GROUPSOUT, 'HostStats' => <<HOSTSOUT);
|
||||
From: Thomas Hochstein <thh\@thh.name>
|
||||
Newsgroups: local.test
|
||||
Subject: Newsreaderstatistik fuer de.* im Monat $Month
|
||||
Message-ID: <destat-clients-$Month.$Timestamp\@mid.news.szaf.org>
|
||||
Approved: thh\@thh.name
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
User-Agent: postingstats.pl/$VERSION (NewsStats)
|
||||
|
||||
CLIENTSIN
|
||||
my %LeadOut = ('GroupStats' => <<GROUPSOUT, 'HostStats' => <<HOSTSOUT, 'ClientStats' => <<CLIENTSOUT);
|
||||
|
||||
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
|
||||
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
|
||||
|
@ -110,6 +125,19 @@ wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
|
|||
Loeschnachricht) erreicht haben.
|
||||
HOSTSOUT
|
||||
|
||||
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
|
||||
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
|
||||
richten werden nicht erfasst; Postings, die supersedet oder gecancelt
|
||||
wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
|
||||
Loeschnachricht) erreicht haben. Versionsangaben werden nur gezaehlt,
|
||||
wenn Sie ermittelbar sind; daher kann die Summe der Newsreader-Versionen
|
||||
kleiner sein als die Postingzahl fuer den Newsreader. Ausserdem koennen
|
||||
an einem Beitrag mehrere Clients beteiligt sein, bspw. der Newsreader
|
||||
und ein lokaler Server wie der Hamster. Daher kann die Summe aller
|
||||
Newsreader groesser sein als die Summer der Postings; auch ergeben die
|
||||
Prozentzahlen dementsprechend in der Summe mehr als 100%.
|
||||
CLIENTSOUT
|
||||
|
||||
##### ----- subroutines ----------------------------------------------
|
||||
|
||||
sub Percentage {
|
||||
|
@ -124,35 +152,76 @@ sub Divider {
|
|||
return ':' . $Symbol x ($MaxLength+TABLEWIDTH) . ":\n";
|
||||
}
|
||||
|
||||
sub SingleVersion {
|
||||
my ($LastName,$RSubValue,$RValue,$RMaxLength) = @_;
|
||||
|
||||
# get version to add to client name
|
||||
my ($Version) = keys %{$$RSubValue{$LastName}};
|
||||
$Version =~ s/^- //;
|
||||
# add version to client name by creating a new name
|
||||
# and deleting the old one
|
||||
my ($NameVersion) = $LastName . ' ' . $Version;
|
||||
$$RValue{$NameVersion} = $$RValue{$LastName};
|
||||
delete($$RValue{$LastName});
|
||||
$$RMaxLength = length($NameVersion) if length($NameVersion) > $$RMaxLength;
|
||||
|
||||
# delete single version
|
||||
delete($$RSubValue{$LastName});
|
||||
}
|
||||
|
||||
##### ----- main loop ------------------------------------------------
|
||||
|
||||
my (%Value, $SumName, $SumTotal, $MaxLength);
|
||||
$MaxLength = 0;
|
||||
my (%Value, %SubValue, $SubCounter, $LastName, $SumName, $SumTotal,
|
||||
$MaxLength);
|
||||
|
||||
if ($Type eq 'GroupStats') {
|
||||
$SumName = "$TLH.ALL";
|
||||
$TH{'name'} = 'Newsgroup'
|
||||
} elsif ($Type eq 'HostStats') {
|
||||
$SumName = 'ALL';
|
||||
$TH{'name'} = 'Server'
|
||||
$TH{'name'} = 'Postingserver'
|
||||
} elsif ($Type eq 'ClientStats') {
|
||||
$SumName = 'ALL';
|
||||
$TH{'name'} = 'Newsreader / Client'
|
||||
}
|
||||
|
||||
# read from STDIN
|
||||
### read from STDIN
|
||||
$MaxLength = 0;
|
||||
while(<>) {
|
||||
my ($Name, $Value) = split;
|
||||
my ($Name, $Value) = $_ =~ /(.+) (\d+)$/;
|
||||
$SumTotal = $Value if $Name eq $SumName;
|
||||
next if $Name =~ /ALL$/;
|
||||
$Value{$Name} = $Value;
|
||||
$MaxLength = length($Name) if length($Name) > $MaxLength;
|
||||
}
|
||||
|
||||
# print to STDOUT
|
||||
# handle client versions
|
||||
if ($Type eq 'ClientStats' and $Name =~ /^- /) {
|
||||
$SubValue{$LastName}{$Name} = $Value;
|
||||
$SubCounter++;
|
||||
} else {
|
||||
# clients with just one version
|
||||
&SingleVersion($LastName,\%SubValue,\%Value,\$MaxLength)
|
||||
if ($LastName && $SubCounter == 1);
|
||||
|
||||
# reset version counter and client name
|
||||
$SubCounter = 0;
|
||||
$LastName = $Name;
|
||||
|
||||
$Value{$Name} = $Value;
|
||||
$MaxLength = length($Name) if length($Name) > $MaxLength;
|
||||
}
|
||||
}
|
||||
# clients with just one version (last iteration)
|
||||
&SingleVersion($LastName,\%SubValue,\%Value,\$MaxLength)
|
||||
if ($LastName && $SubCounter == 1);
|
||||
|
||||
### print to STDOUT
|
||||
# calculate padding for $Heading
|
||||
my $PaddingLeft = ' ' x int((($MaxLength+TABLEWIDTH-2-length($Heading{$Type}))/2));
|
||||
my $PaddingRight = $PaddingLeft;
|
||||
$PaddingLeft .= ' ' if (length($Heading{$Type}) + (length($PaddingLeft) * 2) < $MaxLength+TABLEWIDTH);
|
||||
my $Counter = 0;
|
||||
$PaddingLeft .= ' ' if (length($Heading{$Type}) + (length($PaddingLeft) * 2) +2 < $MaxLength+TABLEWIDTH);
|
||||
|
||||
print $LeadIn{$Type};
|
||||
|
||||
# print table header
|
||||
print &Divider('=',$MaxLength);
|
||||
printf(": %s%s%s :\n",$PaddingLeft,$Heading{$Type},$PaddingRight);
|
||||
print &Divider('=',$MaxLength);
|
||||
|
@ -163,11 +232,26 @@ printf(": %-3s : %-6s : %-7s : %-*s :\n",
|
|||
$MaxLength,$TH{'name'});
|
||||
print &Divider('-',$MaxLength);
|
||||
|
||||
foreach my $Name (sort { $Value{$b} <=> $Value {$a}} keys %Value) {
|
||||
# print table
|
||||
my $Counter = 0;
|
||||
foreach my $Name (sort { $Value{$b} <=> $Value{$a} } keys %Value) {
|
||||
$Counter++;
|
||||
printf(": %3u. : %6u : %6.2f%% : %-*s :\n",$Counter,$Value{$Name},&Percentage($SumTotal,$Value{$Name}),$MaxLength,$Name);
|
||||
printf(": %3u. : %6u : %6.2f%% : %-*s :\n",
|
||||
$Counter,$Value{$Name},&Percentage($SumTotal,$Value{$Name}),
|
||||
$MaxLength,$Name);
|
||||
# handle client versions
|
||||
if ($SubValue{$Name}) {
|
||||
foreach my $SubName (sort { $SubValue{$Name}{$b} <=> $SubValue{$Name}{$a} }
|
||||
keys %{$SubValue{$Name}}) {
|
||||
printf(": : %6u : %6.2f%% : %-*s :\n",
|
||||
$SubValue{$Name}{$SubName},
|
||||
&Percentage($SumTotal,$SubValue{$Name}{$SubName}),
|
||||
$MaxLength,$SubName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# print table footer
|
||||
print &Divider('-',$MaxLength);
|
||||
printf(": : %6u : %s : %-*s :\n",$SumTotal,'100.00%',$MaxLength,'');
|
||||
print &Divider('=',$MaxLength);
|
||||
|
@ -184,7 +268,7 @@ postingstats - format and post reports
|
|||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<postingstats> B<-t> I<groups|hosts> [B<-Vh> [B<-m> I<YYYY-MM>]
|
||||
B<postingstats> B<-t> I<groups|hosts|clients> [B<-Vh> [B<-m> I<YYYY-MM>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
|
@ -193,8 +277,8 @@ See L<doc/README>.
|
|||
=head1 DESCRIPTION
|
||||
|
||||
This script will re-format reports on newsgroup usage created by
|
||||
B<groupstats.pl> or B<cliservstats.pl> and create a message that can
|
||||
be posted to Usenet.
|
||||
B<groupstats.pl>, B<hoststats.pl> or B<clientstats.pl> and create a
|
||||
message that can be posted to Usenet.
|
||||
|
||||
=head2 Features and options
|
||||
|
||||
|
@ -228,8 +312,8 @@ sum total.
|
|||
|
||||
=item C<%Heading>
|
||||
|
||||
Hash with keys for I<GroupStats> and I<HostStats>. Used to display a
|
||||
heading.
|
||||
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>.
|
||||
Used to display a heading.
|
||||
|
||||
=item C<%TH>
|
||||
|
||||
|
@ -242,14 +326,14 @@ Output will be truncated otherwise.
|
|||
|
||||
=item C<%LeadIn>
|
||||
|
||||
Hash with keys for I<GroupStats> and I<HostStats>. Used to create the
|
||||
headers for our posting. Can contain other text that will be shown
|
||||
before C<%Heading>.
|
||||
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>.
|
||||
Used to create the headers for our posting. Can contain other text
|
||||
that will be shown before C<%Heading>.
|
||||
|
||||
=item C<%LeadOut>
|
||||
|
||||
Hash with keys for I<GroupStats> and I<HostStats>. Will be shown at the
|
||||
end of our posting.
|
||||
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>.
|
||||
Will be shown at the end of our posting.
|
||||
|
||||
=back
|
||||
|
||||
|
@ -265,9 +349,10 @@ Print out version and copyright information and exit.
|
|||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-t>, B<--type> I<groups|hosts>
|
||||
=item B<-t>, B<--type> I<groups|hosts|clients>
|
||||
|
||||
Set report type to posting statistics or hosts statistics accordingly.
|
||||
Set report type to posting statistics, hosts statistics or client
|
||||
statistics accordingly.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM>
|
||||
|
||||
|
@ -291,7 +376,11 @@ Create a posting from a posting statistics report for 2012-01:
|
|||
|
||||
Create a posting from a host statistics report for last month:
|
||||
|
||||
cliservstats.pl -t server --nocomments --sums --format dump | postingstats.pl -t hosts
|
||||
hoststats.pl --nocomments --sums --format dump | postingstats.pl -t hosts
|
||||
|
||||
Create a posting from a client statistics report for last month:
|
||||
|
||||
clientstats.pl --nocomments --sums --versions --format dump | postingstats.pl -t clients
|
||||
|
||||
=head1 FILES
|
||||
|
||||
|
@ -326,7 +415,7 @@ L<doc/README>
|
|||
|
||||
=item -
|
||||
|
||||
l>doc/INSTALL>
|
||||
L<doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
||||
|
@ -334,7 +423,11 @@ groupstats -h
|
|||
|
||||
=item -
|
||||
|
||||
cliservstats -h
|
||||
hoststats -h
|
||||
|
||||
=item -
|
||||
|
||||
clientstats -h
|
||||
|
||||
=back
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# installation path is /srv/newsstats/, please adjust accordingly
|
||||
if [[ $1 =~ [0-9]{4}-[0-9]{2} ]]; then
|
||||
/srv/newsstats/bin/groupstats.pl --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
|
||||
/srv/newsstats/bin/cliservstats.pl -t server --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl -t server --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
|
||||
/srv/newsstats/bin/hoststats.pl --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl -t server --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
|
||||
else
|
||||
echo 'Input error, please use dopostingstats.sh YYYY-MM'
|
||||
fi
|
||||
|
|
|
@ -1,3 +1,14 @@
|
|||
NewsStats 0.4.0 (unreleased)
|
||||
* Reformat $Conf{TLH} for GroupStats only.
|
||||
* Extract TLH check from HostStats to subroutine, fix no-op check.
|
||||
* Extract getting raw headers from HostStats to subroutine.
|
||||
* Improve documentation for config file.
|
||||
* ParseHeader: re-merge continuation lines.
|
||||
* Add ClientStats to gatherstats.
|
||||
* Move cliservstats to hoststats.
|
||||
* Add clientstats (for clients).
|
||||
* Add ClientStats to postingstats.
|
||||
|
||||
NewsStats 0.3.0 (2025-05-18)
|
||||
* Extract GroupStats (in gatherstats) to subroutine.
|
||||
* Add ParseHeader() to library.
|
||||
|
|
|
@ -62,8 +62,9 @@ INSTALLATION INSTRUCTIONS
|
|||
|
||||
b) Optional configuration options
|
||||
|
||||
* TLH = de
|
||||
Limit examination to that top-level hierarchy.
|
||||
* TLH = de.alt,news.admin
|
||||
Limit examination to that top-level hierarchy/hierarchies.
|
||||
Comma-separated list.
|
||||
|
||||
3) Database (mysql) setup
|
||||
|
||||
|
|
|
@ -74,8 +74,8 @@ Getting Started
|
|||
Report generation is handled by specialised scripts for each
|
||||
report type. Currently reports on the number of postings per group
|
||||
and month and injection server and month are supported; you can
|
||||
use 'groupstats.pl' and 'cliservstats.pl' for that. See the
|
||||
groupstats.pl and cliservstats.pl man pages for more information.
|
||||
use 'groupstats.pl' and 'hoststats.pl' for that. See the
|
||||
groupstats.pl and hoststats.pl man pages for more information.
|
||||
|
||||
Reporting Bugs
|
||||
|
||||
|
|
|
@ -13,7 +13,9 @@ DBDatabase = newsstats
|
|||
DBTableRaw = raw_de
|
||||
DBTableGrps = groups_de
|
||||
DBTableHosts = hosts_de
|
||||
#DBTableClnts =
|
||||
DBTableClnts = clnts_de
|
||||
|
||||
### hierarchy configuration
|
||||
# comma-separated list of TLHs to parse
|
||||
# newsgroups not starting with one of those patterns are not counted
|
||||
TLH = de
|
||||
|
|
|
@ -49,7 +49,7 @@ require Exporter;
|
|||
Output => [qw(OutputData FormatOutput)],
|
||||
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
|
||||
SQLSetBounds SQLBuildClause GetMaxLength)]);
|
||||
$VERSION = '0.3.0';
|
||||
$VERSION = '0.4.0';
|
||||
|
||||
use Data::Dumper;
|
||||
use File::Basename;
|
||||
|
@ -280,7 +280,8 @@ sub ParseHeaders {
|
|||
} elsif (/^\s/) {
|
||||
# continuation lines
|
||||
if ($Label) {
|
||||
$Header{lc($Label)} .= "\n$_";
|
||||
s/^\s+/ /;
|
||||
$Header{lc($Label)} .= $_;
|
||||
} else {
|
||||
warn (sprintf("Non-header line: %s\n",$_));
|
||||
}
|
||||
|
@ -433,14 +434,13 @@ sub OutputData {
|
|||
### $LeadIn : print at start of output
|
||||
### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
|
||||
### $DBQuery : database query handle with executed query,
|
||||
### containing $Month, $Key, $Value
|
||||
### containing $Month, $Key, $Value
|
||||
### $PadField : padding length for key field (optional) for 'pretty'
|
||||
### $PadValue : padding length for value field (optional) for 'pretty'
|
||||
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $LeadIn, $FileTempl,
|
||||
$DBQuery, $PadField, $PadValue) = @_;
|
||||
my %ValidKeys = %{$ValidKeys} if $ValidKeys;
|
||||
my ($FileName, $Handle, $OUT);
|
||||
our $LastIteration;
|
||||
my ($LastIteration, $FileName, $Handle, $OUT);
|
||||
|
||||
# define output types
|
||||
my %LegalOutput;
|
||||
|
@ -480,7 +480,7 @@ sub OutputData {
|
|||
$Handle = $OUT;
|
||||
};
|
||||
print $Handle &FormatOutput($Format, $Comments, $LeadIn, $Caption,
|
||||
$Key, $Value, $Precision, $PadField, $PadValue);
|
||||
$Key, $Value, $Precision, $PadField, $PadValue, $LastIteration);
|
||||
$LastIteration = $Caption;
|
||||
};
|
||||
close $OUT if ($FileTempl);
|
||||
|
@ -500,24 +500,26 @@ sub FormatOutput {
|
|||
### $PadValue : padding length for value field (optional) for 'pretty'
|
||||
### OUT: $Output: formatted output
|
||||
my ($Format, $Comments, $LeadIn, $Caption, $Key, $Value, $Precision, $PadField,
|
||||
$PadValue) = @_;
|
||||
$PadValue, $LastIteration) = @_;
|
||||
my ($Output);
|
||||
# keep last caption in mind
|
||||
our ($LastIteration);
|
||||
# create one line of output
|
||||
if ($Format eq 'dump') {
|
||||
# output as dump (key value)
|
||||
$Output = sprintf ("# %s:\n",$Caption)
|
||||
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
||||
if ($Caption and $Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
||||
$Output .= sprintf ("%s %u\n",$Key,$Value);
|
||||
} elsif ($Format eq 'list') {
|
||||
# output as list (caption key value)
|
||||
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
|
||||
if ($Caption) {
|
||||
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
|
||||
} else {
|
||||
$Output = sprintf ("%s %u\n",$Key,$Value);
|
||||
}
|
||||
} elsif ($Format eq 'pretty') {
|
||||
# output as a table
|
||||
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)) {
|
||||
$Output = $LeadIn;
|
||||
$Output .= sprintf ("# ----- %s:\n",$Caption);
|
||||
$Output .= sprintf ("# ----- %s:\n",$Caption) if $Caption;
|
||||
}
|
||||
# increase $PadValue for numbers with decimal point
|
||||
$PadValue += $Precision+1 if $Precision;
|
||||
|
@ -582,7 +584,7 @@ sub SQLSortOrder {
|
|||
### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
|
||||
### $OrderBy: secondary sort by month/newsgroups (default)
|
||||
### or number of 'postings'
|
||||
### $Type : newsgroup, host, client
|
||||
### $Type : newsgroup, host or client+version
|
||||
### OUT: a SQL ORDER BY clause
|
||||
my ($GroupBy,$OrderBy,$Type) = @_;
|
||||
my ($GroupSort,$OrderSort) = ('','');
|
||||
|
@ -640,9 +642,11 @@ sub SQLGroupList {
|
|||
### OUT: SQL code to become part of a 'WHERE' clause,
|
||||
### list of names for SQL bindings
|
||||
my ($Names,$Type) = @_;
|
||||
my $InvalidCharRegExp;
|
||||
# substitute '*' wildcard with SQL wildcard character '%'
|
||||
$Names =~ s/\*/%/g;
|
||||
return (undef,undef) if !CheckValidNames($Names);
|
||||
$InvalidCharRegExp = ',;' if $Type eq 'client';
|
||||
return (undef,undef) if !CheckValidNames($Names,$InvalidCharRegExp);
|
||||
# just one name/newsgroup?
|
||||
return (SQLGroupWildcard($Names,$Type),$Names) if $Names !~ /:/;
|
||||
my ($SQL,@WildcardNames,@NoWildcardNames);
|
||||
|
@ -805,10 +809,11 @@ sub SQLBuildClause {
|
|||
sub CheckValidNames {
|
||||
################################################################################
|
||||
### syntax check of a list
|
||||
### IN : $Names: list of names, e.g. newsgroups (group.one.*:group.two:group.three.*)
|
||||
### IN : $Names : list of names, e.g. newsgroups (group.one.*:group.two:group.three.*)
|
||||
### InvalidCharRegExp: regular expression for invalid characters
|
||||
### OUT: boolean
|
||||
my ($Names) = @_;
|
||||
my $InvalidCharRegExp = ',; ';
|
||||
my ($Names,$InvalidCharRegExp) = @_;
|
||||
$InvalidCharRegExp = ',; ' if (!$InvalidCharRegExp);
|
||||
return ($Names =~ /[$InvalidCharRegExp]/) ? 0 : 1;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue