Compare commits
85 commits
Author | SHA1 | Date | |
---|---|---|---|
|
20434ab1dc | ||
|
e176b0d5e8 | ||
|
66890b68d8 | ||
|
0b87e81b08 | ||
|
1a5b9dbcb1 | ||
|
cff76a3c65 | ||
|
18db200aea | ||
|
8afeb09cc2 | ||
|
d02ae5e2ff | ||
|
07e4543717 | ||
|
0102b72971 | ||
|
ed3fb3cda0 | ||
|
462f28505d | ||
|
06bcdfb2be | ||
|
39e845d552 | ||
|
09a9112679 | ||
|
66a175c7f8 | ||
|
963f07432c | ||
|
a553b374ce | ||
|
3e73346b20 | ||
|
eea296391c | ||
|
d194ef754f | ||
|
c985e29b7e | ||
|
f78d4c2158 | ||
|
995173456b | ||
|
3447cdabff | ||
|
671ae67be0 | ||
|
6122d1a49d | ||
|
988e7b2f13 | ||
|
5a6a3e58bf | ||
|
8c9d450d47 | ||
|
9b6bf3e194 | ||
|
4ad63fcb4e | ||
|
6afa9a62b9 | ||
|
7169e2636f | ||
|
0ee389fc42 | ||
|
28157570f1 | ||
|
7dd8a95be3 | ||
|
29e9784048 | ||
|
83d4da5e30 | ||
|
53c2032850 | ||
|
c6346470f9 | ||
|
ea493f3da0 | ||
|
e40e96a1e2 | ||
|
f7485561dd | ||
|
f6b7a1d000 | ||
|
c7206a2eaf | ||
|
799eddab5b | ||
|
c1e6b0161e | ||
|
f5aa649810 | ||
|
93b8d564ba | ||
|
c6432dcd44 | ||
|
57af475b80 | ||
|
867498fdc8 | ||
|
713db80545 | ||
|
9ccb915d77 | ||
|
73a2d70f16 | ||
|
0a0e615ede | ||
|
a31e86444a | ||
|
4cdb771866 | ||
|
bcd668780e | ||
|
1d3c8f9529 | ||
|
992d678460 | ||
|
ea59ab945e | ||
|
09b45fc369 | ||
|
ee29be18c8 | ||
|
cd6f153a9e | ||
|
b5ef572664 | ||
|
91c674c4fe | ||
|
fd0717a15c | ||
|
b3b170c357 | ||
|
44c197097b | ||
|
e39d4207a6 | ||
|
24d2011f32 | ||
|
2871792120 | ||
|
22d3d70a72 | ||
|
599fefbf6a | ||
|
7624accb6e | ||
|
8dc6823e98 | ||
|
17ef44085f | ||
|
ea91003a99 | ||
|
1af57a5390 | ||
|
23ab67a099 | ||
|
dfc2b81c37 | ||
|
2ad99c20bc |
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,3 +1,3 @@
|
|||
tmp/
|
||||
tmp/*
|
||||
newsstats.conf
|
||||
etc/newsstats.conf
|
||||
|
|
17
README.md
Normal file
17
README.md
Normal file
|
@ -0,0 +1,17 @@
|
|||
# NewsStats
|
||||
|
||||
**NewsStats** is a software package to extract live data from an INN newsfeed and generate statistics from it.
|
||||
|
||||
## Description
|
||||
|
||||
**NewsStats** stores overview data and complete headers of all incoming postings (in one or more specific Usenet hierarchies) in real time in a MySQL database. This raw dataset can then be analysed regularly, e.g. monthly, for instance in terms of postings per group and month. The analysis results will also be stored in databases which in turn can be used to generate various reports (postings per group, injection server or posting agent, per month).
|
||||
|
||||
This software is currently used to generate the monthly statistics posted to `de.admin.news.lists` for the de.\* hierarchy.
|
||||
|
||||
## More information
|
||||
|
||||
Please see the [distribution page](https://th-h.de/net/software/newsstats/) (in German).
|
||||
|
||||
* General overview and examples: [README](doc/README)
|
||||
* Installation instructions: [INSTALL](doc/INSTALL)
|
||||
* Changelog: [ChangeLog](doc/ChangeLog)
|
634
bin/clientstats.pl
Executable file
634
bin/clientstats.pl
Executable file
|
@ -0,0 +1,634 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# clientstats.pl
|
||||
#
|
||||
# This script will get statistical data on newsreader (client) usage
|
||||
# from a database.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
use File::Basename;
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
|
||||
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat,
|
||||
$OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy,
|
||||
$OptReportType,$OptSums,$UppBound,$OptVersions,$OptConfFile);
|
||||
GetOptions ('c|captions!' => \$OptCaptions,
|
||||
'comments!' => \$OptComments,
|
||||
'db=s' => \$OptDB,
|
||||
'filetemplate=s' => \$OptFileTemplate,
|
||||
'f|format=s' => \$OptFormat,
|
||||
'g|group-by=s' => \$OptGroupBy,
|
||||
'l|lower=i' => \$LowBound,
|
||||
'm|month=s' => \$OptMonth,
|
||||
'n|names=s' => \$OptNames,
|
||||
'o|order-by=s' => \$OptOrderBy,
|
||||
'r|report=s' => \$OptReportType,
|
||||
's|sums!' => \$OptSums,
|
||||
'u|upper=i' => \$UppBound,
|
||||
'v|versions!' => \$OptVersions,
|
||||
'conffile=s' => \$OptConfFile,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
# parse parameters
|
||||
# $OptComments defaults to TRUE if --filetemplate is not used
|
||||
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
|
||||
# parse $OptReportType
|
||||
# Normalise --report: any non-empty value containing "sum" or "sums"
# (case-insensitive) selects the sum report, every other non-empty value
# selects the default report. An unset or empty option is left untouched.
$OptReportType = ($OptReportType =~ /sums?/i) ? 'sum' : 'default'
    if $OptReportType;
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig($OptConfFile)};
|
||||
|
||||
### set DBTable
|
||||
$Conf{'DBTable'} = $Conf{'DBTableClnts'};
|
||||
$Conf{'DBTable'} = $OptDB if $OptDB;
|
||||
|
||||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
|
||||
### get time period and names, prepare SQL 'WHERE' clause
|
||||
# get time period
|
||||
# and set caption for output and expression for SQL 'WHERE' clause
|
||||
my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
|
||||
# bail out if --month is invalid
|
||||
&Bleat(2,"--month option has an invalid format - ".
|
||||
"please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
|
||||
# get list of clients and set expression for SQL 'WHERE' clause
|
||||
# with placeholders as well as a list of names to bind to them
|
||||
my ($SQLWhereNames,@SQLBindNames);
|
||||
if ($OptNames) {
|
||||
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,'client');
|
||||
# bail out if --names is invalid
|
||||
&Bleat(2,"--names option has an invalid format!")
|
||||
if !$SQLWhereNames;
|
||||
}
|
||||
|
||||
### build SQL WHERE clause
|
||||
my $ExcludeSums = $OptSums ? '' : "client != 'ALL'";
|
||||
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
|
||||
$ExcludeSums,"version = 'ALL'",
|
||||
&SQLSetBounds('default',$LowBound,$UppBound));
|
||||
|
||||
### get sort order and build SQL 'ORDER BY' clause
|
||||
# force to 'month' for $OptReportType 'sum'
|
||||
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
|
||||
# default to 'name' if $OptGroupBy is not set and
|
||||
# just one name is requested, but more than one month
|
||||
$OptGroupBy = 'name' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
|
||||
and $OptNames and $OptNames !~ /[:*%]/);
|
||||
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
|
||||
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
|
||||
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'client, version');
|
||||
# $GroupBy will contain 'month' or 'client, version' (parsed result of $OptGroupBy)
|
||||
# set it to 'month' or 'key' for OutputData()
|
||||
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
|
||||
|
||||
### get report type and build SQL 'SELECT' query
|
||||
my $SQLSelect;
|
||||
my $SQLGroupClause = '';
|
||||
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$SQLGroupClause = "GROUP BY client, version";
|
||||
# change $SQLOrderClause: replace everything before 'postings'
|
||||
$SQLOrderClause =~ s/BY.+postings/BY postings/;
|
||||
$SQLSelect = "'All months',LEFT(client,40),SUM(postings)";
|
||||
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
|
||||
$SQLOrderClause =~ s/postings/SUM(postings)/;
|
||||
} else {
|
||||
$SQLSelect = "month,LEFT(client,40),postings";
|
||||
};
|
||||
|
||||
### get length of longest name delivered by query
|
||||
### for formatting purposes
|
||||
my $Field = ($GroupBy eq 'month') ? 'LEFT(client,40)' : 'month';
|
||||
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
|
||||
$Field,'postings',$SQLWhereClause,
|
||||
'',@SQLBindNames);
|
||||
|
||||
### build and execute SQL query
|
||||
my ($DBQuery);
|
||||
# prepare query
|
||||
$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
|
||||
$SQLSelect,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
$SQLWhereClause,$SQLGroupClause,
|
||||
$SQLOrderClause));
|
||||
# execute query
|
||||
$DBQuery->execute(@SQLBindNames)
|
||||
or &Bleat(2,sprintf("Can't get client data for %s from %s.%s: %s\n",
|
||||
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
$DBI::errstr));
|
||||
|
||||
### output results
|
||||
# set default to 'pretty'
|
||||
$OptFormat = 'pretty' if !$OptFormat;
|
||||
# Build the caption lead-in that is handed to FormatOutput().
# It is only assembled when captions and comments are both enabled;
# otherwise $LeadIn stays undefined.
my $LeadIn;
if ($OptCaptions && $OptComments) {
    my $IsSumReport = ($OptReportType and $OptReportType ne 'default');
    my @CaptionLines;

    # time period together with the report type
    push @CaptionLines, sprintf("# ----- Report for %s %s\n", $CaptionPeriod,
        $IsSumReport ? '(number of all postings for that time period)'
                     : '(number of postings for each month)');

    # list of names, if --names was given (colons become commas)
    if ($OptNames) {
        push @CaptionLines,
            sprintf("# ----- Names: %s\n", join(',', split(/:/, $OptNames)));
    }

    # lower and/or upper boundary, if any was given
    if ($LowBound or $UppBound) {
        push @CaptionLines, sprintf("# ----- Threshold: %s %s x %s %s %s\n",
            $LowBound || '', $LowBound ? '=>' : '',
            $UppBound ? '<=' : '', $UppBound || '',
            '(counting only months fulfilling this condition)');
    }

    # primary (grouping) and secondary (ordering) sort order
    my $GroupedBy   = ($GroupBy eq 'month') ? 'Months' : 'Names';
    my $GroupDir    = ($OptGroupBy and $OptGroupBy =~ /-?desc$/i)
                    ? 'descending' : 'ascending';
    my $OrderedBy   = ($OptOrderBy and $OptOrderBy =~ /posting/i)
                    ? 'by number of postings ' : '';
    my $OrderDir    = ($OptOrderBy and $OptOrderBy =~ /-?desc$/i)
                    ? 'descending' : 'ascending';
    push @CaptionLines, sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
        $GroupedBy, $GroupDir, $OrderedBy, $OrderDir);

    $LeadIn = join('', @CaptionLines);
}
|
||||
|
||||
# output data
|
||||
# (changed code copy from NewsStats::OutputData)
|
||||
my ($LastIteration, $FileName, $Handle, $OUT);
|
||||
|
||||
# define output types
|
||||
my %LegalOutput;
|
||||
@LegalOutput{('dump','list','pretty')} = ();
|
||||
# bail out if format is unknown
|
||||
&Bleat(2,"Unknown output type '$OptFormat'!") if !exists($LegalOutput{$OptFormat});
|
||||
|
||||
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
|
||||
# save client for later use
|
||||
my $Client = $Key;
|
||||
# care for correct sorting order and abstract from month and keys:
|
||||
# $Caption will be $Month or $Key, according to sorting order,
|
||||
# and $Key will be $Key or $Month, respectively
|
||||
my $Caption;
|
||||
if ($GroupBy eq 'key') {
|
||||
$Caption = $Key;
|
||||
$Key = $Month;
|
||||
} else {
|
||||
$Caption = $Month;
|
||||
}
|
||||
# set output file handle
|
||||
# Select the output handle for this row.
# Without --filetemplate everything goes to STDOUT. With --filetemplate a
# new file is opened whenever the caption (month or client) changes.
if (!$OptFileTemplate) {
    $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
} elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
    # finish the previous file; test definedness, not truth, so that a
    # caption of "0" still gets its file closed, and report close errors
    # (buffered write errors only surface at close)
    if (defined($LastIteration)) {
        close($OUT)
            or warn sprintf("Cannot close output file '%s': %s\n", $FileName, $!);
    }
    # safeguards for filename creation:
    # replace potential problem characters with '_'
    $FileName = sprintf('%s-%s', $OptFileTemplate, $Caption);
    $FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
    # three-argument open keeps mode and filename apart; $! is passed as an
    # argument so a '%' in the system message cannot corrupt the format
    open($OUT, '>', $FileName)
        or &Bleat(2, sprintf("Cannot open output file '%s': %s", $FileName, $!));
    $Handle = $OUT;
};
|
||||
print $Handle &FormatOutput($OptFormat, $OptComments, $LeadIn, $Caption,
|
||||
$Key, $Value, 0, $MaxLength, $MaxValLength, $LastIteration);
|
||||
# output client versions
|
||||
# Output the versions of the current client below the client line.
if ($OptVersions) {
    ### get client versions
    my $IsSumReport = ($OptReportType and $OptReportType ne 'default');

    # The main query delivers LEFT(client,40), so compare against the same
    # expression; comparing the truncated name with the full column would
    # never match clients with names longer than 40 characters.
    my @VersConditions   = ('LEFT(client,40) = ?');
    my @SQLVersBindNames = (@SQLBindNames, $Client);
    # For the sum report $Month holds the literal 'All months', which matches
    # no row, so the month is only constrained for the default report.
    if (!$IsSumReport) {
        push @VersConditions,   'month = ?';
        push @SQLVersBindNames, $Month;
    }

    # WHERE clause without the 'ALL' pseudo version; kept in its own
    # variable so the clause of the main query is not overwritten
    my $SQLVersWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
                                            $ExcludeSums,"version != 'ALL'",
                                            @VersConditions,
                                            &SQLSetBounds('default',$LowBound,$UppBound));

    # get length of longest version delivered by query
    # for formatting purposes
    my ($VersMaxLength,$VersMaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
                                                          'version','postings',
                                                          $SQLVersWhereClause,
                                                          '',@SQLVersBindNames);
    if ($VersMaxLength) {
        # add length of '- '
        $VersMaxLength += 2;
        # never use less room than the client lines do
        $VersMaxLength    = $MaxLength    if $MaxLength    > $VersMaxLength;
        $VersMaxValLength = $MaxValLength if $MaxValLength > $VersMaxValLength;
    }

    # prepare query; the sum report groups by client and version
    # (see $SQLGroupClause) and therefore has to add up the postings
    my $DBVersQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
                                         $IsSumReport ? 'version,SUM(postings)'
                                                      : 'version,postings',
                                         $Conf{'DBDatabase'},$Conf{'DBTable'},
                                         $SQLVersWhereClause,$SQLGroupClause,
                                         $SQLOrderClause));
    # execute query
    $DBVersQuery->execute(@SQLVersBindNames)
        or &Bleat(2,sprintf("Can't get version data for %s from %s.%s: %s\n",
                            $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
                            $DBI::errstr));
    # output versions, each marked with a leading '- '
    while (my ($Version, $Postings) = $DBVersQuery->fetchrow_array) {
        $Version = '- ' . $Version;
        print $Handle &FormatOutput($OptFormat, $OptComments, $LeadIn, '',
                                    $Version, $Postings, 0, $VersMaxLength,
                                    $VersMaxValLength, '');
    }
}
|
||||
$LastIteration = $Caption;
|
||||
};
|
||||
# Close the last output file. $OUT is still undefined when the query
# returned no rows, so only close a handle that was actually opened, and
# report close errors (buffered write errors only surface at close).
if ($OptFileTemplate and defined($OUT)) {
    close($OUT) or warn "Cannot close output file: $!\n";
}
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
clientstats - create reports on client usage
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<clientstats> [B<-Vhcsv> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script creates reports on client usage (number of postings
|
||||
using each client per month) taken from result tables created by
|
||||
B<gatherstats.pl>.
|
||||
|
||||
=head2 Features and options
|
||||
|
||||
=head3 Time period and names
|
||||
|
||||
The time period to act on defaults to last month; you can assign another
|
||||
time period or a single month (or drop all time constraints) via the
|
||||
B<--month> option (see below).
|
||||
|
||||
B<clientstats> will process all clients by default; you can limit
|
||||
processing to only some clients by supplying a list of those names by
|
||||
using the B<--names> option (see below).
|
||||
|
||||
=head3 Report type
|
||||
|
||||
You can choose between different B<--report> types: postings per month
|
||||
or all postings summed up; for details, see below.
|
||||
|
||||
=head3 Upper and lower boundaries
|
||||
|
||||
Furthermore you can set an upper and/or lower boundary to exclude some
|
||||
results from output via the B<--lower> and B<--upper> options,
|
||||
respectively. By default, all clients with more and/or less postings
|
||||
per month will be excluded from the result set (i.e. not shown and
|
||||
not considered for sum reports).
|
||||
|
||||
=head3 Sorting and formatting the output
|
||||
|
||||
By default, all results are grouped by month; you can group results by
|
||||
clients instead via the B<--group-by> option. Within those groups,
|
||||
the list of clients (or months) is sorted alphabetically
|
||||
(or chronologically, respectively) ascending. You can change that order
|
||||
(and sort by number of postings) with the B<--order-by> option. For
|
||||
details and exceptions, please see below.
|
||||
|
||||
The results will be formatted as a kind of table; you can change the
|
||||
output format to a simple list or just a list of names and number of
|
||||
postings with the B<--format> option. Captions will be added by means
|
||||
of the B<--captions> option; all comments (and captions) can be
|
||||
suppressed by using B<--nocomments>.
|
||||
|
||||
Last but not least you can redirect all output to a number of files,
|
||||
e.g. one for each month, by submitting the B<--filetemplate> option,
|
||||
see below.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<clientstats> will read its configuration from F<newsstats.conf>
|
||||
which should be present in etc/ via Config::Auto or from a configuration
|
||||
file submitted by the B<--conffile> option.
|
||||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
You can override some configuration options via the B<--db> option.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Display version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Display this man page and exit.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
|
||||
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
period between two months in YYYY-MM:YYYY-MM format (two months, separated
|
||||
by a colon). By using the keyword I<all> instead, you can set no
|
||||
processing period to process the whole database. Defaults to last month.
|
||||
|
||||
=item B<-n>, B<--names> I<name(s)>
|
||||
|
||||
Limit processing to a certain set of client names. I<name(s)>
|
||||
can be a single name (Thunderbird), a group of names (Ice*) or a list
|
||||
of either of these, separated by colons, for example
|
||||
|
||||
Forte Agent:Thunderbird:Ice*
|
||||
|
||||
Spaces or special characters like "*" need to be quoted from the shell,
|
||||
like
|
||||
|
||||
-n 'Forte Agent:Thunderbird:Ice*'
|
||||
|
||||
There is no way to limit processing to a specific version, but you can
|
||||
always grep through the output.
|
||||
|
||||
=item B<-s>, B<--sums|--nosums> (sum per month)
|
||||
|
||||
Include "virtual" clients named "ALL" for every month in output,
|
||||
containing the sum of all detected clients for that month. False
|
||||
by default.
|
||||
|
||||
=item B<-v>, B<--versions|--noversions> (client versions)
|
||||
|
||||
Include a list of all observed versions of each client in output.
|
||||
Version information will be displayed with indents ('-') below each
|
||||
client, sorted in the same way (by postings or alphanumeric). False
|
||||
by default.
|
||||
|
||||
=item B<-r>, B<--report> I<default|sums>
|
||||
|
||||
Choose the report type: I<default> or I<sums>
|
||||
|
||||
By default, B<clientstats> will report the number of postings for each
|
||||
client in each month. But it can also report the total sum of postings
|
||||
per client for all months. Sums of B<--versions> can be included.
|
||||
|
||||
For report type I<sums>, the B<group-by> option has no meaning and
|
||||
will be silently ignored (see below).
|
||||
|
||||
=item B<-l>, B<--lower> I<lower boundary>
|
||||
|
||||
Set the lower boundary. See below.
|
||||
|
||||
=item B<-u>, B<--upper> I<upper boundary>
|
||||
|
||||
Set the upper boundary.
|
||||
|
||||
By default, all clients with more postings per month than the
|
||||
upper boundary and/or less postings per month than the lower boundary
|
||||
will be excluded from further processing. For the default report that
|
||||
means each month only clients with a number of postings between the
|
||||
boundaries will be displayed. For the sums report, clients with a
|
||||
number of postings exceeding the boundaries in all (!) months will
|
||||
not be considered.
|
||||
|
||||
=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>
|
||||
|
||||
By default, all results are grouped by month, sorted chronologically in
|
||||
ascending order, like this:
|
||||
|
||||
# ----- 2012-01:
|
||||
40tude_Dialog: 5873
|
||||
Forte Agent : 7735
|
||||
Thunderbird : 20925
|
||||
# ----- 2012-02:
|
||||
40tude_Dialog: 4142
|
||||
Forte Agent : 5895
|
||||
Thunderbird : 19091
|
||||
|
||||
The results can be grouped by client instead via
|
||||
B<--group-by> I<name>:
|
||||
|
||||
# ----- 40tude_Dialog:
|
||||
2012-01: 5873
|
||||
2012-02: 4142
|
||||
# ----- Forte Agent:
|
||||
2012-01: 7735
|
||||
2012-02: 5895
|
||||
# ----- Thunderbird:
|
||||
2012-01: 20925
|
||||
2012-02: 19091
|
||||
|
||||
By appending I<-desc> to the group-by option parameter, you can reverse
|
||||
the sort order - e.g. B<--group-by> I<month-desc> will give:
|
||||
|
||||
# ----- 2012-02:
|
||||
40tude_Dialog: 4142
|
||||
Forte Agent : 5895
|
||||
Thunderbird : 19091
|
||||
# ----- 2012-01:
|
||||
40tude_Dialog: 5873
|
||||
Forte Agent : 7735
|
||||
Thunderbird : 20925
|
||||
|
||||
Sums reports (see above) will always be grouped by months; this option
|
||||
will therefore be ignored.
|
||||
|
||||
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
|
||||
|
||||
Within each group (a single month or single client, see above),
|
||||
the report will be sorted by name (or month) in ascending alphabetical
|
||||
order by default. You can change the sort order to descending or sort
|
||||
by number of postings instead.
|
||||
|
||||
By default, output is sorted alphabetically:
|
||||
|
||||
# ----- 2012-01:
|
||||
40tude_Dialog: 5873
|
||||
Forte Agent : 7735
|
||||
Thunderbird : 20925
|
||||
|
||||
Using B<--order-by> I<postings-desc>, it will be sorted from most
|
||||
to least postings:
|
||||
|
||||
# ----- 2012-01:
|
||||
Thunderbird : 20925
|
||||
Forte Agent : 7735
|
||||
40tude_Dialog: 5873
|
||||
|
||||
=item B<-f>, B<--format> I<pretty|list|dump>
|
||||
|
||||
Select the output format, I<pretty> (a kind of table) being the default:
|
||||
|
||||
# ----- 2012-01:
|
||||
40tude_Dialog: 5873
|
||||
Forte Agent : 7735
|
||||
# ----- 2012-02:
|
||||
40tude_Dialog: 4142
|
||||
Forte Agent : 5895
|
||||
|
||||
I<list> format looks like this (each client preceded by month):
|
||||
|
||||
2012-01 40tude_Dialog 5873
|
||||
2012-01 Forte Agent 7735
|
||||
2012-02 40tude_Dialog 4142
|
||||
2012-02 Forte Agent 5895
|
||||
|
||||
And I<dump> format looks like this:
|
||||
|
||||
# 2012-01:
|
||||
40tude_Dialog 5873
|
||||
Forte Agent 7735
|
||||
# 2012-02:
|
||||
40tude_Dialog 4142
|
||||
Forte Agent 5895
|
||||
|
||||
You can remove the comments (lines after '#') by using B<--nocomments>,
|
||||
see below.
|
||||
|
||||
=item B<-c>, B<--captions|--nocaptions>
|
||||
|
||||
Add captions to output, like this:
|
||||
|
||||
----- Report for 2012-01 to 2012-02 (number of postings for each month)
|
||||
----- Names: Thunderbird
|
||||
----- Threshold: 8000 => x (counting only months fulfilling this condition)
|
||||
----- Grouped by Month (ascending), sorted by number of postings descending
|
||||
|
||||
False by default.
|
||||
|
||||
=item B<--comments|--nocomments>
|
||||
|
||||
Add comments (group headers) to I<dump> and I<pretty> output. True by
|
||||
default as long as B<--filetemplate> is not set.
|
||||
|
||||
Use I<--nocomments> to suppress anything except client names or months
|
||||
and numbers of postings.
|
||||
|
||||
=item B<--filetemplate> I<filename template>
|
||||
|
||||
Save output to file(s) instead of dumping it to STDOUT. B<clientstats>
|
||||
will create one file for each month (or each client, according to the
|
||||
setting of B<--group-by>, see above), with filenames composed by adding
|
||||
year and month (or client names) to the I<filename template>, for
|
||||
example with B<--filetemplate> I<stats>:
|
||||
|
||||
stats-2012-01
|
||||
stats-2012-02
|
||||
... and so on
|
||||
|
||||
=item B<--db> I<database table>
|
||||
|
||||
Override I<DBTableClnts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--conffile> I<filename>
|
||||
|
||||
Read configuration from I<filename> instead of F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Show number of postings per client for last month in I<pretty> format:
|
||||
|
||||
clientstats
|
||||
|
||||
Show that report for January of 2010 and Thunderbird plus Ice*:
|
||||
|
||||
clientstats --month 2010-01 --names 'Thunderbird:Ice*'
|
||||
|
||||
Only show clients with at least 30 postings last month and the versions
|
||||
of those clients, ordered each by number of postings, descending,
|
||||
in I<pretty> format:
|
||||
|
||||
clientstats --lower 30 --versions --order-by postings-desc
|
||||
|
||||
List number of postings per client for each month of 2010 and redirect
|
||||
output to one file for each month, named hosts-2010-01 and so on, in
|
||||
machine-readable form (without formatting):
|
||||
|
||||
clientstats -m 2010-01:2010-12 -f dump --filetemplate hosts
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<bin/clientstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
||||
gatherstats -h
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@thh.name>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
|
@ -1,29 +1,26 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# install.pl
|
||||
# dbcreate.pl
|
||||
#
|
||||
# This script will create database tables as necessary.
|
||||
#
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
# Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
our $VERSION = "0.01";
|
||||
use File::Basename;
|
||||
# we're in .../install, so our module is in ..
|
||||
push(@INC, dirname($0).'/..');
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT);
|
||||
|
||||
use Cwd;
|
||||
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
@ -31,18 +28,15 @@ Getopt::Long::config ('bundling');
|
|||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my ($OptUpdate);
|
||||
my ($OptUpdate,$OptConfFile);
|
||||
GetOptions ('u|update=s' => \$OptUpdate,
|
||||
'conffile=s' => \$OptConfFile,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
|
||||
### change working directory to .. (as we're in .../install)
|
||||
chdir dirname($FullPath).'/..';
|
||||
my $Path = cwd();
|
||||
|
||||
### read configuration
|
||||
print("Reading configuration.\n");
|
||||
my %Conf = %{ReadConfig($Path.'/newsstats.conf')};
|
||||
my %Conf = %{ReadConfig($OptConfFile)};
|
||||
|
||||
##### --------------------------------------------------------------------------
|
||||
##### Database table definitions
|
||||
|
@ -52,10 +46,10 @@ my $DBCreate = <<SQLDB;
|
|||
CREATE DATABASE IF NOT EXISTS `$Conf{'DBDatabase'}` DEFAULT CHARSET=utf8;
|
||||
SQLDB
|
||||
|
||||
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS);
|
||||
--
|
||||
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS, 'DBTableHosts' => <<HOSTS, 'DBTableClnts' => <<CLIENTS);
|
||||
--
|
||||
-- Table structure for table DBTableRaw
|
||||
--
|
||||
--
|
||||
|
||||
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableRaw'}` (
|
||||
`id` bigint(20) unsigned NOT NULL auto_increment,
|
||||
|
@ -76,9 +70,9 @@ CREATE TABLE IF NOT EXISTS `$Conf{'DBTableRaw'}` (
|
|||
KEY `peer` (`peer`)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Raw data';
|
||||
RAW
|
||||
--
|
||||
--
|
||||
-- Table structure for table DBTableGrps
|
||||
--
|
||||
--
|
||||
|
||||
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableGrps'}` (
|
||||
`id` bigint(20) unsigned NOT NULL auto_increment,
|
||||
|
@ -88,10 +82,43 @@ CREATE TABLE IF NOT EXISTS `$Conf{'DBTableGrps'}` (
|
|||
`revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `month_newsgroup` (`month`,`newsgroup`),
|
||||
KEY `newsgroup` (`newsgroup`),
|
||||
KEY `postings` (`postings`)
|
||||
KEY `month` (`month`),
|
||||
KEY `newsgroup` (`newsgroup`)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per newsgroup';
|
||||
GRPS
|
||||
--
|
||||
-- Table structure for table DBTableHosts
|
||||
--
|
||||
|
||||
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableHosts'}` (
|
||||
`id` bigint(20) unsigned NOT NULL auto_increment,
|
||||
`month` varchar(7) character set ascii NOT NULL,
|
||||
`host` varchar(100) NOT NULL,
|
||||
`postings` int(11) NOT NULL,
|
||||
`revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `month_host` (`month`,`host`),
|
||||
KEY `month` (`month`),
|
||||
KEY `host` (`host`)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per server';
|
||||
HOSTS
|
||||
--
|
||||
-- Table structure for table DBTableClnts
|
||||
--
|
||||
|
||||
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableClnts'}` (
|
||||
`id` bigint(20) unsigned NOT NULL auto_increment,
|
||||
`month` varchar(7) character set ascii NOT NULL,
|
||||
`client` varchar(150) NOT NULL,
|
||||
`version` varchar(50) NOT NULL,
|
||||
`postings` int(11) NOT NULL,
|
||||
`revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `month_client_version` (`month`,`client`,`version`),
|
||||
KEY `month` (`month`),
|
||||
KEY `client` (`client`)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='Postings per client';
|
||||
CLIENTS
|
||||
|
||||
##### --------------------------------------------------------------------------
|
||||
##### Installation / upgrade instructions
|
||||
|
@ -108,7 +135,7 @@ Things left to do:
|
|||
## gather statistics for NewsStats
|
||||
newsstats!\\
|
||||
:!*,de.*\\
|
||||
:Tc,WmtfbsPNH,Ac:$Path/feedlog.pl
|
||||
:Tc,WmtfbsPNH,Ac:$HomePath/bin/feedlog.pl
|
||||
|
||||
Please
|
||||
|
||||
|
@ -136,14 +163,14 @@ Things left to do:
|
|||
|
||||
Enjoy!
|
||||
|
||||
-thh <thh\@inter.net>
|
||||
-thh <thh\@thh.name>
|
||||
INSTALL
|
||||
|
||||
my $Upgrade ='';
|
||||
if ($OptUpdate) {
|
||||
$Upgrade = <<UPGRADE;
|
||||
$Upgrade = <<UPGRADE;
|
||||
----------
|
||||
Your installation was upgraded from $OptUpdate to $PackageVersion.
|
||||
Your installation was upgraded from $OptUpdate to $VERSION.
|
||||
|
||||
Don't forget to restart your INN feed so that it can pick up the new version:
|
||||
|
||||
|
@ -159,16 +186,16 @@ UPGRADE
|
|||
if (!$OptUpdate) {
|
||||
print "----------\nStarting database creation.\n";
|
||||
# create database
|
||||
# we can't use InitDB() as that will use a table name of
|
||||
# the table that doesn't exist yet ...
|
||||
# we can't use InitDB() as that will use the database name of
|
||||
# the database that doesn't exist yet ...
|
||||
my $DBHandle = DBI->connect(sprintf('DBI:%s:host=%s',$Conf{'DBDriver'},
|
||||
$Conf{'DBHost'}), $Conf{'DBUser'},
|
||||
$Conf{'DBPw'}, { PrintError => 0 });
|
||||
my $DBQuery = $DBHandle->prepare($DBCreate);
|
||||
# Report a failed execution of the prepared statement via Bleat().
# The message used to end in "%s%\n"; the stray '%' made "%\n" an invalid
# sprintf conversion, so it has been removed.
$DBQuery->execute() or &Bleat(2, sprintf("Can't create database %s: %s\n",
                                         $Conf{'DBDatabase'}, $DBI::errstr));
|
||||
|
||||
printf("Database table %s created succesfully.\n",$Conf{'DBDatabase'});
|
||||
|
||||
printf("Database %s created succesfully.\n",$Conf{'DBDatabase'});
|
||||
$DBHandle->disconnect;
|
||||
};
|
||||
|
||||
|
@ -185,22 +212,13 @@ if (!$OptUpdate) {
|
|||
&CreateTable($Table);
|
||||
};
|
||||
print "Database table generation done.\n";
|
||||
|
||||
|
||||
# Display install instructions
|
||||
print $Install;
|
||||
} else {
|
||||
##### upgrade mode
|
||||
print "----------\nStarting upgrade process.\n";
|
||||
$PackageVersion = '0.03';
|
||||
if ($OptUpdate < $PackageVersion) {
|
||||
if ($OptUpdate < 0.02) {
|
||||
# 0.01 -> 0.02
|
||||
# &DoMySQL('...;');
|
||||
# print "v0.02: Database upgrades ...\n";
|
||||
# &PrintInstructions('0.02',<<" INSTRUCTIONS");
|
||||
# INSTRUCTIONS
|
||||
};
|
||||
};
|
||||
# TBD
|
||||
# Display general upgrade instructions
|
||||
print $Upgrade;
|
||||
};
|
||||
|
@ -221,7 +239,7 @@ sub CreateTable {
|
|||
};
|
||||
my $DBQuery = $DBHandle->prepare($DBCreate{$Table});
|
||||
$DBQuery->execute() or
|
||||
&Bleat(2, sprintf("Can't create table %s in database %s: %s%\n",$Table,
|
||||
&Bleat(2, sprintf("Can't create table %s in database %s: %s\n",$Table,
|
||||
$Conf{'DBDatabase'},$DBI::errstr));
|
||||
printf("Database table %s.%s created succesfully.\n",
|
||||
$Conf{'DBDatabase'},$Conf{$Table});
|
||||
|
@ -251,11 +269,11 @@ __END__
|
|||
|
||||
=head1 NAME
|
||||
|
||||
install - installation script
|
||||
dbcreate - database creation script
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<install> [B<-Vh> [--update I<version>]
|
||||
B<dbcreate> [B<-Vh>] [B<--update> I<version>] [B<--conffile> I<filename>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
|
@ -263,12 +281,14 @@ See L<doc/README>.
|
|||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script will create database tables as necessary and configured.
|
||||
This script will create a database and database tables as necessary
|
||||
and configured.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<install> will read its configuration from F<newsstats.conf> via
|
||||
Config::Auto.
|
||||
B<dbcreate> will read its configuration from F<newsstats.conf> which should
|
||||
be present in etc/ via Config::Auto or from a configuration file submitted
|
||||
by the B<--conffile> option.
|
||||
|
||||
See L<doc/INSTALL> for an overview of possible configuration options.
|
||||
|
||||
|
@ -278,31 +298,35 @@ See L<doc/INSTALL> for an overview of possible configuration options.
|
|||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Print out version and copyright information and exit.
|
||||
Display version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Print this man page and exit.
|
||||
Display this man page and exit.
|
||||
|
||||
=item B<-u>, B<--update> I<version>
|
||||
|
||||
Don't do a fresh install, but update from I<version>.
|
||||
|
||||
=item B<--conffile> I<filename>
|
||||
|
||||
Read configuration from I<filename> instead of F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<install.pl>
|
||||
=item F<bin/dbcreate.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<NewsStats.pm>
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<newsstats.conf>
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
|
@ -311,7 +335,7 @@ Runtime configuration file.
|
|||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
|
@ -331,11 +355,11 @@ This script is part of the B<NewsStats> package.
|
|||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
Thomas Hochstein <thh@thh.name>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
|
@ -4,18 +4,18 @@
|
|||
#
|
||||
# This script will log headers and other data to a database
|
||||
# for further analysis by parsing a feed from INN.
|
||||
#
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
our $VERSION = "0.01";
|
||||
use File::Basename;
|
||||
push(@INC, dirname($0));
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
@ -68,18 +68,19 @@ sub PrepareDB {
|
|||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my ($OptDebug,$OptQuiet);
|
||||
my ($OptDebug,$OptQuiet,$OptConfFile);
|
||||
# Parse the command line.
# -q is documented as "-q, --quiet", but the long name used to be declared
# as "test", so "--quiet" was rejected as an unknown option.  "quiet" is now
# the primary long name; "test" is kept as an alias for existing invocations.
GetOptions ('d|debug!'      => \$OptDebug,
            'q|quiet|test!' => \$OptQuiet,
            'conffile=s'    => \$OptConfFile,
            'h|help'        => \&ShowPOD,
            'V|version'     => \&ShowVersion) or exit 1;
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
|
||||
my %Conf = %{ReadConfig($OptConfFile)};
|
||||
|
||||
### init syslog
|
||||
openlog($0, 'nofatal,pid', LOG_NEWS);
|
||||
syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet;
|
||||
syslog(LOG_NOTICE, "$0 $VERSION starting up.") if !$OptQuiet;
|
||||
|
||||
### init database
|
||||
my ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
|
||||
|
@ -129,7 +130,7 @@ while (<>) {
|
|||
};
|
||||
};
|
||||
$DBQuery->finish;
|
||||
|
||||
|
||||
warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n".
|
||||
"Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",
|
||||
$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path,
|
||||
|
@ -151,7 +152,7 @@ feedlog - log data from an INN feed to a database
|
|||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<feedlog> [B<-Vhdq>]
|
||||
B<feedlog> [B<-Vhdq>] [B<--conffile> I<filename>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
|
@ -166,13 +167,15 @@ time.
|
|||
|
||||
All reporting is done to I<syslog> via I<news> facility. If B<feedlog>
|
||||
fails to initiate a database connection at startup, it will log to
|
||||
I<syslog> with I<CRIT> priority and go in an endless loop, as
|
||||
terminating would only result in a rapid respawn.
|
||||
I<syslog> with I<CRIT> priority and go in an endless loop, trying again
|
||||
to connect every 5 seconds, as terminating would only result in a rapid
|
||||
respawn.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<feedlog> will read its configuration from F<newsstats.conf> which
|
||||
should be present in the same directory via Config::Auto.
|
||||
should be present in etc/ via Config::Auto or from a configuration file
|
||||
submitted by the B<--conffile> option.
|
||||
|
||||
See L<doc/INSTALL> for an overview of possible configuration options.
|
||||
|
||||
|
@ -182,21 +185,25 @@ See L<doc/INSTALL> for an overview of possible configuration options.
|
|||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Print out version and copyright information and exit.
|
||||
Display version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Print this man page and exit.
|
||||
Display this man page and exit.
|
||||
|
||||
=item B<-d>, B<--debug>
|
||||
|
||||
Output debugging information to STDERR while parsing STDIN. You'll
|
||||
Print debugging information to STDERR while parsing STDIN. You'll
|
||||
find that information most probably in your B<INN> F<errlog> file.
|
||||
|
||||
=item B<-q>, B<--quiet>
|
||||
|
||||
Suppress logging to syslog.
|
||||
|
||||
=item B<--conffile> I<filename>
|
||||
|
||||
Read configuration from I<filename> instead of F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
@ -218,15 +225,15 @@ See L<doc/INSTALL> for further information.
|
|||
|
||||
=over 4
|
||||
|
||||
=item F<feedlog.pl>
|
||||
=item F<bin/feedlog.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<NewsStats.pm>
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<newsstats.conf>
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
|
@ -235,7 +242,7 @@ Runtime configuration file.
|
|||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
|
@ -255,11 +262,11 @@ This script is part of the B<NewsStats> package.
|
|||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
Thomas Hochstein <thh@thh.name>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
971
bin/gatherstats.pl
Executable file
971
bin/gatherstats.pl
Executable file
|
@ -0,0 +1,971 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# gatherstats.pl
|
||||
#
|
||||
# This script will gather statistical information from a database
|
||||
# containing headers and other information from an INN feed.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
use File::Basename;
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList ParseHeaders);
|
||||
|
||||
use DBI;
|
||||
use Encode qw(decode encode);
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
################################# Definitions ##################################
|
||||
|
||||
# define types of information that can be gathered
|
||||
# all / groups (/ clients / hosts)
|
||||
# the set of statistics types accepted by --stats (only the keys matter)
my %LegalStats = map { $_ => undef } qw(all groups hosts clients);

################################# Main program #################################

### read commandline options
my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
    $OptHostsDB,$OptMID,$OptMonth,$OptRawDB,$OptStatsType,$OptTest,
    $OptConfFile);
GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
            'clientsdb=s'     => \$OptClientsDB,
            'd|debug+'        => \$OptDebug,
            'groupsdb=s'      => \$OptGroupsDB,
            'hierarchy=s'     => \$OptTLH,
            'hostsdb=s'       => \$OptHostsDB,
            'mid=s'           => \$OptMID,
            'm|month=s'       => \$OptMonth,
            'rawdb=s'         => \$OptRawDB,
            's|stats=s'       => \$OptStatsType,
            't|test!'         => \$OptTest,
            'conffile=s'      => \$OptConfFile,
            'h|help'          => \&ShowPOD,
            'V|version'       => \&ShowVersion) or exit 1;

### read configuration
my %Conf = %{ReadConfig($OptConfFile)};

### override configuration via commandline options
# map each configuration key to the option that may replace it;
# only options that were actually given end up in %ConfOverride
my %FromCommandline = ('DBTableRaw'   => $OptRawDB,
                       'DBTableGrps'  => $OptGroupsDB,
                       'DBTableHosts' => $OptHostsDB,
                       'DBTableClnts' => $OptClientsDB,
                       'TLH'          => $OptTLH);
my %ConfOverride;
foreach my $Key (keys %FromCommandline) {
  next if !$FromCommandline{$Key};
  $ConfOverride{$Key} = $FromCommandline{$Key};
}
&OverrideConfig(\%Conf,\%ConfOverride);

# --mid implies --debug and --test
($OptDebug,$OptTest) = (1,1) if $OptMID;

### get type of information to gather, defaulting to 'all'
$OptStatsType ||= 'all';
if (!exists($LegalStats{$OptStatsType})) {
  &Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType));
}

### get time period from --month
# only the verbal description of the time period is needed here,
# the SQL code also returned by GetTimePeriod() is dropped
my ($Period) = &GetTimePeriod($OptMonth);
# an invalid --month or a period of 'ALL' is not supported
if (!$Period or $Period eq 'all time') {
  &Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or 'YYYY-MM:YYYY-MM'!");
}

### init database
my $DBHandle = InitDB(\%Conf,1);
# fully qualified table names (database.table)
my ($DBRaw,$DBGrps,$DBHosts,$DBClients) =
  map { sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{$_}) }
      qw(DBTableRaw DBTableGrps DBTableHosts DBTableClnts);
|
||||
|
||||
### get data for each month
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;

# Nothing in the following setup depends on the month, so it is done once
# in front of the loop instead of once per month.
my $DoGroupStats  = ($OptStatsType eq 'all' or $OptStatsType eq 'groups');
my $DoHostStats   = ($OptStatsType eq 'all' or $OptStatsType eq 'hosts');
my $DoClientStats = ($OptStatsType eq 'all' or $OptStatsType eq 'clients');

### reformat $Conf{'TLH'} into a regular expression for GroupStats
my $GroupsTLH;
if ($DoGroupStats and $Conf{'TLH'}) {
  # $Conf{'TLH'} is parsed as an array by Config::Auto;
  # make a flat list again, separated by :
  if (ref($Conf{'TLH'}) eq 'ARRAY') {
    $GroupsTLH = join(':',@{$Conf{'TLH'}});
  } else {
    $GroupsTLH = $Conf{'TLH'};
  }
  # strip whitespace
  $GroupsTLH =~ s/\s//g;
  # add trailing dots if none are present yet
  # (using negative look-behind assertions)
  $GroupsTLH =~ s/(?<!\.):/.:/g;
  $GroupsTLH =~ s/(?<!\.)$/./;
  # check for illegal characters
  &Bleat(2,'Config error - illegal characters in TLH definition!')
    if ($GroupsTLH !~ /^[a-zA-Z0-9:+.-]+$/);
  # escape dots
  $GroupsTLH =~ s/\./\\./g;
  if ($GroupsTLH =~ /:/) {
    # reformat from a:b to (a)|(b),
    # e.g. replace ':' by ')|('
    $GroupsTLH =~ s/:/)|(/g;
    $GroupsTLH = '(' . $GroupsTLH . ')';
  };
};

# known hosts; subdomains of these are counted as the host itself
my @KnownHosts = qw(abavia.com aioe.org arcor-online.net arcor-ip.de astraweb.com read.cnntp.org
                    easynews.com eternal-september.org euro.net fernuni-hagen.de free.fr newsread.freenet.ag
                    googlegroups.com heirich.name news.neostrada.pl netcologne.de newsdawg.com newscene.com
                    news-service.com octanews.com readnews.com wieslauf.sub.de highway.telekom.at
                    united-newsserver.de xennanews.com xlned.com xsnews.nl news.xs4all.nl);

# agents/clients that shouldn't be counted
my @DropAgents = qw(debian fedora firefox gecko gentoo lightning mandriva mnenhy mozilla
                    pclinuxos perl php presto suse suse/opensuse thunderbrowse ubuntu version);
push(@DropAgents, 'red hat');

# NOTE(review): HostStats and ClientStats are handed $OptTLH (the --hierarchy
# option) and not the reformatted $Conf{'TLH'} - confirm that this is intended.
foreach my $Month (&ListMonth($Period)) {

  print "---------- $Month ----------\n" if $OptDebug;

  ### GroupStats
  &GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$GroupsTLH,$OptCheckgroupsFile,$OptMID,$OptTest,$OptDebug)
    if $DoGroupStats;

  ### HostStats
  &HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@KnownHosts)
    if $DoHostStats;

  ### ClientStats
  &ClientStats($DBHandle,$DBRaw,$DBClients,$Month,$OptTLH,$OptMID,$OptTest,$OptDebug,@DropAgents)
    if $DoClientStats;
};
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
||||
################################# Subroutines ##################################
|
||||
|
||||
sub GroupStats {
### ----------------------------------------------------------------------------
### collect number of postings per group
### IN : $DBHandle        : database handle
###      $DBRaw           : database table for raw data (to read from)
###      $DBGrps          : database table for groups data (to write to)
###      $Month           : current month to do
###      $TLH             : TLHs to collect
###      $Checkgroupsfile : filename template for checkgroups file
###                         (expanded to $Checkgroupsfile-$Month)
###      $MID             : specific Message-ID to fetch (testing purposes)
###      $Test            : test mode (nothing is written to the database)
###      $Debug           : debug mode (results are printed to STDOUT)
### OUT: (nothing)
  my ($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$CheckgroupsFile,$MID,$Test,$Debug) = @_;

  # read list of newsgroups from --checkgroups into a hash
  # (declaration and conditional assignment are kept apart on purpose:
  #  the behaviour of "my ... if ..." is undefined)
  my %ValidGroups;
  %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$CheckgroupsFile,$Month))}
    if $CheckgroupsFile;

  my $DBQuery;
  if (!$MID) {
    ### ----------------------------------------------
    ### get groups data (number of postings per group)
    # get groups data from raw table for given month
    $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
                                          "WHERE day LIKE ? AND NOT disregard",
                                          $DBRaw));
    # the error text is passed as an argument, not as part of the format,
    # so that a '%' in it cannot be taken for a conversion
    $DBQuery->execute($Month.'-%')
      or &Bleat(2,sprintf("Can't get groups data for %s from %s: %s\n",
                          $Month,$DBRaw,$DBI::errstr));
  } else {
    # get groups data for a single posting
    $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
                                          "WHERE mid = ?", $DBRaw));
    $DBQuery->execute($MID)
      or &Bleat(2,sprintf("Can't get groups data for %s from %s: %s\n",
                          $MID,$DBRaw,$DBI::errstr));
  }

  # count postings per group
  my %Postings;
  while (my ($NewsgroupsHeader) = $DBQuery->fetchrow_array) {
    # get list of newsgroups and hierarchies from Newsgroups:
    my %Newsgroups = ListNewsgroups($NewsgroupsHeader,$TLH,
                                    $CheckgroupsFile ? \%ValidGroups : '');
    # count each newsgroup and hierarchy once
    $Postings{$_}++ foreach (keys %Newsgroups);
  };

  # add valid but empty groups if --checkgroups is set
  if (%ValidGroups) {
    foreach my $Group (sort keys %ValidGroups) {
      next if defined($Postings{$Group});
      # add current newsgroup as empty group
      $Postings{$Group} = 0;
      warn (sprintf("ADDED: %s as empty group\n",$Group));
      # add empty hierarchies for current newsgroup as needed
      foreach my $Parent (ParseHierarchies($Group)) {
        my $Hierarchy = $Parent . '.ALL';
        if (!defined($Postings{$Hierarchy})) {
          $Postings{$Hierarchy} = 0;
          warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
        };
      };
    };
  };

  # delete old data for that month and prepare the INSERT statement
  # once, instead of once for every newsgroup
  my $DBInsert;
  if (!$Test) {
    $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",$DBGrps),
                  undef,$Month)
      or &Bleat(2,sprintf("Can't delete old groups data for %s from %s: %s\n",
                          $Month,$DBGrps,$DBI::errstr));
    $DBInsert = $DBHandle->prepare(sprintf("INSERT INTO %s ".
                                           "(month,newsgroup,postings) ".
                                           "VALUES (?, ?, ?)",$DBGrps));
  };

  print "----- GroupStats -----\n" if $Debug;
  foreach my $Newsgroup (sort keys %Postings) {
    print "$Newsgroup => $Postings{$Newsgroup}\n" if $Debug;
    next if $Test;
    # write to database
    $DBInsert->execute($Month, $Newsgroup, $Postings{$Newsgroup})
      or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s: %s\n",
                          $Month,$Newsgroup,$DBGrps,$DBI::errstr));
    $DBInsert->finish;
  };
};
|
||||
### ----------------------------------------------------------------------------
|
||||
|
||||
sub HostStats {
### ----------------------------------------------------------------------------
### collect number of postings per server
### IN : $DBHandle   : database handle
###      $DBRaw      : database table for raw data (to read from)
###      $DBHosts    : database table for hosts data (to write to)
###      $Month      : current month to do
###      $TLH        : TLHs to collect
###      $MID        : specific Message-ID to fetch (testing purposes)
###      $Test       : test mode (nothing is written to the database)
###      $Debug      : debug mode (results are printed to STDOUT)
###      @KnownHosts : list of known hosts with subdomains
### OUT: (nothing)
  my ($DBHandle,$DBRaw,$DBHosts,$Month,$TLH,$MID,$Test,$Debug,@KnownHosts) = @_;

  my %Postings;
  my $DBQuery = GetHeaders($DBHandle,$DBRaw,$Month,$MID);

  ### ----------------------------------------------
  print "----- HostStats -----\n" if $Debug;
  ### parse headers
  while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
    ### skip postings with wrong TLH
    next if ($TLH && !CheckTLH($Newsgroups,$TLH));

    my $Host;
    my %Header = ParseHeaders(split(/\n/,$Headers));

    # ([a-z0-9-_]+\.[a-z0-9-_.]+) tries to match a hostname
    # Injection-Info
    if ($Header{'injection-info'}) {
      ($Host) = $Header{'injection-info'} =~ /^\s*([a-z0-9-_]+\.[a-z0-9-_.]+);/i;
      # reset if IP address
      undef($Host) if $Host && $Host !~ /[g-z]/i;
    }
    # X-Trace
    if (!$Host && $Header{'x-trace'}) {
      (undef, $Host) = $Header{'x-trace'} =~ /^(\s|\d)*([a-z0-9-_]+\.[a-z0-9-_.]+)/i;
      # reset if IP address
      undef($Host) if $Host && $Host !~ /[g-z]/i;
    }
    # Path (may be missing altogether, so check before matching)
    if (!$Host && defined($Header{'path'})) {
      my $Path = $Header{'path'};
      if ($Path =~ /!([^!]+)!.POSTED!/) {
        $Host = "$1";
      } elsif ($Path =~ /([^!]+)!.POSTED.[^!]+!?/) {
        $Host = "$1";
      } else {
        # iterate on the Path: header until we have a host name or no more
        # path elements
        while (!$Host && $Path =~ /!/) {
          ($Host) = $Path =~ /!?([a-z0-9-_]+\.[a-z0-9-_.]+)!!?[^!]+!?$/i;
          undef($Host) if $Host && $Host =~ /\.MISMATCH/;
          # remove last path element; give up if there is nothing that can
          # be removed (e.g. a path ending in '!'), as the loop would never
          # terminate otherwise
          last if $Path !~ s/!!?[^!]+$//;
        };
      }
    }

    # trailing .POSTED
    if ($Host && $Host =~ /\.POSTED$/) {
      ($Host) = $Host =~ /(\S+)\.POSTED$/;
    }

    # the following fixups only make sense if a host was found at all
    if ($Host) {
      # special cases
      $Host = 'news.highwinds-media.com' if $Host =~ /f(e|x)\d\d\.\S{3}\d?$/
                                         or $Host =~ /(newsfe|fed)\d+\.(iad|ams2)$/;
      $Host = 'newshosting.com' if $Host =~ /post\d*\.iad$/;
      $Host = 'eternal-september.org' if $Host =~ /dont-email\.me$/;

      # normalize hosts: map subdomains of known hosts to the known host;
      # the host name is quoted so that its dots match literally
      foreach my $KnownHost (@KnownHosts) {
        if ($Host =~ /\.\Q$KnownHost\E$/) {
          $Host = $KnownHost;
          last;
        }
      }
    }

    # count host
    if ($Host) {
      $Host = lc($Host);
      $Postings{$Host}++;
      $Postings{'ALL'}++;
    } else {
      &Bleat(1,sprintf("%s FAILED", $Header{'message-id'}));
    }

    printf("%s: %s\n", $Header{'message-id'}, defined($Host) ? $Host : '')
      if ($MID or $Debug && $Debug >1);
  };

  # delete old data for that month and prepare the INSERT statement
  # once, instead of once for every host
  my $DBInsert;
  if (!$Test) {
    $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",$DBHosts),
                  undef,$Month)
      or &Bleat(2,sprintf("Can't delete old hosts data for %s from %s: %s\n",
                          $Month,$DBHosts,$DBI::errstr));
    $DBInsert = $DBHandle->prepare(sprintf("INSERT INTO %s ".
                                           "(month,host,postings) ".
                                           "VALUES (?, ?, ?)",$DBHosts));
  };

  foreach my $Host (sort keys %Postings) {
    print "$Host => $Postings{$Host}\n" if $Debug;
    next if $Test;
    # write to database
    $DBInsert->execute($Month, $Host, $Postings{$Host})
      or &Bleat(2,sprintf("Can't write hosts data for %s/%s to %s: %s\n",
                          $Month,$Host,$DBHosts,$DBI::errstr));
    $DBInsert->finish;
  };
};
|
||||
|
||||
sub ClientStats {
### ----------------------------------------------------------------------------
### collect number of postings per client (and version)
### IN : $DBHandle   : database handle
###      $DBRaw      : database table for raw data (to read from)
###      $DBClients  : database table for clients data (to write to)
###      $Month      : current month to do
###      $TLH        : TLHs to collect
###      $MID        : specific Message-ID to fetch (testing purposes)
###      $Test       : test mode (nothing is written to the database)
###      $Debug      : debug mode (results are printed to STDOUT)
###      @DropAgents : list of UserAgent "agents" that won't be counted
### OUT: (nothing)
  my ($DBHandle,$DBRaw,$DBClients,$Month,$TLH,$MID,$Test,$Debug,@DropAgents) = @_;

  my %Postings;
  my %DropAgent = map { $_ => 1 } @DropAgents;

  my $DBQuery = GetHeaders($DBHandle,$DBRaw,$Month,$MID);

  ### ----------------------------------------------
  print "----- ClientStats -----\n" if $Debug;
  ### parse headers
  while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
    ### skip postings with wrong TLH
    next if ($TLH && !CheckTLH($Newsgroups,$TLH));

    my (@Clients, $Client, $Version);
    my %Header = ParseHeaders(split(/\n/,$Headers));

    ### X-Mailer
    if ($Header{'x-mailer'}) {
      # transfer to x-newsreader and parse from there
      $Header{'x-newsreader'} = $Header{'x-mailer'};
    }
    ### X-Newsreader
    if ($Header{'x-newsreader'}) {
      $Header{'x-newsreader'} = RemoveComments($Header{'x-newsreader'});
      # remove 'http://' and 'via' (CrossPoint)
      $Header{'x-newsreader'} =~ s/https?:\/\///;
      $Header{'x-newsreader'} =~ s/ ?via(.+)?$//;
      # parse header
      if ($Header{'x-newsreader'} =~ /^([^\/ ]+\/[^\/ ]+ ?)+$/) {
        # User-Agent style:
        # transfer to user-agent and parse from there
        $Header{'user-agent'} = $Header{'x-newsreader'};
      } elsif ($Header{'x-newsreader'} =~ / /) {
        # "client name version"
        ($Client, $Version) = ParseXNewsreader($Header{'x-newsreader'});
      } else {
        $Client = $Header{'x-newsreader'};
        $Version = '';
      }
      if ($Client) {
        # special cases
        $Client = 'CrossPoint' if $Client =~ /^CrossPoint\//;
        $Client = 'Virtual Access' if $Client =~ /^Virtual Access/;
        push @Clients, { agent => $Client, version => $Version };
      } else {
        $Header{'user-agent'} = $Header{'x-newsreader'};
      }
    }
    ### User-Agent
    if (!@Clients && $Header{'user-agent'}) {
      $Header{'user-agent'} = RemoveComments($Header{'user-agent'});
      ### well-formed?
      if ($Header{'user-agent'} =~ /^([^\/ ]+\/[^\/ ]+ ?)+$/) {
        @Clients = ParseUserAgent($Header{'user-agent'});
      } else {
        # snip and add known well-formed agents from the trailing end
        while ($Header{'user-agent'} =~ /(((Hamster)|(Hamster-Pg)|(KorrNews)|(OE-Tools)|(Mime-proxy))(\/[^\/ ]+))$/) {
          push @Clients, ParseUserAgent($1);
          # stop if the agent can't be snipped off (it is not preceded by a
          # blank and a slash-free token); the header would stay unchanged
          # and the loop would never terminate otherwise
          last if $Header{'user-agent'} !~ s/ [^\/ ]+\/[^\/ ]+$//;
        }
        ### special cases
        # remove 'http://open-news-network.org'
        $Header{'user-agent'} =~ s/^https?:\/\/open-news-network.org(\S+)?//;
        if ($Header{'user-agent'} =~ /((Mozilla[- ])?Thunderbird) ?([0-9.]+)?/) {
          # Thunderbird
          $Client = 'Thunderbird';
          $Version = $3;
        } elsif ($Header{'user-agent'} =~ /((TrueXP|FreeXP|XP2(\/Agent)?)) \/(.+)$/) {
          # XP
          $Client = $1;
          $Version = $4;
          $Client = 'XP2' if $Client eq 'XP2/Agent';
        } elsif ($Header{'user-agent'} =~ /^(([^0-9]+)|(\D+\/\d+))[\/ ]((\S+\d\D?)(( alpha\d?)|( beta\d?)|( rc\d)| pre| trialware)?)$/) {
          ### most general case
          # client version
          # client/version
          # client/32 version
          # - version may end in one non-numeric character
          # - including trailing beta/pre/...
          # 1) client:   (([^0-9]+)|(\D+\/\d+))
          # 2) version:  (\S+\d\D?)
          # 3) trailing: (( alpha\d?)|( beta\d?)|( rc\d)| pre| trialware)?
          $Client = $1;
          $Version = $4;
        } elsif ($Header{'user-agent'} =~ /SeaMonkey\/([0-9.]+)/) {
          ### some very special cases
          # SeaMonkey/nn
          $Client = 'Seamonkey';
          $Version = $1;
        } elsif ($Header{'user-agent'} =~ /Emacs [0-9.]+\/Gnus ([0-9.]+)/) {
          # Emacs nn/Gnus nn
          $Client = 'Gnus';
          $Version = $1;
        } else {
          # failed to parse: count the whole header as client
          $Client = $Header{'user-agent'};
        }
        # count client, if found
        if ($Client) {
          push @Clients, { agent => $Client, version => $Version };
        } else {
          &Bleat(1,sprintf("%s FAILED", $Header{'message-id'})) if !@Clients;
        }
      }
    }

    if (@Clients) {
      $Postings{'ALL'}{'ALL'}++;
      foreach my $UserAgent (@Clients) {
        # filter agents for User-Agent with multiple agents
        next if $#Clients && exists($DropAgent{lc($UserAgent->{'agent'})});
        # remove whitespace
        $UserAgent->{'agent'} =~ s/^\s+|\s+$//g;
        $UserAgent->{'version'} =~ s/^\s+|\s+$//g if $UserAgent->{'version'};
        # encode to utf-8, if necessary
        $UserAgent->{'agent'} = encode('UTF-8', $UserAgent->{'agent'})
          if $UserAgent->{'agent'} =~ /[\x80-\x{ffff}]/;
        $UserAgent->{'version'} = encode('UTF-8', $UserAgent->{'version'})
          if $UserAgent->{'version'} and $UserAgent->{'version'} =~ /[\x80-\x{ffff}]/;
        # truncate overlong clients or versions
        $UserAgent->{'agent'} = substr($UserAgent->{'agent'}, 0, 150)
          if length($UserAgent->{'agent'}) > 150;
        $UserAgent->{'version'} = substr($UserAgent->{'version'}, 0, 50)
          if $UserAgent->{'version'} and length($UserAgent->{'version'}) > 50;
        # special cases
        # Mozilla
        $UserAgent->{'agent'} = 'Mozilla' if $UserAgent->{'agent'} eq '•Mozilla';
        $UserAgent->{'agent'} =~ s/^Mozilla //;
        # Forte Agent
        $UserAgent->{'agent'} = 'Forte Agent' if $UserAgent->{'agent'} eq 'ForteAgent';
        # (the version may be undefined, so check before touching it)
        if ($UserAgent->{'agent'} eq 'Forte Agent' && defined($UserAgent->{'version'})) {
          $UserAgent->{'version'} =~ s/-/\//;
          $UserAgent->{'version'} = '' if $UserAgent->{'version'} eq '32Bit';
        }
        # count client ('ALL') and client/version (if version is present)
        $Postings{$UserAgent->{'agent'}}{'ALL'}++;
        $Postings{$UserAgent->{'agent'}}{$UserAgent->{'version'}}++ if $UserAgent->{'version'};

        printf("%s: %s {%s}\n", $Header{'message-id'}, $UserAgent->{'agent'},
               $UserAgent->{'version'} ? $Postings{$UserAgent->{'agent'}}{$UserAgent->{'version'}} : '')
          if ($MID or $Debug && $Debug >1);
      }
    }
  };

  # delete old data for that month and prepare the INSERT statement
  # once, instead of once for every client/version
  my $DBInsert;
  if (!$Test) {
    $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",$DBClients),
                  undef,$Month)
      or &Bleat(2,sprintf("Can't delete old client data for %s from %s: %s\n",
                          $Month,$DBClients,$DBI::errstr));
    $DBInsert = $DBHandle->prepare(sprintf("INSERT INTO %s ".
                                           "(month,client,version,postings) ".
                                           "VALUES (?, ?, ?, ?)",$DBClients));
  };

  foreach my $Client (sort keys %Postings) {
    foreach my $Version (sort keys %{$Postings{$Client}}) {
      printf ("%s {%s}: %d\n",$Client,$Version,$Postings{$Client}{$Version}) if $Debug;
      next if $Test;
      # write to database
      $DBInsert->execute($Month, $Client, $Version, $Postings{$Client}{$Version})
        or &Bleat(2,sprintf("Can't write client data for %s/%s/%s to %s: %s\n",
                            $Month,$Client,$Version,$DBClients,$DBI::errstr));
      $DBInsert->finish;
    }
  };

};
|
||||
|
||||
sub GetHeaders {
### ----------------------------------------------------------------------------
### get (newsgroups and) raw headers from database
### IN : $DBHandle: database handle
###      $DBRaw   : database table for raw data (to read from)
###      $Month   : current month to do
###      $MID     : specific Message-ID to fetch (testing purposes)
### OUT: DBI statement handle (already executed, ready to fetch from)
  my ($DBHandle,$DBRaw,$Month,$MID) = @_;

  # select a single posting if a Message-ID is given, and all postings
  # of the month that are not to be disregarded otherwise
  my ($Condition,$Value,$Wanted);
  if (!$MID) {
    ($Condition,$Value,$Wanted) = ('day LIKE ? AND NOT disregard',
                                   $Month.'-%', $Month);
  } else {
    ($Condition,$Value,$Wanted) = ('mid = ?', $MID, $MID);
  }

  my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
                                           "WHERE %s", $DBRaw, $Condition));
  # the error text is passed as an argument, not as part of the format,
  # so that a '%' in it cannot be taken for a conversion
  $DBQuery->execute($Value)
    or &Bleat(2,sprintf("Can't get header data for %s from %s: %s\n",
                        $Wanted,$DBRaw,$DBI::errstr));
  return $DBQuery;
}
|
||||
|
||||
sub CheckTLH {
### ----------------------------------------------------------------------------
### count newsgroups from legal TLH(s)
### IN : $Newsgroups: comma separated list of newsgroups
###      $TLH       : (reference to an array of) legal TLH(s); each one is
###                   taken literally and has to match the start of the
###                   newsgroup name
### OUT: number of newsgroups from legal TLH(s) (0 if there are none)
  my ($Newsgroups,$TLH) = @_;

  # fill @TLH from $TLH, which can be an array reference or a scalar value
  my @TLH = (ref($TLH) eq 'ARRAY') ? @{$TLH} : ($TLH);

  my $GroupCount = 0;
  return $GroupCount if !defined($Newsgroups);

  # remove all whitespace (including a trailing newline) from the contents
  # of Newsgroups: - not just the first occurrence, as groups following
  # ", " would start with a blank and never match otherwise
  $Newsgroups =~ s/\s//g;

  foreach my $Newsgroup (split /,/, $Newsgroups) {
    foreach my $Prefix (@TLH) {
      # increment $GroupCount if $Newsgroup starts with $Prefix;
      # \Q...\E keeps '.' and '+' from acting as regex metacharacters
      $GroupCount++ if $Newsgroup =~ /^\Q$Prefix\E/;
    }
  };

  return $GroupCount;
}
|
||||
|
||||
sub RemoveComments {
### ----------------------------------------------------------------------------
### remove comments and other junk from header
### IN : $Header: a header
### OUT: the header, with comments and other junk removed
  my $Header = shift;

  # decode MIME encoded words
  if ($Header =~ /=\?\S+\?[BQ]\?/) {
    $Header = decode("MIME-Header",$Header);
  }

  # remove (nested) comments, innermost pair of parentheses first;
  # loop only as long as something was actually removed - looping on a
  # separate match condition would never terminate for input like
  # 'foo (a () b)', where a '(' ... ')' pair exists, but no pair with
  # non-empty, parenthesis-free content
  1 while $Header =~ s/\([^()]+\)//;

  # remove dangling ')' (together with the non-whitespace before it)
  $Header =~ s/\S+\)//;

  # remove from dangling '(' to end of header
  $Header =~ s/\(.+$//;

  # remove from '[' to first ']'
  $Header =~ s/\[[^\[\]]+?\]//;

  # remove 'Nr. ... lebt'
  $Header =~ s/Nr\. \d+ lebt//;

  # remove nn:nn:nn
  $Header =~ s/\d\d:\d\d:\d\d//;

  # remove '/mm/... '
  $Header =~ s/\/mm\/\S+//;

  # remove ' DE' / '_DE'
  $Header =~ s/[ _]DE//;

  # remove 'eol' or '-shl'
  # or trailing ml-inews[-sig]
  # NOTE(review): only the ml-inews alternative is anchored by '$';
  # 'eol' and '-shl' are removed at their first occurrence anywhere in
  # the header - confirm whether that is intended
  $Header =~ s/(eol)|(-shl)|(ml-inews(-sig)?)$//;

  # remove from ';' or ',' (CrossPoint)
  # or '&' to end of header
  $Header =~ s/[;,&].+$//;

  # remove from 'by ' or 'unter Windows' or '@ Windows'
  # to end of header
  $Header =~ s/((by )|(unter +Windows)|(@ Windows)).+$//;

  # remove superfluous whitespace in header
  # and whitespace around header
  $Header =~ s/\s+/ /g;
  $Header =~ s/^\s+|\s+$//g;

  return $Header;
}
|
||||
|
||||
sub ParseXNewsreader {
### ----------------------------------------------------------------------------
### split an X-Newsreader header into client name and version
### IN : $XNR: a X-Newsreader header
### OUT: client name (undef if the header starts with the version)
###      and version ('' if no version was found)
  my $XNR = shift;

  my ($Client, $Version);

  WORD:
  for my $Word (split(/ /,$XNR)) {
    # a word is the version if it contains a digit followed by another
    # digit or a dot - unless it ends in '/' plus one single digit
    my $IsVersion = ($Word =~ /\d[0-9.]/ and $Word !~ /\/\d$/);
    if ($IsVersion) {
      # everything after the version is ignored
      $Version = $Word;
      last WORD;
    }
    # all words before the version make up the client name
    $Client .= $Word . ' ';
  }

  # drop the blank appended after the last word of the client name
  $Client =~ s/\s+$// if $Client;

  # always return a defined version
  $Version = '' unless $Version;

  return ($Client, $Version);
}
|
||||
|
||||
|
||||
sub ParseUserAgent {
### ----------------------------------------------------------------------------
### parse User-Agent header (agent and version)
### IN : $UserAgent: a User-Agent header
### OUT: array of hashes (agent/version), one for each blank separated
###      word of the header; agent and version are undef for words
###      that are not of the form 'agent/version'
  my $UserAgent = shift;

  my @UserAgents;

  # a well-formed User-Agent header will contain pairs of
  # client/version, i.e. 'slrn/0.9.7.3'
  foreach my $Product (split(/ /,$UserAgent)) {
    # take the captures from the match's return value: a failed match
    # yields an empty list (and so undef/undef), while $1 and $2 would
    # still hold the captures of some earlier successful match
    my ($Agent,$Version) = $Product =~ /^(.+)\/(.+)$/;
    push @UserAgents, { 'agent' => $Agent, 'version' => $Version };
  }

  return @UserAgents;
}
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
gatherstats - process statistical data from a raw source
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<--groupsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--conffile> I<filename>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script will extract and process statistical information from a
|
||||
database table which is fed from F<feedlog.pl> for a given time period
|
||||
and write its results to (an)other database table(s). Entries marked
|
||||
with I<'disregard'> in the database will be ignored; currently, you
|
||||
have to set this flag yourself, using your database management tools.
|
||||
You can exclude erroneous entries that way (e.g. automatic reposts
|
||||
(think of cancels flood and resurrectors); spam; ...).
|
||||
|
||||
The time period to act on defaults to last month; you can assign
|
||||
another time period or a single month via the B<--month> option (see
|
||||
below).
|
||||
|
||||
By default B<gatherstats> will process all types of information; you
|
||||
can change that using the B<--stats> option and assigning the type of
|
||||
information to process.
|
||||
|
||||
Possible information types include:
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<groups> (postings per group per month)
|
||||
|
||||
B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
|
||||
counted for each single group they appear in. Groups not in I<TLH>
|
||||
will be ignored.
|
||||
|
||||
B<gatherstats> will also add up the number of postings for each
|
||||
hierarchy level, but only count each posting once. A posting to
|
||||
de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
|
||||
respectively. A crossposting to de.alt.test and de.alt.admin, on the
|
||||
other hand, will be counted for de.alt.test and de.alt.admin each, but
|
||||
only once for de.alt.ALL and de.ALL.
|
||||
|
||||
Data is written to I<DBTableGrps> (see L<doc/INSTALL>); you can
|
||||
override that default through the B<--groupsdb> option.
|
||||
|
||||
=item B<hosts> (postings from host per month)
|
||||
|
||||
B<gatherstats> will examine Injection-Info:, X-Trace: and Path:
|
||||
headers and try to normalize them. The sum of all detected hosts will
|
||||
also be saved for each month. Groups not in I<TLH> will be ignored.
|
||||
|
||||
Data is written to I<DBTableHosts> (see L<doc/INSTALL>); you can
|
||||
override that default through the B<--hostsdb> option.
|
||||
|
||||
=item B<clients> (postings by client per month)
|
||||
|
||||
B<gatherstats> will examine User-Agent:, X-Newsreader: and X-Mailer:
|
||||
headers and try to remove comments and non-standard contents. Clients
|
||||
and client versions are counted separately. The sum of all detected
|
||||
clients will also be saved for each month. Groups not in I<TLH> will
|
||||
be ignored.
|
||||
|
||||
Data is written to I<DBTableClnts> (see L<doc/INSTALL>); you can
|
||||
override that default through the B<--clientsdb> option.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<gatherstats> will read its configuration from F<newsstats.conf>
|
||||
which should be present in etc/ via Config::Auto or from a configuration
|
||||
file submitted by the B<--conffile> option.
|
||||
|
||||
See L<doc/INSTALL> for an overview of possible configuration options.
|
||||
|
||||
You can override configuration options by using the B<--hierarchy>,
|
||||
B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
|
||||
respectively.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Display version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Display this man page and exit.
|
||||
|
||||
=item B<-d>, B<--debug>
|
||||
|
||||
Print debugging information to STDOUT while processing (number of
|
||||
postings per group).
|
||||
|
||||
=item B<-t>, B<--test>
|
||||
|
||||
Do not write results to database. You should use B<--debug> in
|
||||
conjunction with B<--test> ... everything else seems a bit pointless.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]>
|
||||
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
period between two months in YYYY-MM:YYYY-MM format (two months, separated
|
||||
by a colon). Defaults to last month.
|
||||
|
||||
=item B<-s>, B<--stats> I<type>
|
||||
|
||||
Set processing type to one of I<all>, I<groups>, I<hosts> or I<clients>.
|
||||
Defaults to I<all>.
|
||||
|
||||
=item B<-c>, B<--checkgroups> I<filename template>
|
||||
|
||||
Relevant only for newsgroup stats (I<groups>).
|
||||
|
||||
Check each group against a list of valid newsgroups read from a file,
|
||||
one group on each line and ignoring everything after the first
|
||||
whitespace (so you can use a file in checkgroups format or (part of)
|
||||
your INN active file).
|
||||
|
||||
The filename is taken from I<filename template>, amended by each
|
||||
B<--month> B<gatherstats> is processing in the form of I<template-YYYY-MM>,
|
||||
so that
|
||||
|
||||
gatherstats -m 2010-01:2010-12 -c checkgroups
|
||||
|
||||
will check against F<checkgroups-2010-01> for January 2010, against
|
||||
F<checkgroups-2010-02> for February 2010 and so on.
|
||||
|
||||
Newsgroups not found in the checkgroups file will be dropped (and
|
||||
logged to STDERR), and newsgroups found there but having no postings
|
||||
will be added with a count of 0 (and logged to STDERR).
|
||||
|
||||
=item B<--hierarchy> I<TLH> (newsgroup hierarchy/hierarchies)
|
||||
|
||||
Override I<TLH> from F<newsstats.conf>.
|
||||
|
||||
I<TLH> can be a single word or a comma-separated list.
|
||||
|
||||
=item B<--rawdb> I<table> (raw data table)
|
||||
|
||||
Override I<DBTableRaw> from F<newsstats.conf>.
|
||||
|
||||
=item B<--groupsdb> I<table> (postings per group table)
|
||||
|
||||
Override I<DBTableGrps> from F<newsstats.conf>.
|
||||
|
||||
=item B<--hostsdb> I<table> (host data table)
|
||||
|
||||
Override I<DBTableHosts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--clientsdb> I<table> (client data table)
|
||||
|
||||
Override I<DBTableClnts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--conffile> I<filename>
|
||||
|
||||
Read configuration from I<filename> instead of F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Process all types of information for last month:
|
||||
|
||||
gatherstats
|
||||
|
||||
Do a dry run, showing results of processing:
|
||||
|
||||
gatherstats --debug --test
|
||||
|
||||
Process all types of information for January of 2010:
|
||||
|
||||
gatherstats --month 2010-01
|
||||
|
||||
Process only number of postings for the year of 2010,
|
||||
checking against checkgroups-*:
|
||||
|
||||
gatherstats -m 2010-01:2010-12 -s groups -c checkgroups
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<bin/gatherstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/INSTALL>
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@thh.name>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
|
@ -4,18 +4,18 @@
|
|||
#
|
||||
# This script will get statistical data on newgroup usage
|
||||
# from a database.
|
||||
#
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
our $VERSION = "0.01";
|
||||
use File::Basename;
|
||||
push(@INC, dirname($0));
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
@ -31,7 +31,7 @@ Getopt::Long::config ('bundling');
|
|||
### read commandline options
|
||||
my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments,
|
||||
$OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth,
|
||||
$OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound);
|
||||
$OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound,$OptConfFile);
|
||||
GetOptions ('b|boundary=s' => \$OptBoundType,
|
||||
'c|captions!' => \$OptCaptions,
|
||||
'checkgroups=s' => \$OptCheckgroupsFile,
|
||||
|
@ -47,13 +47,12 @@ GetOptions ('b|boundary=s' => \$OptBoundType,
|
|||
'r|report=s' => \$OptReportType,
|
||||
's|sums!' => \$OptSums,
|
||||
'u|upper=i' => \$UppBound,
|
||||
'conffile=s' => \$OptConfFile,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
# parse parameters
|
||||
# $OptComments defaults to TRUE
|
||||
$OptComments = 1 if (!defined($OptComments));
|
||||
# force --nocomments when --filetemplate is used
|
||||
$OptComments = 0 if ($OptFileTemplate);
|
||||
# $OptComments defaults to TRUE if --filetemplate is not used
|
||||
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
|
||||
# parse $OptBoundType
|
||||
if ($OptBoundType) {
|
||||
if ($OptBoundType =~ /level/i) {
|
||||
|
@ -76,12 +75,19 @@ if ($OptReportType) {
|
|||
$OptReportType = 'default';
|
||||
}
|
||||
}
|
||||
# read list of newsgroups from --checkgroups
|
||||
# into a hash reference
|
||||
my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile;
|
||||
# honor $OptCheckgroupsFile,
|
||||
# warn for $OptSums if set concurrently
|
||||
my $ValidGroups;
|
||||
if ($OptCheckgroupsFile) {
|
||||
# read list of newsgroups from --checkgroups
|
||||
# into a hash reference
|
||||
$ValidGroups = &ReadGroupList($OptCheckgroupsFile);
|
||||
&Bleat(1,"--sums option can't possibly work with --checkgroups option set")
|
||||
if $OptSums;
|
||||
}
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
|
||||
my %Conf = %{ReadConfig($OptConfFile)};
|
||||
|
||||
### override configuration via commandline options
|
||||
my %ConfOverride;
|
||||
|
@ -102,7 +108,7 @@ my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
|
|||
# with placeholders as well as a list of newsgroup to bind to them
|
||||
my ($SQLWhereNewsgroups,@SQLBindNewsgroups);
|
||||
if ($OptNewsgroups) {
|
||||
($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups);
|
||||
($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups,'newsgroup');
|
||||
# bail out if --newsgroups is invalid
|
||||
&Bleat(2,"--newsgroups option has an invalid format!")
|
||||
if !$SQLWhereNewsgroups;
|
||||
|
@ -124,13 +130,18 @@ if ($OptBoundType and $OptBoundType ne 'default') {
|
|||
}
|
||||
|
||||
### get sort order and build SQL 'ORDER BY' clause
|
||||
# force to 'month' for $OptReportType 'average' or 'sum'
|
||||
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
|
||||
# default to 'newsgroup' for $OptBoundType 'level' or 'average'
|
||||
$OptGroupBy = 'newsgroup' if (!$OptGroupBy and
|
||||
$OptBoundType and $OptBoundType ne 'default');
|
||||
# force to 'month' for $OptReportType 'average' or 'sum'
|
||||
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
|
||||
# default to 'newsgroup' if $OptGroupBy is not set and
|
||||
# just one newsgroup is requested, but more than one month
|
||||
$OptGroupBy = 'newsgroup' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
|
||||
and $OptNewsgroups and $OptNewsgroups !~ /[:*%]/);
|
||||
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
|
||||
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy);
|
||||
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
|
||||
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'newsgroup');
|
||||
# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy)
|
||||
# set it to 'month' or 'key' for OutputData()
|
||||
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
|
||||
|
@ -213,6 +224,7 @@ $DBQuery->execute(@SQLBindNewsgroups)
|
|||
# set default to 'pretty'
|
||||
$OptFormat = 'pretty' if !$OptFormat;
|
||||
# print captions if --caption is set
|
||||
my $LeadIn;
|
||||
if ($OptCaptions && $OptComments) {
|
||||
# print time period with report type
|
||||
my $CaptionReportType= '(number of postings for each month)';
|
||||
|
@ -222,9 +234,9 @@ if ($OptCaptions && $OptComments) {
|
|||
$CaptionReportType= '(number of all postings for that time period)'
|
||||
if $OptReportType eq 'sum';
|
||||
}
|
||||
printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
|
||||
$LeadIn .= sprintf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
|
||||
# print newsgroup list if --newsgroups is set
|
||||
printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups)))
|
||||
$LeadIn .= sprintf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups)))
|
||||
if $OptNewsgroups;
|
||||
# print boundaries, if set
|
||||
my $CaptionBoundary= '(counting only month fulfilling this condition)';
|
||||
|
@ -233,21 +245,21 @@ if ($OptCaptions && $OptComments) {
|
|||
$CaptionBoundary= '(on average)' if $OptBoundType eq 'average';
|
||||
$CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum';
|
||||
}
|
||||
printf("# ----- Threshold: %s %s x %s %s %s\n",
|
||||
$LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
|
||||
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
|
||||
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
|
||||
if ($LowBound or $UppBound);
|
||||
# print primary and secondary sort order
|
||||
printf("# ----- Grouped by %s (%s), sorted %s%s\n",
|
||||
$LeadIn .= sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
|
||||
($GroupBy eq 'month') ? 'Months' : 'Newsgroups',
|
||||
($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
|
||||
($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
|
||||
($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
|
||||
}
|
||||
|
||||
|
||||
# output data
|
||||
&OutputData($OptFormat,$OptComments,$GroupBy,$Precision,
|
||||
$OptCheckgroupsFile ? $ValidGroups : '',
|
||||
$OptCheckgroupsFile ? $ValidGroups : '',$LeadIn,
|
||||
$OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength);
|
||||
|
||||
### close handles
|
||||
|
@ -263,7 +275,7 @@ groupstats - create reports on newsgroup usage
|
|||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>]
|
||||
B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>] [B<--conffile> I<filename>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
|
@ -271,7 +283,7 @@ See L<doc/README>.
|
|||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script create reports on newsgroup usage (number of postings per
|
||||
This script creates reports on newsgroup usage (number of postings per
|
||||
group per month) taken from result tables created by
|
||||
B<gatherstats.pl>.
|
||||
|
||||
|
@ -279,16 +291,16 @@ B<gatherstats.pl>.
|
|||
|
||||
=head3 Time period and newsgroups
|
||||
|
||||
The time period to act on defaults to last month; you can assign another
|
||||
time period or a single month (or drop all time constraints) via the
|
||||
B<--month> option (see below).
|
||||
The time period to act on defaults to last month; you can assign
|
||||
another time period or a single month (or drop all time constraints)
|
||||
via the B<--month> option (see below).
|
||||
|
||||
B<groupstats> will process all newsgroups by default; you can limit
|
||||
processing to only some newsgroups by supplying a list of those groups via
|
||||
B<--newsgroups> option (see below). You can include hierarchy levels in
|
||||
the output by adding the B<--sums> switch (see below). Optionally
|
||||
newsgroups not present in a checkgroups file can be excluded from output,
|
||||
sse B<--checkgroups> below.
|
||||
processing to only some newsgroups by supplying a list of those groups
|
||||
via B<--newsgroups> option (see below). You can include hierarchy
|
||||
levels in the output by adding the B<--sums> switch (see below).
|
||||
Optionally newsgroups not present in a checkgroups file can be excluded
|
||||
from output, see B<--checkgroups> below.
|
||||
|
||||
=head3 Report type
|
||||
|
||||
|
@ -309,26 +321,27 @@ below.
|
|||
=head3 Sorting and formatting the output
|
||||
|
||||
By default, all results are grouped by month; you can group results by
|
||||
newsgroup instead via the B<--groupy-by> option. Within those groups, the
|
||||
list of newsgroups (or months) is sorted alphabetically (or
|
||||
chronologically, respectively) ascending. You can change that order (and
|
||||
sort by number of postings) with the B<--order-by> option. For details and
|
||||
exceptions, please see below.
|
||||
newsgroup instead via the B<--group-by> option. Within those groups,
|
||||
the list of newsgroups (or months) is sorted alphabetically (or
|
||||
chronologically, respectively) ascending. You can change that order
|
||||
(and sort by number of postings) with the B<--order-by> option. For
|
||||
details and exceptions, please see below.
|
||||
|
||||
The results will be formatted as a kind of table; you can change the
|
||||
output format to a simple list or just a list of newsgroups and number of
|
||||
postings with the B<--format> option. Captions will be added by means of
|
||||
the B<--caption> option; all comments (and captions) can be supressed by
|
||||
using B<--nocomments>.
|
||||
output format to a simple list or just a list of newsgroups and number
|
||||
of postings with the B<--format> option. Captions will be added by means
|
||||
of the B<--caption> option; all comments (and captions) can be suppressed
|
||||
by using B<--nocomments>.
|
||||
|
||||
Last but not least you can redirect all output to a number of files, e.g.
|
||||
one for each month, by submitting the B<--filetemplate> option, see below.
|
||||
Captions and comments are automatically disabled in this case.
|
||||
Last but not least you can redirect all output to a number of files,
|
||||
e.g. one for each month, by submitting the B<--filetemplate> option,
|
||||
see below.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<groupstats> will read its configuration from F<newsstats.conf>
|
||||
which should be present in the same directory via Config::Auto.
|
||||
which should be present in etc/ via Config::Auto or from a configuration
|
||||
file submitted by the B<--conffile> option.
|
||||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
|
@ -340,18 +353,18 @@ You can override some configuration options via the B<--groupsdb> option.
|
|||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Print out version and copyright information and exit.
|
||||
Display version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Print this man page and exit.
|
||||
Display this man page and exit.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
|
||||
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
period between two month in YYYY-MM:YYYY-MM format (two month, separated
|
||||
by a colon). By using the keyword I<all> instead, you can set no
|
||||
processing period to process the whole database.
|
||||
processing period to process the whole database. Defaults to last month.
|
||||
|
||||
=item B<-n>, B<--newsgroups> I<newsgroup(s)>
|
||||
|
||||
|
@ -373,14 +386,23 @@ example:
|
|||
|
||||
See the B<gatherstats> man page for details.
|
||||
|
||||
This option does not work together with the B<--checkgroups> option as
|
||||
all "virtual" groups will not be present in the checkgroups file.
|
||||
|
||||
False by default.
|
||||
|
||||
=item B<--checkgroups> I<filename>
|
||||
|
||||
Restrict output to those newgroups present in a file in checkgroups format
|
||||
(one newgroup name per line; everything after the first whitespace on each
|
||||
line is ignored). All other newsgroups will be removed from output.
|
||||
Restrict output to those newsgroups present in a file in checkgroups
|
||||
format (one newsgroup name per line; everything after the first
|
||||
whitespace on each line is ignored). All other newsgroups will be
|
||||
removed from output.
|
||||
|
||||
Contrary to B<gatherstats>, I<filename> is not a template, but refers to
|
||||
a single file in checkgroups format.
|
||||
Contrary to B<gatherstats>, I<filename> is not a template, but refers
|
||||
to a single file in checkgroups format.
|
||||
|
||||
The B<--sums> option will not work together with this option as
|
||||
"virtual" groups will not be present in the checkgroups file.
|
||||
|
||||
=item B<-r>, B<--report> I<default|average|sums>
|
||||
|
||||
|
@ -388,8 +410,8 @@ Choose the report type: I<default>, I<average> or I<sums>
|
|||
|
||||
By default, B<groupstats> will report the number of postings for each
|
||||
newsgroup in each month. But it can also report the average number of
|
||||
postings per group for all months or the total sum of postings per group
|
||||
for all months.
|
||||
postings per group for all months or the total sum of postings per
|
||||
group for all months.
|
||||
|
||||
For report types I<average> and I<sums>, the B<group-by> option has no
|
||||
meaning and will be silently ignored (see below).
|
||||
|
@ -408,12 +430,13 @@ Set the boundary type to one of I<default>, I<level>, I<average> or
|
|||
I<sums>.
|
||||
|
||||
By default, all newsgroups with more postings per month than the upper
|
||||
boundary and/or less postings per month than the lower boundary will be
|
||||
excluded from further processing. For the default report that means each
|
||||
month only newsgroups with a number of postings between the boundaries
|
||||
will be displayed. For the other report types, newsgroups with a number of
|
||||
postings exceeding the boundaries in all (!) months will not be
|
||||
considered.
|
||||
boundary and/or less postings per month than the lower boundary will
|
||||
be
|
||||
excluded from further processing. For the default report that means
|
||||
each month only newsgroups with a number of postings between the
|
||||
boundaries will be displayed. For the other report types, newsgroups
|
||||
with a number of postings exceeding the boundaries in all (!) months
|
||||
will not be considered.
|
||||
|
||||
For example, lets take a list of newsgroups like this:
|
||||
|
||||
|
@ -443,22 +466,23 @@ month. If you want to list all newsgroups with more than 25 postings
|
|||
I<in total>, you'll have to set the boundary type to I<sum>, see below.
|
||||
|
||||
A boundary type of I<level> will show only those newsgroups - at all -
|
||||
that satisfy the boundaries in each and every single month. With the above
|
||||
list of newsgroups and
|
||||
that satisfy the boundaries in each and every single month. With the
|
||||
above list of newsgroups and
|
||||
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>,
|
||||
you'll get this result:
|
||||
|
||||
----- All months:
|
||||
de.comp.datenbanken.ms-access 293
|
||||
|
||||
de.comp.datenbanken.mysql has not been considered because it had less than
|
||||
25 postings in 2012-02 (only).
|
||||
de.comp.datenbanken.mysql has not been considered because it had less
|
||||
than 25 postings in 2012-02 (only).
|
||||
|
||||
You can use that to get a list of newsgroups that have more (or less) then
|
||||
x postings in every month during the whole reporting period.
|
||||
You can use that to get a list of newsgroups that have more (or less)
|
||||
than x postings in every month during the whole reporting period.
|
||||
|
||||
A boundary type of I<average> will show only those newsgroups - at all -that
|
||||
satisfy the boundaries on average. With the above list of newsgroups and
|
||||
A boundary type of I<average> will show only those newsgroups - at
|
||||
all - that satisfy the boundaries on average. With the above list of
|
||||
newsgroups and
|
||||
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>,
|
||||
you'll get this result:
|
||||
|
||||
|
@ -473,8 +497,8 @@ The average number of postings in the three groups is:
|
|||
de.comp.datenbanken.mysql 48.33
|
||||
|
||||
Last but not least, a boundary type of I<sums> will show only those
|
||||
newsgroups - at all - that satisfy the boundaries with the total sum of
|
||||
all postings during the reporting period. With the above list of
|
||||
newsgroups - at all - that satisfy the boundaries with the total sum
|
||||
of all postings during the reporting period. With the above list of
|
||||
newsgroups and
|
||||
C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>,
|
||||
you'll finally get this result:
|
||||
|
@ -487,8 +511,8 @@ you'll finally get this result:
|
|||
|
||||
=item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]>
|
||||
|
||||
By default, all results are grouped by month, sorted chronologically in
|
||||
ascending order, like this:
|
||||
By default, all results are grouped by month, sorted chronologically
|
||||
in ascending order, like this:
|
||||
|
||||
----- 2012-01:
|
||||
de.comp.datenbanken.ms-access 84
|
||||
|
@ -507,8 +531,8 @@ B<--group-by> I<newsgroup>:
|
|||
2012-01 88
|
||||
2012-02 21
|
||||
|
||||
By appending I<-desc> to the group-by option parameter, you can reverse
|
||||
the sort order - e.g. B<--group-by> I<month-desc> will give:
|
||||
By appending I<-desc> to the group-by option parameter, you can
|
||||
reverse the sort order - e.g. B<--group-by> I<month-desc> will give:
|
||||
|
||||
----- 2012-02:
|
||||
de.comp.datenbanken.ms-access 126
|
||||
|
@ -523,9 +547,9 @@ this option will therefore be ignored.
|
|||
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
|
||||
|
||||
Within each group (a single month or single newsgroup, see above), the
|
||||
report will be sorted by newsgroup names in ascending alphabetical order
|
||||
by default. You can change the sort order to descending or sort by number
|
||||
of postings instead.
|
||||
report will be sorted by newsgroup names in ascending alphabetical
|
||||
order by default. You can change the sort order to descending or sort
|
||||
by number of postings instead.
|
||||
|
||||
=item B<-f>, B<--format> I<pretty|list|dump>
|
||||
|
||||
|
@ -569,29 +593,32 @@ False by default.
|
|||
|
||||
=item B<--comments|--nocomments>
|
||||
|
||||
Add comments (group headers) to I<dump> and I<pretty> output. True by default.
|
||||
Add comments (group headers) to I<dump> and I<pretty> output. True by
|
||||
default as long as B<--filetemplate> is not set.
|
||||
|
||||
Use I<--nocomments> to suppress anything except newsgroup names/months and
|
||||
numbers of postings. This is enforced when using B<--filetemplate>, see below.
|
||||
Use I<--nocomments> to suppress anything except newsgroup names/months
|
||||
and numbers of postings.
|
||||
|
||||
=item B<--filetemplate> I<filename template>
|
||||
|
||||
Save output to file(s) instead of dumping it to STDOUT. B<groupstats> will
|
||||
create one file for each month (or each newsgroup, accordant to the
|
||||
setting of B<--group-by>, see above), with filenames composed by adding
|
||||
year and month (or newsgroup names) to the I<filename template>, for
|
||||
example with B<--filetemplate> I<stats>:
|
||||
Save output to file(s) instead of dumping it to STDOUT. B<groupstats>
|
||||
will create one file for each month (or each newsgroup, according to
|
||||
the setting of B<--group-by>, see above), with filenames composed by
|
||||
adding year and month (or newsgroup names) to the I<filename template>,
|
||||
for example with B<--filetemplate> I<stats>:
|
||||
|
||||
stats-2012-01
|
||||
stats-2012-02
|
||||
... and so on
|
||||
|
||||
B<--nocomments> is enforced, see above.
|
||||
|
||||
=item B<--groupsdb> I<database table>
|
||||
|
||||
Override I<DBTableGrps> from F<newsstats.conf>.
|
||||
|
||||
=item B<--conffile> I<filename>
|
||||
|
||||
Read configuration from I<filename> instead of F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
@ -614,9 +641,9 @@ by number of postings, descending, in I<pretty> format:
|
|||
|
||||
groupstats --upper 30 --order-by postings-desc
|
||||
|
||||
Show the total of all postings for the year of 2010 for all groups that
|
||||
had 30 postings or less in every single month in that year, ordered by
|
||||
number of postings in descending order:
|
||||
Show the total of all postings for the year of 2010 for all groups
|
||||
that had 30 postings or less in every single month in that year,
|
||||
ordered by number of postings in descending order:
|
||||
|
||||
groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc
|
||||
|
||||
|
@ -630,20 +657,19 @@ machine-readable form (without formatting):
|
|||
|
||||
groupstats -m 2010-01:2010-12 -f dump --filetemplate stats
|
||||
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<groupstats.pl>
|
||||
=item F<bin/groupstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<NewsStats.pm>
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<newsstats.conf>
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
|
@ -652,7 +678,7 @@ Runtime configuration file.
|
|||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
|
@ -664,7 +690,7 @@ L<doc/README>
|
|||
|
||||
=item -
|
||||
|
||||
l>doc/INSTALL>
|
||||
L<doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
||||
|
@ -676,11 +702,11 @@ This script is part of the B<NewsStats> package.
|
|||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
Thomas Hochstein <thh@thh.name>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
515
bin/hoststats.pl
Executable file
515
bin/hoststats.pl
Executable file
|
@ -0,0 +1,515 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# hoststats.pl
|
||||
#
|
||||
# This script will get statistical data on server (host) usage
|
||||
# from a database.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
use File::Basename;
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
|
||||
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat,
|
||||
$OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy,
|
||||
$OptReportType,$OptSums,$UppBound,$OptConfFile);
|
||||
GetOptions ('c|captions!' => \$OptCaptions,
|
||||
'comments!' => \$OptComments,
|
||||
'db=s' => \$OptDB,
|
||||
'filetemplate=s' => \$OptFileTemplate,
|
||||
'f|format=s' => \$OptFormat,
|
||||
'g|group-by=s' => \$OptGroupBy,
|
||||
'l|lower=i' => \$LowBound,
|
||||
'm|month=s' => \$OptMonth,
|
||||
'n|names=s' => \$OptNames,
|
||||
'o|order-by=s' => \$OptOrderBy,
|
||||
'r|report=s' => \$OptReportType,
|
||||
's|sums!' => \$OptSums,
|
||||
'u|upper=i' => \$UppBound,
|
||||
'conffile=s' => \$OptConfFile,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
# parse parameters
|
||||
# $OptComments defaults to TRUE if --filetemplate is not used
|
||||
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
|
||||
# parse $OptReportType
|
||||
if ($OptReportType) {
|
||||
if ($OptReportType =~ /sums?/i) {
|
||||
$OptReportType = 'sum';
|
||||
} else {
|
||||
$OptReportType = 'default';
|
||||
}
|
||||
}
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig($OptConfFile)};
|
||||
|
||||
### set DBTable
|
||||
$Conf{'DBTable'} = $Conf{'DBTableHosts'};
|
||||
$Conf{'DBTable'} = $OptDB if $OptDB;
|
||||
|
||||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
|
||||
### get time period and names, prepare SQL 'WHERE' clause
|
||||
# get time period
|
||||
# and set caption for output and expression for SQL 'WHERE' clause
|
||||
my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
|
||||
# bail out if --month is invalid
|
||||
&Bleat(2,"--month option has an invalid format - ".
|
||||
"please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
|
||||
# get list of hosts and set expression for SQL 'WHERE' clause
|
||||
# with placeholders as well as a list of names to bind to them
|
||||
my ($SQLWhereNames,@SQLBindNames);
|
||||
if ($OptNames) {
|
||||
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,'host');
|
||||
# bail out if --names is invalid
|
||||
&Bleat(2,"--names option has an invalid format!")
|
||||
if !$SQLWhereNames;
|
||||
}
|
||||
|
||||
### build SQL WHERE clause
|
||||
my $ExcludeSums = $OptSums ? '' : sprintf("%s != 'ALL'",'host');
|
||||
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
|
||||
$ExcludeSums,
|
||||
&SQLSetBounds('default',$LowBound,$UppBound));
|
||||
|
||||
### get sort order and build SQL 'ORDER BY' clause
|
||||
# force to 'month' for $OptReportType 'sum'
|
||||
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
|
||||
# default to 'name' if $OptGroupBy is not set and
|
||||
# just one name is requested, but more than one month
|
||||
$OptGroupBy = 'name' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
|
||||
and $OptNames and $OptNames !~ /[:*%]/);
|
||||
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
|
||||
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
|
||||
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'host');
|
||||
# $GroupBy will contain 'month' or 'host' (parsed result of $OptGroupBy)
|
||||
# set it to 'month' or 'key' for OutputData()
|
||||
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
|
||||
|
||||
### get report type and build SQL 'SELECT' query
|
||||
my $SQLSelect;
|
||||
my $SQLGroupClause = '';
|
||||
my $Precision = 0; # number of digits right of decimal point for output
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$SQLGroupClause = "GROUP BY host";
|
||||
# change $SQLOrderClause: replace everything before 'postings'
|
||||
$SQLOrderClause =~ s/BY.+postings/BY postings/;
|
||||
$SQLSelect = "'All months',host,SUM(postings)";
|
||||
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
|
||||
$SQLOrderClause =~ s/postings/SUM(postings)/;
|
||||
} else {
|
||||
$SQLSelect = "month,host,postings";
|
||||
};
|
||||
|
||||
### get length of longest name delivered by query
|
||||
### for formatting purposes
|
||||
my $Field = ($GroupBy eq 'month') ? 'host' : 'month';
|
||||
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
|
||||
$Field,'postings',$SQLWhereClause,
|
||||
'',@SQLBindNames);
|
||||
|
||||
### build and execute SQL query
|
||||
my ($DBQuery);
|
||||
# prepare query
|
||||
$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
|
||||
$SQLSelect,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
$SQLWhereClause,$SQLGroupClause,
|
||||
$SQLOrderClause));
|
||||
# execute query
|
||||
$DBQuery->execute(@SQLBindNames)
|
||||
or &Bleat(2,sprintf("Can't get host data for %s from %s.%s: %s\n",
|
||||
$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
|
||||
$DBI::errstr));
|
||||
|
||||
### output results
|
||||
# set default to 'pretty'
|
||||
$OptFormat = 'pretty' if !$OptFormat;
|
||||
# print captions if --caption is set
|
||||
my $LeadIn;
|
||||
if ($OptCaptions && $OptComments) {
|
||||
# print time period with report type
|
||||
my $CaptionReportType = '(number of postings for each month)';
|
||||
if ($OptReportType and $OptReportType ne 'default') {
|
||||
$CaptionReportType = '(number of all postings for that time period)';
|
||||
}
|
||||
$LeadIn .= sprintf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
|
||||
# print name list if --names is set
|
||||
$LeadIn .= sprintf("# ----- Names: %s\n",join(',',split(/:/,$OptNames)))
|
||||
if $OptNames;
|
||||
# print boundaries, if set
|
||||
my $CaptionBoundary= '(counting only months fulfilling this condition)';
|
||||
$LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
|
||||
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
|
||||
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
|
||||
if ($LowBound or $UppBound);
|
||||
# print primary and secondary sort order
|
||||
$LeadIn .= sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
|
||||
($GroupBy eq 'month') ? 'Months' : 'Names',
|
||||
($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
|
||||
($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
|
||||
($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
|
||||
}
|
||||
|
||||
# output data
|
||||
&OutputData($OptFormat,$OptComments,$GroupBy,$Precision,'',$LeadIn,
|
||||
$OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength);
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
hoststats - create reports on host usage
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<hoststats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<server(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script creates reports on host usage (number of postings from
|
||||
each host) taken from result tables created by B<gatherstats.pl>.
|
||||
|
||||
=head2 Features and options
|
||||
|
||||
=head3 Time period and names
|
||||
|
||||
The time period to act on defaults to last month; you can assign another
|
||||
time period or a single month (or drop all time constraints) via the
|
||||
B<--month> option (see below).
|
||||
|
||||
B<hoststats> will process all hosts by default; you can limit
|
||||
processing to only some hosts by supplying a list of those names by
|
||||
using the B<--names> option (see below).
|
||||
|
||||
=head3 Report type
|
||||
|
||||
You can choose between different B<--report> types: postings per month
|
||||
or all postings summed up; for details, see below.
|
||||
|
||||
=head3 Upper and lower boundaries
|
||||
|
||||
Furthermore you can set an upper and/or lower boundary to exclude some
|
||||
results from output via the B<--lower> and B<--upper> options,
|
||||
respectively. By default, all hosts with more and/or less postings
|
||||
per month than the respective boundary will be excluded from the result set (i.e. not shown and
|
||||
not considered for sum reports).
|
||||
|
||||
=head3 Sorting and formatting the output
|
||||
|
||||
By default, all results are grouped by month; you can group results by
|
||||
hosts instead via the B<--group-by> option. Within those groups, the
|
||||
list of hosts (or months) is sorted alphabetically (or chronologically,
|
||||
respectively) ascending. You can change that order (and sort by number
|
||||
of postings) with the B<--order-by> option. For details and exceptions,
|
||||
please see below.
|
||||
|
||||
The results will be formatted as a kind of table; you can change the
|
||||
output format to a simple list or just a list of names and number of
|
||||
postings with the B<--format> option. Captions will be added by means
|
||||
of the B<--captions> option; all comments (and captions) can be
|
||||
suppressed by using B<--nocomments>.
|
||||
|
||||
Last but not least you can redirect all output to a number of files,
|
||||
e.g. one for each month, by submitting the B<--filetemplate> option,
|
||||
see below.
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<hoststats> will read its configuration from F<newsstats.conf>
|
||||
which should be present in etc/ via Config::Auto or from a configuration
|
||||
file submitted by the B<--conffile> option.
|
||||
|
||||
See doc/INSTALL for an overview of possible configuration options.
|
||||
|
||||
You can override some configuration options via the B<--db> option.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Display version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Display this man page and exit.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
|
||||
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
period between two months in YYYY-MM:YYYY-MM format (two months, separated
|
||||
by a colon). By using the keyword I<all> instead, you can set no
|
||||
processing period to process the whole database. Defaults to last month.
|
||||
|
||||
=item B<-n>, B<--names> I<name(s)>
|
||||
|
||||
Limit processing to a certain set of hostnames. I<name(s)>
|
||||
can be a single name (eternal-september.org), a group of names
|
||||
(*.inka.de) or a list of either of these, separated by colons, for
|
||||
example
|
||||
|
||||
eternal-september.org:solani.org:*.inka.de
|
||||
|
||||
=item B<-s>, B<--sums|--nosums> (sum per month)
|
||||
|
||||
Include a "virtual" host named "ALL" for every month in output,
|
||||
containing the sum of all detected hosts for that month. False
|
||||
by default.
|
||||
|
||||
=item B<-r>, B<--report> I<default|sums>
|
||||
|
||||
Choose the report type: I<default> or I<sums>
|
||||
|
||||
By default, B<hoststats> will report the number of postings for each
|
||||
host in each month. But it can also report the total sum of postings
|
||||
per host for all months.
|
||||
|
||||
For report type I<sums>, the B<group-by> option has no meaning and
|
||||
will be silently ignored (see below).
|
||||
|
||||
=item B<-l>, B<--lower> I<lower boundary>
|
||||
|
||||
Set the lower boundary. See below.
|
||||
|
||||
=item B<-u>, B<--upper> I<upper boundary>
|
||||
|
||||
Set the upper boundary.
|
||||
|
||||
By default, all hosts with more postings per month than the upper
|
||||
boundary and/or less postings per month than the lower boundary
|
||||
will be excluded from further processing. For the default report that
|
||||
means each month only hosts with a number of postings between the
|
||||
boundaries will be displayed. For the sums report, hosts with a number
|
||||
of postings exceeding the boundaries in all (!) months will not be
|
||||
considered.
|
||||
|
||||
=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>
|
||||
|
||||
By default, all results are grouped by month, sorted chronologically
|
||||
in ascending order, like this:
|
||||
|
||||
# ----- 2012-01:
|
||||
arcor-online.net : 9379
|
||||
individual.net : 19525
|
||||
news.albasani.net: 9063
|
||||
# ----- 2012-02:
|
||||
arcor-online.net : 8606
|
||||
individual.net : 16768
|
||||
news.albasani.net: 7879
|
||||
|
||||
The results can be grouped by host instead via B<--group-by> I<name>:
|
||||
|
||||
----- individual.net
|
||||
2012-01: 19525
|
||||
2012-02: 16768
|
||||
----- arcor-online.net
|
||||
2012-01: 9379
|
||||
2012-02: 8606
|
||||
----- news.albasani.net
|
||||
2012-01: 9063
|
||||
2012-02: 7879
|
||||
|
||||
By appending I<-desc> to the group-by option parameter, you can
|
||||
reverse the sort order - e.g. B<--group-by> I<month-desc> will give:
|
||||
|
||||
# ----- 2012-02:
|
||||
arcor-online.net : 8606
|
||||
individual.net : 16768
|
||||
news.albasani.net: 7879
|
||||
# ----- 2012-01:
|
||||
arcor-online.net : 9379
|
||||
individual.net : 19525
|
||||
news.albasani.net: 9063
|
||||
|
||||
Sums reports (see above) will always be grouped by months; this option
|
||||
will therefore be ignored.
|
||||
|
||||
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
|
||||
|
||||
Within each group (a single month or single host, see above), the
|
||||
report will be sorted by host (or month) in ascending alphabetical
|
||||
order by default. You can change the sort order to descending or sort
|
||||
by number of postings instead.
|
||||
|
||||
=item B<-f>, B<--format> I<pretty|list|dump>
|
||||
|
||||
Select the output format, I<pretty> being the default:
|
||||
|
||||
# ----- 2012-01:
|
||||
arcor-online.net : 9379
|
||||
individual.net : 19525
|
||||
# ----- 2012-02:
|
||||
arcor-online.net : 8606
|
||||
individual.net : 16768
|
||||
|
||||
I<list> format looks like this:
|
||||
|
||||
2012-01 arcor-online.net 9379
|
||||
2012-01 individual.net 19525
|
||||
2012-02 arcor-online.net 8606
|
||||
2012-02 individual.net 16768
|
||||
|
||||
And I<dump> format looks like this:
|
||||
|
||||
# 2012-01:
|
||||
arcor-online.net 9379
|
||||
individual.net 19525
|
||||
# 2012-02:
|
||||
arcor-online.net 8606
|
||||
individual.net 16768
|
||||
|
||||
You can remove the comments by using B<--nocomments>, see below.
|
||||
|
||||
=item B<-c>, B<--captions|--nocaptions>
|
||||
|
||||
Add captions to output, like this:
|
||||
|
||||
----- Report for 2012-01 to 2012-02 (number of postings for each month)
|
||||
----- Names: individual.net
|
||||
----- Threshold: 8000 => x (counting only months fulfilling this condition)
|
||||
----- Grouped by Months (ascending), sorted by number of postings descending
|
||||
|
||||
False by default.
|
||||
|
||||
=item B<--comments|--nocomments>
|
||||
|
||||
Add comments (group headers) to I<dump> and I<pretty> output. True by
|
||||
default as long as B<--filetemplate> is not set.
|
||||
|
||||
Use I<--nocomments> to suppress anything except host names or months
|
||||
and numbers of postings.
|
||||
|
||||
=item B<--filetemplate> I<filename template>
|
||||
|
||||
Save output to file(s) instead of dumping it to STDOUT. B<hoststats>
|
||||
will create one file for each month (or each host, according to the
|
||||
setting of B<--group-by>, see above), with filenames composed by adding
|
||||
year and month (or hostnames) to the I<filename template>, for example
|
||||
with B<--filetemplate> I<stats>:
|
||||
|
||||
stats-2012-01
|
||||
stats-2012-02
|
||||
... and so on
|
||||
|
||||
=item B<--db> I<database table>
|
||||
|
||||
Override I<DBTableHosts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--conffile> I<filename>
|
||||
|
||||
Read configuration from I<filename> instead of F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Show number of postings per host for last month in I<pretty> format:
|
||||
|
||||
hoststats
|
||||
|
||||
Show that report for January of 2010 and *.inka plus individual.net:
|
||||
|
||||
hoststats --month 2010-01 --names *.inka:individual.net
|
||||
|
||||
Only show hosts with 30 postings or less last month, ordered
|
||||
by number of postings, descending, in I<pretty> format:
|
||||
|
||||
hoststats --upper 30 --order-by postings-desc
|
||||
|
||||
List number of postings per host for each month of 2010 and redirect
|
||||
output to one file for each month, named hosts-2010-01 and so on, in
|
||||
machine-readable form (without formatting):
|
||||
|
||||
hoststats -m 2010-01:2010-12 -f dump --filetemplate hosts
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<bin/hoststats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
||||
gatherstats -h
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@thh.name>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
449
bin/postingstats.pl
Executable file
449
bin/postingstats.pl
Executable file
|
@ -0,0 +1,449 @@
|
|||
#!/usr/bin/perl
|
||||
#
|
||||
# postingstats.pl
|
||||
#
|
||||
# This script will create statistic postings from NewsStats output.
|
||||
# It defaults to statistics for de.* posted to de.admin.lists, but
|
||||
# defaults can be changed at ----- configuration -----.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2012, 2025 Thomas Hochstein <thh@thh.name>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
#
|
||||
# Usage:
|
||||
# $~ groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups
|
||||
# $~ hoststats.pl --nocomments --sums --format dump | postingstats.pl -t hosts
|
||||
# $~ clientstats.pl --nocomments --sums --versions --format dump | postingstats.pl -t clients
|
||||
#
|
||||
|
||||
BEGIN {
|
||||
use File::Basename;
|
||||
# we're in .../bin, so our module is in ../lib
|
||||
push(@INC, dirname($0).'/../lib');
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT LastMonth);
|
||||
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
use constant TABLEWIDTH => 28; # width of table without newsgroup name
|
||||
|
||||
##### ----- pre-config -----------------------------------------------
|
||||
### read commandline options
|
||||
my ($Month, $Type);
|
||||
GetOptions ('m|month=s' => \$Month,
|
||||
't|type=s' => \$Type,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
$Month = &LastMonth if !$Month;
|
||||
if ($Month !~ /^\d{4}-\d{2}$/) {
|
||||
$Month = &LastMonth;
|
||||
&Bleat(1,"--month option has an invalid format - set to $Month.");
|
||||
};
|
||||
# parse $Type
|
||||
if (!$Type) {
    # default
    $Type = 'GroupStats';
} elsif ($Type =~ /(news)?groups?/i) {
    $Type = 'GroupStats';
} elsif ($Type =~ /(host|server)s?/i) {
    $Type = 'HostStats';
} elsif ($Type =~ /(client|reader)s?/i) {
    $Type = 'ClientStats';
} else {
    # An unrecognised value must not be passed on: $Type is later used as
    # key for %Heading, %LeadIn and %LeadOut and to select $SumName, all of
    # which would be undefined for an unknown type.
    # Handled like an invalid --month: complain via Bleat(1,...) and fall
    # back to the default (assumes Bleat level 1 warns and returns, as its
    # use for --month suggests - TODO confirm in NewsStats.pm).
    &Bleat(1,"--type option has an unknown value '$Type' - set to 'groups'.");
    $Type = 'GroupStats';
};
|
||||
my $Timestamp = time;
|
||||
|
||||
##### ----- configuration --------------------------------------------
|
||||
my $TLH = 'de';
|
||||
my %Heading = ('GroupStats' => 'Postingstatistik fuer de.* im Monat '.$Month,
|
||||
'HostStats' => 'Serverstatistik fuer de.* im Monat '.$Month,
|
||||
'ClientStats' => 'Newsreaderstatistik fuer de.* im Monat '.$Month
|
||||
);
|
||||
my %TH = ('counter' => 'Nr.',
|
||||
'value' => 'Anzahl',
|
||||
'percentage' => 'Prozent'
|
||||
);
|
||||
my %LeadIn = ('GroupStats' => <<GROUPSIN, 'HostStats' => <<HOSTSIN, 'ClientStats' => <<CLIENTSIN);
|
||||
From: Thomas Hochstein <thh\@thh.name>
|
||||
Newsgroups: local.test
|
||||
Subject: Postingstatistik fuer de.* im Monat $Month
|
||||
Message-ID: <destat-postings-$Month.$Timestamp\@mid.news.szaf.org>
|
||||
Approved: thh\@thh.name
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/plain; charset=us-ascii
|
||||
Content-Transfer-Encoding: 7bit
|
||||
User-Agent: postingstats.pl/$VERSION (NewsStats)
|
||||
|
||||
GROUPSIN
|
||||
From: Thomas Hochstein <thh\@thh.name>
|
||||
Newsgroups: local.test
|
||||
Subject: Serverstatistik fuer de.* im Monat $Month
|
||||
Message-ID: <destat-hosts-$Month.$Timestamp\@mid.news.szaf.org>
|
||||
Approved: thh\@thh.name
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/plain; charset=us-ascii
|
||||
Content-Transfer-Encoding: 7bit
|
||||
User-Agent: postingstats.pl/$VERSION (NewsStats)
|
||||
|
||||
HOSTSIN
|
||||
From: Thomas Hochstein <thh\@thh.name>
|
||||
Newsgroups: local.test
|
||||
Subject: Newsreaderstatistik fuer de.* im Monat $Month
|
||||
Message-ID: <destat-clients-$Month.$Timestamp\@mid.news.szaf.org>
|
||||
Approved: thh\@thh.name
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
User-Agent: postingstats.pl/$VERSION (NewsStats)
|
||||
|
||||
CLIENTSIN
|
||||
my %LeadOut = ('GroupStats' => <<GROUPSOUT, 'HostStats' => <<HOSTSOUT, 'ClientStats' => <<CLIENTSOUT);
|
||||
|
||||
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
|
||||
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
|
||||
richten werden nicht erfasst; Postings, die supersedet oder gecancelt
|
||||
wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
|
||||
Loeschnachricht) erreicht haben. Crosspostings werden in jeder Gruppe,
|
||||
in die sie gerichtet sind, gezaehlt, aber bei Ermittlung der Summe be-
|
||||
reinigt; daher ist die Postinganzahl fuer de.* gesamt niedriger als die
|
||||
Summe der Postinganzahlen der Einzelgruppen.
|
||||
|
||||
Die Daten stehen graphisch aufbereitet unter <http://usenet.dex.de/> zur
|
||||
Verfuegung.
|
||||
GROUPSOUT
|
||||
|
||||
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
|
||||
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
|
||||
richten werden nicht erfasst; Postings, die supersedet oder gecancelt
|
||||
wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
|
||||
Loeschnachricht) erreicht haben.
|
||||
HOSTSOUT
|
||||
|
||||
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
|
||||
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
|
||||
richten werden nicht erfasst; Postings, die supersedet oder gecancelt
|
||||
wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
|
||||
Loeschnachricht) erreicht haben. Versionsangaben werden nur gezaehlt,
|
||||
wenn Sie ermittelbar sind; daher kann die Summe der Newsreader-Versionen
|
||||
kleiner sein als die Postingzahl fuer den Newsreader. Ausserdem koennen
|
||||
an einem Beitrag mehrere Clients beteiligt sein, bspw. der Newsreader
|
||||
und ein lokaler Server wie der Hamster. Daher kann die Summe aller
|
||||
Newsreader groesser sein als die Summe der Postings; auch ergeben die
|
||||
Prozentzahlen dementsprechend in der Summe mehr als 100%.
|
||||
CLIENTSOUT
|
||||
|
||||
##### ----- subroutines ----------------------------------------------
|
||||
|
||||
sub Percentage {
    # calculate percentage rate from base value and percentage
    # IN : $Base       - base value representing 100%
    #      $Percentage - value to be expressed as percentage of $Base
    # OUT: percentage rate as plain number (unrounded, without '%');
    #      0 if $Base is undefined or zero
    my ($Base,$Percentage) = @_;
    # no usable base (e.g. no sum line was found in the input):
    # a rate cannot be computed, so return 0 instead of dying with
    # "Illegal division by zero"
    return 0 if (!defined($Base) or $Base == 0);
    return ($Percentage * 100 / $Base);
}
|
||||
|
||||
sub Divider {
    # return one horizontal rule for the table: $Symbol repeated over the
    # full table width, enclosed in colons and terminated by a newline
    # IN : $Symbol    - character(s) the rule is drawn with
    #      $MaxLength - width of the name column
    # OUT: the rule as string
    my ($Symbol,$MaxLength) = @_;
    # total width = name column plus the fixed part of the table
    my $Width = $MaxLength + TABLEWIDTH;
    return join('', ':', $Symbol x $Width, ":\n");
}
|
||||
|
||||
sub SingleVersion {
    # merge a client's only version into the client name, so that no
    # separate version line is kept for it
    # IN : $LastName   - client name
    #      $RSubValue  - reference to hash of hashes (client -> version -> count)
    #      $RValue     - reference to hash (client -> count)
    #      $RMaxLength - reference to scalar holding the longest name length
    # All three referenced structures are modified in place.
    my ($LastName,$RSubValue,$RValue,$RMaxLength) = @_;

    # fetch the version string and strip its leading "- " marker
    my ($OnlyVersion) = keys %{ $RSubValue->{$LastName} };
    $OnlyVersion =~ s/^- //;

    # move the counter from the old key to "<client> <version>"
    my $Combined = "$LastName $OnlyVersion";
    $RValue->{$Combined} = delete $RValue->{$LastName};

    # the new name may be the longest one seen so far
    my $Length = length($Combined);
    ${$RMaxLength} = $Length if $Length > ${$RMaxLength};

    # drop the version entry
    delete $RSubValue->{$LastName};
}
|
||||
|
||||
##### ----- main loop ------------------------------------------------
|
||||
|
||||
my (%Value, %SubValue, $SubCounter, $LastName, $SumName, $SumTotal,
|
||||
$MaxLength);
|
||||
|
||||
if ($Type eq 'GroupStats') {
|
||||
$SumName = "$TLH.ALL";
|
||||
$TH{'name'} = 'Newsgroup'
|
||||
} elsif ($Type eq 'HostStats') {
|
||||
$SumName = 'ALL';
|
||||
$TH{'name'} = 'Postingserver'
|
||||
} elsif ($Type eq 'ClientStats') {
|
||||
$SumName = 'ALL';
|
||||
$TH{'name'} = 'Newsreader / Client'
|
||||
}
|
||||
|
||||
### read from STDIN
|
||||
$MaxLength = 0;
|
||||
while(<>) {
    # each line is expected to look like "<name> <number of postings>"
    my ($Name, $Value) = $_ =~ /(.+) (\d+)$/;
    # skip anything else (empty lines, comments, ...); otherwise $Name and
    # $Value would be undefined, causing warnings and a bogus '' entry
    next if !defined($Name);
    $SumTotal = $Value if $Name eq $SumName;
    next if $Name =~ /ALL$/;

    # handle client versions
    if ($Type eq 'ClientStats' and $Name =~ /^- /) {
        # version lines ("- <version>") belong to the last client seen
        $SubValue{$LastName}{$Name} = $Value;
        $SubCounter++;
    } else {
        # clients with just one version
        &SingleVersion($LastName,\%SubValue,\%Value,\$MaxLength)
            if ($LastName && $SubCounter == 1);

        # reset version counter and client name
        $SubCounter = 0;
        $LastName = $Name;

        $Value{$Name} = $Value;
        $MaxLength = length($Name) if length($Name) > $MaxLength;
    }
}
|
||||
# clients with just one version (last iteration)
|
||||
&SingleVersion($LastName,\%SubValue,\%Value,\$MaxLength)
|
||||
if ($LastName && $SubCounter == 1);
|
||||
|
||||
### print to STDOUT
|
||||
# calculate padding for $Heading
|
||||
my $PaddingLeft = ' ' x int((($MaxLength+TABLEWIDTH-2-length($Heading{$Type}))/2));
|
||||
my $PaddingRight = $PaddingLeft;
|
||||
$PaddingLeft .= ' ' if (length($Heading{$Type}) + (length($PaddingLeft) * 2) +2 < $MaxLength+TABLEWIDTH);
|
||||
|
||||
print $LeadIn{$Type};
|
||||
|
||||
# print table header
|
||||
print &Divider('=',$MaxLength);
|
||||
printf(": %s%s%s :\n",$PaddingLeft,$Heading{$Type},$PaddingRight);
|
||||
print &Divider('=',$MaxLength);
|
||||
printf(": %-3s : %-6s : %-7s : %-*s :\n",
|
||||
substr($TH{'counter'},0,3),
|
||||
substr($TH{'value'},0,6),
|
||||
substr($TH{'percentage'},0,7),
|
||||
$MaxLength,$TH{'name'});
|
||||
print &Divider('-',$MaxLength);
|
||||
|
||||
# print table
|
||||
my $Counter = 0;
|
||||
foreach my $Name (sort { $Value{$b} <=> $Value{$a} } keys %Value) {
|
||||
$Counter++;
|
||||
printf(": %3u. : %6u : %6.2f%% : %-*s :\n",
|
||||
$Counter,$Value{$Name},&Percentage($SumTotal,$Value{$Name}),
|
||||
$MaxLength,$Name);
|
||||
# handle client versions
|
||||
if ($SubValue{$Name}) {
|
||||
foreach my $SubName (sort { $SubValue{$Name}{$b} <=> $SubValue{$Name}{$a} }
|
||||
keys %{$SubValue{$Name}}) {
|
||||
printf(": : %6u : %6.2f%% : %-*s :\n",
|
||||
$SubValue{$Name}{$SubName},
|
||||
&Percentage($SumTotal,$SubValue{$Name}{$SubName}),
|
||||
$MaxLength,$SubName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# print table footer
|
||||
print &Divider('-',$MaxLength);
|
||||
printf(": : %6u : %s : %-*s :\n",$SumTotal,'100.00%',$MaxLength,'');
|
||||
print &Divider('=',$MaxLength);
|
||||
|
||||
print $LeadOut{$Type};
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
postingstats - format and post reports
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<postingstats> [B<-Vh>] [B<-t> I<groups|hosts|clients>] [B<-m> I<YYYY-MM>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script will re-format reports on newsgroup usage created by
|
||||
B<groupstats.pl>, B<hoststats.pl> or B<clientstats.pl> and create a
|
||||
message that can be posted to Usenet.
|
||||
|
||||
=head2 Features and options
|
||||
|
||||
B<postingstats> will create a table with entries numbered from most
|
||||
to least and percentages calculated from the sum total of all values.
|
||||
|
||||
It depends on a sorted list on STDIN in I<dump> format with I<sums>;
|
||||
I<versions> from B<clientstats.pl> are optional.
|
||||
|
||||
B<postingstats> needs a B<--type> and a B<--month> to create a caption
|
||||
and select matching lead-ins and lead-outs. B<--type> is also needed
|
||||
to catch the correct sum total from input which differs between I<groups>
|
||||
on one hand and I<hosts> or I<clients> on the other hand.
|
||||
|
||||
It will default to posting statistics (number of postings per group)
|
||||
and last month.
|
||||
|
||||
Output from B<postingstats> can be piped to any C<inews> implementation,
|
||||
e.g. C<tinews.pl> from L<ftp://ftp.tin.org/pub/news/clients/tin/tools/tinews.pl>
|
||||
(present in C</contrib/>).
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
Configuration is done by changing the code in the
|
||||
C<----- configuration -----> section.
|
||||
|
||||
=over 3
|
||||
|
||||
=item C<$TLH>
|
||||
|
||||
Top level hierarchy the report was created for. Used for display and
|
||||
sum total (only for I<groups>).
|
||||
|
||||
=item C<%Heading>
|
||||
|
||||
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>.
|
||||
Used to display a heading.
|
||||
|
||||
=item C<%TH>
|
||||
|
||||
Hash with keys for I<counter>, I<value> and I<percentage>. Used to
|
||||
create the table header for I<number>, I<quantity> and I<percentage>.
|
||||
|
||||
I<counter> must not be longer than 3 characters, I<value> no longer
|
||||
than 6 characters and I<percentage> no longer than 7 characters.
|
||||
Output will be truncated otherwise.
|
||||
|
||||
=item C<%LeadIn>
|
||||
|
||||
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>.
|
||||
Used to create the headers for the postings. Can contain other text
|
||||
that will be shown before C<%Heading>.
|
||||
|
||||
=item C<%LeadOut>
|
||||
|
||||
Hash with keys for I<GroupStats>, I<HostStats> and I<ClientStats>.
|
||||
Will be shown at the end of the posting.
|
||||
|
||||
=back
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Display version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Display this man page and exit.
|
||||
|
||||
=item B<-t>, B<--type> I<groups|hosts|clients>
|
||||
|
||||
Set report type to posting statistics, host statistics or client
|
||||
statistics accordingly.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM>
|
||||
|
||||
Set month (for display only).
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 USAGE
|
||||
|
||||
Create a posting from a posting statistics report for last month:
|
||||
|
||||
groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups
|
||||
|
||||
Create a posting from a posting statistics report for 2012-01:
|
||||
|
||||
groupstats.pl --nocomments --sums --format dump -m 2012-01 | postingstats.pl -t groups -m 2012-01
|
||||
|
||||
Create a posting from a host statistics report for last month:
|
||||
|
||||
hoststats.pl --nocomments --sums --format dump | postingstats.pl -t hosts
|
||||
|
||||
Create a posting from a client statistics report for last month:
|
||||
|
||||
clientstats.pl --nocomments --sums --versions --format dump | postingstats.pl -t clients
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<bin/postingstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<lib/NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<etc/newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/INSTALL>
|
||||
|
||||
=item -
|
||||
|
||||
groupstats -h
|
||||
|
||||
=item -
|
||||
|
||||
hoststats -h
|
||||
|
||||
=item -
|
||||
|
||||
clientstats -h
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@thh.name>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010-2012, 2025 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
13
contrib/dopostingstats.sh
Executable file
13
contrib/dopostingstats.sh
Executable file
|
@ -0,0 +1,13 @@
|
|||
#!/bin/bash
# installation path is /srv/newsstats/, please adjust accordingly
#
# Usage: dopostingstats.sh [YYYY-MM]
#
# Creates the group, host and client reports for one month, formats
# each of them with postingstats.pl and pipes the result to tinews.pl.
# Without a valid YYYY-MM argument the previous month is used.

# get month
MONTH=$1
# the pattern is anchored so that only an exact YYYY-MM is accepted;
# anything else (including no argument) falls back to last month
if ! [[ $MONTH =~ ^[0-9]{4}-[0-9]{2}$ ]]; then
    # NOTE(review): the calculation starts from the 15th of the current
    # month, presumably so that "-1 month" cannot land in the wrong
    # month near a month end - confirm against date(1)
    MONTH=$(date -d "$(date +%Y-%m-15) -1 month" '+%Y-%m')
fi

# post stats
/srv/newsstats/bin/groupstats.pl --nocomments --sums --format dump --month "$MONTH" | /srv/newsstats/bin/postingstats.pl --month "$MONTH" | /srv/newsstats/contrib/tinews.pl -X -Y
/srv/newsstats/bin/hoststats.pl --nocomments --sums --format dump --month "$MONTH" | /srv/newsstats/bin/postingstats.pl -t server --month "$MONTH" | /srv/newsstats/contrib/tinews.pl -X -Y
/srv/newsstats/bin/clientstats.pl --nocomments --sums --versions --format dump --month "$MONTH" | /srv/newsstats/bin/postingstats.pl -t client --month "$MONTH" | /srv/newsstats/contrib/tinews.pl -X -Y
|
1506
contrib/tinews.pl
Executable file
1506
contrib/tinews.pl
Executable file
File diff suppressed because it is too large
Load diff
11
contrib/yearstats.sh
Executable file
11
contrib/yearstats.sh
Executable file
|
@ -0,0 +1,11 @@
|
|||
#!/bin/bash
# installation path is /srv/newsstats/, please adjust accordingly
#
# Prints the sum of postings per year for one newsgroup.
#
# $1: newsgroup
# $2: first year to report (optional, defaults to 2012)
# $3: last year to report (optional, defaults to 2022)
FIRST=${2:-2012}
LAST=${3:-2022}

echo "Stats for $1"
# groupstats.pl is called via a relative path below, so give up if
# the installation directory cannot be entered
cd /srv/newsstats/ || exit 1
for (( year = FIRST; year <= LAST; year++ ))
do
    echo -n "${year}: "
    bin/groupstats.pl -m "$year-01:$year-12" -r sums -n "$1"
done
|
||||
|
|
@ -1,4 +1,62 @@
|
|||
NewsStats 0.01 (2013-09-02)
|
||||
NewsStats 0.4.0 (2025-06-02)
|
||||
* Reformat $Conf{TLH} for GroupStats only.
|
||||
* Extract TLH check from HostStats to subroutine, fix no-op check.
|
||||
* Extract getting raw headers from HostStats to subroutine.
|
||||
* Improve documentation for config file.
|
||||
* ParseHeader: re-merge continuation lines.
|
||||
* Add ClientStats to gatherstats.
|
||||
* Move cliservstats to hoststats.
|
||||
* Add clientstats (for clients).
|
||||
* Add ClientStats to postingstats.
|
||||
* gatherstats: Don't die on parsing errors.
|
||||
* DBClnts: set version length to 50.
|
||||
* gatherstats: Truncate overlong clients or versions.
|
||||
* gatherstats: Remove whitespace from client and version.
|
||||
* Fix version queries.
|
||||
* Add ClientStats to dopostingstats.
|
||||
* Let dopostingstats default to last month.
|
||||
* Set executable bit for new scripts.
|
||||
* Update documentation.
|
||||
|
||||
NewsStats 0.3.0 (2025-05-18)
|
||||
* Extract GroupStats (in gatherstats) to subroutine.
|
||||
* Add ParseHeader() to library.
|
||||
* Add HostStats to gatherstats.
|
||||
* Add DBTableHosts structure to install script.
|
||||
* Add cliservstats (for hosts and clients).
|
||||
- Refactor SQL generators.
|
||||
* Add --mid option to gatherstats for debugging purposes.
|
||||
* Don't parse NNTP-Posting-Host to determine the server name.
|
||||
* Add more known hosts.
|
||||
* Implement hierarchy check on gatherstats.
|
||||
* Add sums per month to HostStats.
|
||||
* Add postingstats and refactor it:
|
||||
- Make all text configurable (i18n).
|
||||
- Generalize to make it usable for HostStats.
|
||||
- Fallback to last month if no month is given.
|
||||
- Add option handling, import VERSION, add POD.
|
||||
* Update README, INSTALL and ChangeLog.
|
||||
* Don't enforce --nocomment for --filetemplate, just default to it.
|
||||
Change caption handling, update documentation accordingly.
|
||||
* Fix call to GetMaxLength() in cliservstats.
|
||||
* Fix typos in documentation, update sample config file.
|
||||
* Move database creation from install/install.pl to bin/dbcreate.pl
|
||||
* Add tinews.pl and some shell scripts to /contrib.
|
||||
|
||||
NewsStats 0.2.0 (2025-05-10)
|
||||
* Redo directory structure:
|
||||
- Move all scripts to /bin
|
||||
- Move configuration to /etc
|
||||
- Move NewsStats.pm to /lib and add /lib to @INC
|
||||
* Make configuration file configurable.
|
||||
* Warn if '--sums' is combined with '--checkgroups'
|
||||
* Set default sorting order to 'newsgroup' for single newsgroup.
|
||||
* Update documentation.
|
||||
* Accept an upper/lower boundary of 0 (zero).
|
||||
* Update copyright dates, version numbers, author address and
|
||||
repository location.
|
||||
* Unify version across all scripts and modules, change to
|
||||
semantic versioning.
|
||||
|
||||
NewsStats 0.01 (2013-09-02)
|
||||
* Initial public release.
|
||||
Thomas Hochstein <thh@inter.net>
|
77
doc/INSTALL
77
doc/INSTALL
|
@ -1,11 +1,12 @@
|
|||
NewsStats 0.1 (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
NewsStats (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
NewsStats is a software package used to gather statistical information
|
||||
from a live Usenet feed and for its subsequent examination.
|
||||
NewsStats is a software package that can be used to collect
|
||||
statistical information from a live Usenet feed and then analyze it
|
||||
to create statistical reports.
|
||||
|
||||
This script package is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Public License as published by
|
||||
the Free Software Foundation.
|
||||
This package is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Public License as published by the Free
|
||||
Software Foundation.
|
||||
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
@ -14,11 +15,12 @@ INSTALLATION INSTRUCTIONS
|
|||
1) Install the scripts
|
||||
|
||||
* Download the current version of NewsStats from
|
||||
<http://th-h.de/download/scripts.php>.
|
||||
<https://th-h.de/net/software/newsstats/>.
|
||||
|
||||
* Untar it into a directory of your choice:
|
||||
* Untar it into a directory of your choice, e.g. /srv/newsstats:
|
||||
|
||||
# tar -xzf newsstats-nn.tar.gz
|
||||
$ cd /srv
|
||||
$ tar -xzf newsstats-n.n.n.tar.gz
|
||||
|
||||
Scripts in this path - at least feedlog.pl - should be executable by the
|
||||
news user.
|
||||
|
@ -28,21 +30,21 @@ INSTALLATION INSTRUCTIONS
|
|||
* Copy the sample configuration file newsstats.conf.sample to
|
||||
newsstats.conf and modify it for your purposes:
|
||||
|
||||
# cp newsstats.conf.sample newsstats.conf
|
||||
# vim newsstats.conf
|
||||
$ cp etc/newsstats.conf.sample etc/newsstats.conf
|
||||
$ vim etc/newsstats.conf
|
||||
|
||||
a) Mandatory configuration options
|
||||
|
||||
* DBDriver = mysql
|
||||
Database driver used; currently only mysql is supported.
|
||||
|
||||
|
||||
* DBHost = localhost
|
||||
The host your mysql server is running on.
|
||||
|
||||
* DBUser =
|
||||
The username to connect to the database server.
|
||||
|
||||
* DBPw =
|
||||
* DBPw =
|
||||
Matching password for your username.
|
||||
|
||||
* DBDatabase = newsstats
|
||||
|
@ -57,26 +59,35 @@ INSTALLATION INSTRUCTIONS
|
|||
* DBTableGrps = groups_de
|
||||
Table holding data on postings per group.
|
||||
|
||||
* DBTableHosts = hosts_de
|
||||
Table holding data on postings per server.
|
||||
|
||||
* DBTableClnts = clients_de
|
||||
Table holding data on postings per client.
|
||||
|
||||
b) Optional configuration options
|
||||
|
||||
* TLH = de
|
||||
Limit examination to that top-level hierarchy.
|
||||
|
||||
* TLH = de.alt,news.admin
|
||||
Limit examination to that top-level hierarchy/hierarchies.
|
||||
Comma-separated list.
|
||||
|
||||
3) Database (mysql) setup
|
||||
|
||||
* Setup your database server with a username, password and
|
||||
database matching the NewsStats configuration (see 2 a).
|
||||
* Set up your database server with a username, a password and
|
||||
(optionally) a database matching the NewsStats configuration
|
||||
(see 2 a).
|
||||
|
||||
* Start the installation script:
|
||||
|
||||
# install/install.pl
|
||||
* Start the database creation script:
|
||||
|
||||
It will setup the necessary database tables and display some
|
||||
information on the next steps.
|
||||
$ bin/dbcreate.pl
|
||||
|
||||
It will create the database (if not already present), create the
|
||||
necessary database tables and display some information on the
|
||||
next steps.
|
||||
|
||||
4) Feed (INN) setup
|
||||
|
||||
You have to setup an INN feed to feedlog.pl.
|
||||
You have to set up an INN feed to feedlog.pl.
|
||||
|
||||
* Edit your 'newsfeeds' file and insert something like
|
||||
|
||||
|
@ -86,39 +97,39 @@ INSTALLATION INSTRUCTIONS
|
|||
:Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
|
||||
|
||||
* You should only feed that hierarchy (those hierarchies ...) to
|
||||
feedlog.pl you'll want to cover with your statistical
|
||||
examination. It may be a good idea to setup different feeds (to
|
||||
different databases ...) for different hierarchies.
|
||||
feedlog.pl that you want to cover with your statistical analysis.
|
||||
It may be a good idea to set up different feeds (to different
|
||||
databases ...) for different hierarchies.
|
||||
|
||||
* Please double check that your path to feedlog.pl is correct and
|
||||
feedlog.pl can be executed by the news user
|
||||
|
||||
* Check your 'newsfeeds' syntax:
|
||||
|
||||
# ctlinnd checkfile
|
||||
$ ctlinnd checkfile
|
||||
|
||||
* Reload 'newsfeeds':
|
||||
|
||||
# ctlinnd reload newsfeeds 'Adding newsstats! feed'
|
||||
$ ctlinnd reload newsfeeds 'Adding newsstats! feed'
|
||||
|
||||
* Watch your 'news.notice' and 'errlog' files:
|
||||
|
||||
# tail -f /var/log/news/news.notice
|
||||
$ tail -f /var/log/news/news.notice
|
||||
...
|
||||
# tail -f /var/log/news/errlog
|
||||
$ tail -f /var/log/news/errlog
|
||||
|
||||
Everything should be going smoothly now.
|
||||
|
||||
* If INN is spewing error messages to 'errlog' or reporting
|
||||
continuous respawns of feedlog.pl to 'news.notice', stop your feed:
|
||||
|
||||
# ctlinnd drop 'newsstats!'
|
||||
$ ctlinnd drop 'newsstats!'
|
||||
|
||||
and investigate. 'errlog' may be helpful here.
|
||||
|
||||
* You can restart the feed with
|
||||
|
||||
# ctlinnd begin 'newsstats!'
|
||||
$ ctlinnd begin 'newsstats!'
|
||||
|
||||
later.
|
||||
|
||||
|
|
99
doc/README
99
doc/README
|
@ -1,21 +1,21 @@
|
|||
NewsStats 0.1 (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
NewsStats (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
|
||||
|
||||
NewsStats is a software package for gathering statistical data live
|
||||
from a Usenet feed and subsequent examination.
|
||||
from a Usenet feed and subsequent analysis.
|
||||
|
||||
This script package is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Public License as published by
|
||||
the Free Software Foundation.
|
||||
This package is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Public License as published by the Free
|
||||
Software Foundation.
|
||||
|
||||
---------------------------------------------------------------------
|
||||
|
||||
What's that?
|
||||
|
||||
There's a multitude of tools for the statistical examination of
|
||||
newsgroups: number of postings per month or per person, longest
|
||||
threads, and so on (see <http://th-h.de/infos/usenet/stats.php>
|
||||
[German language] for an incomplete list). Most of them use a per-
|
||||
newsgroup approach while NewsStats is hierarchy oriented.
|
||||
There's a multitude of tools to create statistics about newsgroup
|
||||
usage: number of postings per month or per person, longest threads,
|
||||
and so on (see <https://th-h.de/net/usenet/stats/> [German language]
|
||||
for an incomplete list). Most of them use a per-newsgroup approach
|
||||
while NewsStats is hierarchy oriented.
|
||||
|
||||
NewsStats will accumulate data from a live INN feed, allowing you
|
||||
to process the saved information later on.
|
||||
|
@ -40,50 +40,94 @@ Prerequisites
|
|||
|
||||
* Perl 5.8.x with standard modules
|
||||
- Cwd
|
||||
- Encode
|
||||
- File::Basename
|
||||
- Getopt::Long
|
||||
- Sys::Syslog
|
||||
|
||||
* Perl modules form CPAN
|
||||
* Perl modules from CPAN
|
||||
- Config::Auto
|
||||
- Date::Format
|
||||
- DBI
|
||||
|
||||
* mysql 5.0.x
|
||||
|
||||
* working installation of INN
|
||||
* a working installation of INN
|
||||
|
||||
Installation instructions
|
||||
|
||||
See INSTALL.
|
||||
See INSTALL.
|
||||
|
||||
Documentation is in /doc, configuration in /etc, the NewsStats
|
||||
module in /lib and most scripts in /bin, while /contrib has some
|
||||
sample scripts that may have to be adjusted to work in your
|
||||
configuration.
|
||||
|
||||
Getting Started
|
||||
|
||||
'feedlog.pl' will continuously feed raw data to your raw data
|
||||
table. See the feedlog.pl man page for more information.
|
||||
|
||||
You can process that data via 'gatherstats.pl'; currently only the
|
||||
tabulation of postings per group and month is supported. More to
|
||||
come. See the gatherstats.pl man page for more information.
|
||||
You can process that data via 'gatherstats.pl'; currently the
|
||||
tabulation of postings per group, injection server and posting
|
||||
agent (newsreader) per month is supported. See the gatherstats.pl
|
||||
man page for more information.
|
||||
|
||||
Example:
|
||||
|
||||
bin/gatherstats.pl
|
||||
|
||||
will parse raw data from the last month and save the results in
|
||||
tables for postings per group, server and client, respectively.
|
||||
|
||||
Report generation is handled by specialised scripts for each
|
||||
report type. Currently only reports on the number of postings per
|
||||
group and month are supported; you can use 'groupstats.pl' for
|
||||
this. See the groupstats.pl man page for more information.
|
||||
report type: 'groupstats.pl' for postings per group
|
||||
(s), 'hoststats.pl' for postings per injection server
|
||||
(s) and 'clientstats.pl' for postings per posting agent. See the
|
||||
groupstats.pl, hoststats.pl and clientstats.pl man pages for more
|
||||
information.
|
||||
|
||||
Example:
|
||||
|
||||
bin/groupstats.pl -o postings-desc
|
||||
bin/hoststats.pl -o postings-desc
|
||||
bin/clientstats.pl -o postings-desc -v
|
||||
|
||||
will show reports for postings per group, per injection server and
|
||||
per client (with detailed client versions) for the last month,
|
||||
using the result tables filled by gatherstats.
|
||||
|
||||
To post those reports to Usenet, change postingstats.pl according
|
||||
to your needs (sender, newsgroups and other headers, translation
|
||||
of table headers and text templates) and display a test posting
|
||||
by piping report data into postingstats.pl:
|
||||
|
||||
bin/groupstats.pl --nocomments -s -f dump | bin/postingstats.pl
|
||||
|
||||
If the result is to your liking, add a pipe to an inews
|
||||
implementation.
|
||||
|
||||
Example:
|
||||
|
||||
bin/groupstats.pl --nocomments -s -f dump | bin/postingstats.pl | contrib/tinews.pl -X
|
||||
|
||||
More information
|
||||
|
||||
See the man pages for 'gatherstats' and the report generating
|
||||
scripts.
|
||||
|
||||
Reporting Bugs
|
||||
|
||||
You can report bugs or feature requests to the author using the
|
||||
bug tracker at <http://bugs.th-h.de/>.
|
||||
issue tracker at <https://code.virtcomm.de/thh/newsstats/issues>.
|
||||
|
||||
Please have a look at the TODO list before suggesting
|
||||
improvements.
|
||||
|
||||
More Information
|
||||
Development
|
||||
|
||||
This program is maintained using the Git version control system.
|
||||
You may clone <git://code.th-h.de/usenet/newsstats.git> to check
|
||||
out the current development tree or browse it on the web via
|
||||
<http://code.th-h.de/?p=usenet/newsstats.git>.
|
||||
This package is maintained using the Git version control system at
|
||||
<https://code.virtcomm.de/thh/newsstats/>.
|
||||
|
||||
Related projects
|
||||
|
||||
|
@ -93,6 +137,5 @@ Related projects
|
|||
|
||||
Author
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
<http://th-h.de/>
|
||||
|
||||
Thomas Hochstein <thh@thh.name>
|
||||
<https://th-h.de/>
|
||||
|
|
41
doc/TODO
41
doc/TODO
|
@ -1,24 +1,19 @@
|
|||
NewsStats To-Do List
|
||||
====================
|
||||
|
||||
This is a list of planned bug fixes, improvements and enhancements for
|
||||
This is a list of possible bug fixes, improvements and enhancements for
|
||||
NewsStats.
|
||||
|
||||
Bug numbers refer to the Mantis issue tracker at <http://bugs.th-h.de/>.
|
||||
|
||||
* General
|
||||
- Improve Documentation
|
||||
The documentation is rather sparse and could use some improvement.
|
||||
- Add a test suite
|
||||
There is currently no kind of test suite or regression tests. Something like
|
||||
that is badly needed.
|
||||
- separate directories (bin, config, ...)
|
||||
- make NewsStats installable in accordance with LFS and customary procedures,
|
||||
i.e. scripts to /usr/local/news/bin or /usr/lib/news/bin, configuration
|
||||
to /usr/local/news/etc or /etc/news and so on
|
||||
|
||||
* Additional features
|
||||
- Add hierarchy information (GroupInfo - Bugs #19 #20 #21 #22 #23 #24 #25 #26)
|
||||
- Add hierarchy information (GroupInfo)
|
||||
NewsStats should be able to recognize invalid (i.e. officially not existing)
|
||||
newsgroups and - optionally - drop them from the list of groups. On the
|
||||
other hand, it should recognize existing, but empty groups and add them with
|
||||
|
@ -30,13 +25,23 @@ Bug numbers refer to the Mantis issue tracker at <http://bugs.th-h.de/>.
|
|||
for late creation and deletion), optionally including the previously
|
||||
mentioned information; and you should be able to get the history of any
|
||||
group.
|
||||
- Add other reports
|
||||
NewsStats should include some other kinds of reports (stats on used clients,
|
||||
on postings hosts/servers, ...)
|
||||
- Add tools for database management
|
||||
NewsStats should offer tools e.g. to inject postings into the 'raw' database,
|
||||
or to split databases.
|
||||
|
||||
* GroupInfo project
|
||||
- Create a hierarchy information database, containing information on each
|
||||
newsgroup, its creation and removal time, its tagline, charter and
|
||||
moderation status, including the moderator contact address.
|
||||
- Automatically update hierarchy information, e.g. by parsing control messages
|
||||
(with verification!).
|
||||
- Track changes in meta information (changes to tagline, charter, moderation
|
||||
status etc.)
|
||||
- Add tools to query for hierarchy information:
|
||||
- canonical list of newsgroups for any given time
|
||||
- generate list of changes for a time period
|
||||
- find newsgroups (including wildcards) and display their history
|
||||
|
||||
* Individual improvements
|
||||
+ NewsStats.pm
|
||||
- Improve error handling when reading config
|
||||
|
@ -44,23 +49,11 @@ Bug numbers refer to the Mantis issue tracker at <http://bugs.th-h.de/>.
|
|||
Some other tests - working database connection, valid database and table
|
||||
names - would be nice.
|
||||
|
||||
+ install/install.pl
|
||||
- Read current version from a file dropped and updated by installer
|
||||
- Add / enhance / test error handling
|
||||
- General tests and optimisations
|
||||
|
||||
+ feedlog.pl
|
||||
- Add / enhance / test error handling
|
||||
- General tests and optimisations
|
||||
|
||||
+ gatherstats.pl
|
||||
- Use hierarchy information (see GroupInfo above)
|
||||
- Add gathering of other stats (clients, hosts, ...)
|
||||
- better modularisation (code reuse for other reports!)
|
||||
- Add / enhance / test error handling
|
||||
- General tests and optimisations
|
||||
|
||||
+ groupstats.pl
|
||||
- better modularisation (code reuse for other reports!)
|
||||
+ groupstats.pl, hoststats.pl, clientstats.pl
|
||||
- better modularisation (code reuse)
|
||||
- Add / enhance / test error handling
|
||||
- General tests and optimisations
|
||||
|
|
|
@ -12,8 +12,10 @@ DBDatabase = newsstats
|
|||
#
|
||||
DBTableRaw = raw_de
|
||||
DBTableGrps = groups_de
|
||||
#DBTableClnts =
|
||||
#DBTableHosts =
|
||||
DBTableHosts = hosts_de
|
||||
DBTableClnts = clnts_de
|
||||
|
||||
### hierarchy configuration
|
||||
# comma-separated list of TLHs to parse
|
||||
# newsgroups not starting with one of those patterns are not counted
|
||||
TLH = de
|
417
gatherstats.pl
417
gatherstats.pl
|
@ -1,417 +0,0 @@
|
|||
#! /usr/bin/perl
|
||||
#
|
||||
# gatherstats.pl
|
||||
#
|
||||
# This script will gather statistical information from a database
|
||||
# containing headers and other information from a INN feed.
|
||||
#
|
||||
# It is part of the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
#
|
||||
# It can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
BEGIN {
|
||||
our $VERSION = "0.01";
|
||||
use File::Basename;
|
||||
push(@INC, dirname($0));
|
||||
}
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList);
|
||||
|
||||
use DBI;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
Getopt::Long::config ('bundling');
|
||||
|
||||
################################# Definitions ##################################
|
||||
|
||||
# define types of information that can be gathered
|
||||
# all / groups (/ clients / hosts)
|
||||
my %LegalStats;
|
||||
@LegalStats{('all','groups')} = ();
|
||||
|
||||
################################# Main program #################################
|
||||
|
||||
### read commandline options
|
||||
my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
|
||||
$OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest);
|
||||
GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
|
||||
'clientsdb=s' => \$OptClientsDB,
|
||||
'd|debug!' => \$OptDebug,
|
||||
'groupsdb=s' => \$OptGroupsDB,
|
||||
'hierarchy=s' => \$OptTLH,
|
||||
'hostsdb=s' => \$OptHostsDB,
|
||||
'm|month=s' => \$OptMonth,
|
||||
'rawdb=s' => \$OptRawDB,
|
||||
's|stats=s' => \$OptStatsType,
|
||||
't|test!' => \$OptTest,
|
||||
'h|help' => \&ShowPOD,
|
||||
'V|version' => \&ShowVersion) or exit 1;
|
||||
|
||||
### read configuration
|
||||
my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
|
||||
|
||||
### override configuration via commandline options
|
||||
my %ConfOverride;
|
||||
$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
|
||||
$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
|
||||
$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
|
||||
$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
|
||||
$ConfOverride{'TLH'} = $OptTLH if $OptTLH;
|
||||
&OverrideConfig(\%Conf,\%ConfOverride);
|
||||
|
||||
### get type of information to gather, defaulting to 'all'
|
||||
$OptStatsType = 'all' if !$OptStatsType;
|
||||
&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType))
|
||||
if !exists($LegalStats{$OptStatsType});
|
||||
|
||||
### get time period from --month
|
||||
# get verbal description of time period, drop SQL code
|
||||
my ($Period) = &GetTimePeriod($OptMonth);
|
||||
&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
|
||||
"'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
|
||||
|
||||
### reformat $Conf{'TLH'}
|
||||
my $TLH;
|
||||
if ($Conf{'TLH'}) {
|
||||
# $Conf{'TLH'} is parsed as an array by Config::Auto;
|
||||
# make a flat list again, separated by :
|
||||
if (ref($Conf{'TLH'}) eq 'ARRAY') {
|
||||
$TLH = join(':',@{$Conf{'TLH'}});
|
||||
} else {
|
||||
$TLH = $Conf{'TLH'};
|
||||
}
|
||||
# strip whitespace
|
||||
$TLH =~ s/\s//g;
|
||||
# add trailing dots if none are present yet
|
||||
# (using negative look-behind assertions)
|
||||
$TLH =~ s/(?<!\.):/.:/g;
|
||||
$TLH =~ s/(?<!\.)$/./;
|
||||
# check for illegal characters
|
||||
&Bleat(2,'Config error - illegal characters in TLH definition!')
|
||||
if ($TLH !~ /^[a-zA-Z0-9:+.-]+$/);
|
||||
# escape dots
|
||||
$TLH =~ s/\./\\./g;
|
||||
if ($TLH =~ /:/) {
|
||||
# reformat $TLH from a:b to (a)|(b),
|
||||
# e.g. replace ':' by ')|('
|
||||
$TLH =~ s/:/)|(/g;
|
||||
$TLH = '(' . $TLH . ')';
|
||||
};
|
||||
};
|
||||
|
||||
### init database
|
||||
my $DBHandle = InitDB(\%Conf,1);
|
||||
|
||||
### get data for each month
|
||||
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
|
||||
foreach my $Month (&ListMonth($Period)) {
|
||||
|
||||
print "---------- $Month ----------\n" if $OptDebug;
|
||||
|
||||
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
|
||||
# read list of newsgroups from --checkgroups
|
||||
# into a hash
|
||||
my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))}
|
||||
if $OptCheckgroupsFile;
|
||||
|
||||
### ----------------------------------------------
|
||||
### get groups data (number of postings per group)
|
||||
# get groups data from raw table for given month
|
||||
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
|
||||
"WHERE day LIKE ? AND NOT disregard",
|
||||
$Conf{'DBDatabase'},
|
||||
$Conf{'DBTableRaw'}));
|
||||
$DBQuery->execute($Month.'-%')
|
||||
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
|
||||
"$DBI::errstr\n",$Month,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
|
||||
|
||||
# count postings per group
|
||||
my %Postings;
|
||||
while (($_) = $DBQuery->fetchrow_array) {
|
||||
# get list of newsgroups and hierarchies from Newsgroups:
|
||||
my %Newsgroups = ListNewsgroups($_,$TLH,
|
||||
$OptCheckgroupsFile ? \%ValidGroups : '');
|
||||
# count each newsgroup and hierarchy once
|
||||
foreach (sort keys %Newsgroups) {
|
||||
$Postings{$_}++;
|
||||
};
|
||||
};
|
||||
|
||||
# add valid but empty groups if --checkgroups is set
|
||||
if (%ValidGroups) {
|
||||
foreach (sort keys %ValidGroups) {
|
||||
if (!defined($Postings{$_})) {
|
||||
# add current newsgroup as empty group
|
||||
$Postings{$_} = 0;
|
||||
warn (sprintf("ADDED: %s as empty group\n",$_));
|
||||
# add empty hierarchies for current newsgroup as needed
|
||||
foreach (ParseHierarchies($_)) {
|
||||
my $Hierarchy = $_ . '.ALL';
|
||||
if (!defined($Postings{$Hierarchy})) {
|
||||
$Postings{$Hierarchy} = 0;
|
||||
warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
|
||||
};
|
||||
};
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
# delete old data for that month
|
||||
if (!$OptTest) {
|
||||
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}),
|
||||
undef,$Month)
|
||||
or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ".
|
||||
"$DBI::errstr\n",$Month,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
|
||||
};
|
||||
|
||||
print "----- GroupStats -----\n" if $OptDebug;
|
||||
foreach my $Newsgroup (sort keys %Postings) {
|
||||
print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug;
|
||||
if (!$OptTest) {
|
||||
# write to database
|
||||
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ".
|
||||
"(month,newsgroup,postings) ".
|
||||
"VALUES (?, ?, ?)",
|
||||
$Conf{'DBDatabase'},
|
||||
$Conf{'DBTableGrps'}));
|
||||
$DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
|
||||
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ".
|
||||
"$DBI::errstr\n",$Month,$Newsgroup,
|
||||
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
|
||||
$DBQuery->finish;
|
||||
};
|
||||
};
|
||||
} else {
|
||||
# other types of information go here - later on
|
||||
};
|
||||
};
|
||||
|
||||
### close handles
|
||||
$DBHandle->disconnect;
|
||||
|
||||
__END__
|
||||
|
||||
################################ Documentation #################################
|
||||
|
||||
=head1 NAME
|
||||
|
||||
gatherstats - process statistical data from a raw source
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<--groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
|
||||
|
||||
=head1 REQUIREMENTS
|
||||
|
||||
See L<doc/README>.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This script will extract and process statistical information from a
|
||||
database table which is fed from F<feedlog.pl> for a given time period
|
||||
and write its results to (an)other database table(s). Entries marked
|
||||
with I<'disregard'> in the database will be ignored; currently, you
|
||||
have to set this flag yourself, using your database management tools.
|
||||
You can exclude erroneous entries that way (e.g. automatic reposts
|
||||
(think of cancel floods and resurrectors); spam; ...).
|
||||
|
||||
The time period to act on defaults to last month; you can assign
|
||||
another time period or a single month via the B<--month> option (see
|
||||
below).
|
||||
|
||||
By default B<gatherstats> will process all types of information; you
|
||||
can change that using the B<--stats> option and assigning the type of
|
||||
information to process. Currently that doesn't matter yet as only
|
||||
processing of the number of postings per group per month is
|
||||
implemented anyway.
|
||||
|
||||
Possible information types include:
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<groups> (postings per group per month)
|
||||
|
||||
B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
|
||||
counted for each single group they appear in. Groups not in I<TLH>
|
||||
will be ignored.
|
||||
|
||||
B<gatherstats> will also add up the number of postings for each
|
||||
hierarchy level, but only count each posting once. A posting to
|
||||
de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
|
||||
respectively. A crossposting to de.alt.test and de.alt.admin, on the
|
||||
other hand, will be counted for de.alt.test and de.alt.admin each, but
|
||||
only once for de.alt.ALL and de.ALL.
|
||||
|
||||
Data is written to I<DBTableGrps> (see L<doc/INSTALL>); you can
|
||||
override that default through the B<--groupsdb> option.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Configuration
|
||||
|
||||
B<gatherstats> will read its configuration via Config::Auto from
|
||||
F<newsstats.conf>, which should be present in the same directory.
|
||||
|
||||
See L<doc/INSTALL> for an overview of possible configuration options.
|
||||
|
||||
You can override configuration options via the B<--hierarchy>,
|
||||
B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
|
||||
respectively.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 3
|
||||
|
||||
=item B<-V>, B<--version>
|
||||
|
||||
Print out version and copyright information and exit.
|
||||
|
||||
=item B<-h>, B<--help>
|
||||
|
||||
Print this man page and exit.
|
||||
|
||||
=item B<-d>, B<--debug>
|
||||
|
||||
Output debugging information to STDOUT while processing (number of
|
||||
postings per group).
|
||||
|
||||
=item B<-t>, B<--test>
|
||||
|
||||
Do not write results to database. You should use B<--debug> in
|
||||
conjunction with B<--test> ... everything else seems a bit pointless.
|
||||
|
||||
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]>
|
||||
|
||||
Set processing period to a single month in YYYY-MM format or to a time
|
||||
period between two months in YYYY-MM:YYYY-MM format (two months, separated
|
||||
by a colon).
|
||||
|
||||
=item B<-s>, B<--stats> I<type>
|
||||
|
||||
Set processing type to one of I<all> and I<groups>. Defaults to all
|
||||
(and is currently rather pointless as only I<groups> has been
|
||||
implemented).
|
||||
|
||||
=item B<-c>, B<--checkgroups> I<filename template>
|
||||
|
||||
Check each group against a list of valid newsgroups read from a file,
|
||||
one group on each line and ignoring everything after the first
|
||||
whitespace (so you can use a file in checkgroups format or (part of)
|
||||
your INN active file).
|
||||
|
||||
The filename is taken from I<filename template>, amended by each
|
||||
B<--month> B<gatherstats> is processing in the form of I<template-YYYY-MM>,
|
||||
so that
|
||||
|
||||
gatherstats -m 2010-01:2010-12 -c checkgroups
|
||||
|
||||
will check against F<checkgroups-2010-01> for January 2010, against
|
||||
F<checkgroups-2010-02> for February 2010 and so on.
|
||||
|
||||
Newsgroups not found in the checkgroups file will be dropped (and
|
||||
logged to STDERR), and newsgroups found there but having no postings
|
||||
will be added with a count of 0 (and logged to STDERR).
|
||||
|
||||
=item B<--hierarchy> I<TLH> (newsgroup hierarchy)
|
||||
|
||||
Override I<TLH> from F<newsstats.conf>.
|
||||
|
||||
=item B<--rawdb> I<table> (raw data table)
|
||||
|
||||
Override I<DBTableRaw> from F<newsstats.conf>.
|
||||
|
||||
=item B<--groupsdb> I<table> (postings per group table)
|
||||
|
||||
Override I<DBTableGrps> from F<newsstats.conf>.
|
||||
|
||||
=item B<--clientsdb> I<table> (client data table)
|
||||
|
||||
Override I<DBTableClnts> from F<newsstats.conf>.
|
||||
|
||||
=item B<--hostsdb> I<table> (host data table)
|
||||
|
||||
Override I<DBTableHosts> from F<newsstats.conf>.
|
||||
|
||||
=back
|
||||
|
||||
=head1 INSTALLATION
|
||||
|
||||
See L<doc/INSTALL>.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Process all types of information for last month:
|
||||
|
||||
gatherstats
|
||||
|
||||
Do a dry run, showing results of processing:
|
||||
|
||||
gatherstats --debug --test
|
||||
|
||||
Process all types of information for January of 2010:
|
||||
|
||||
gatherstats --month 2010-01
|
||||
|
||||
Process only number of postings for the year of 2010,
|
||||
checking against checkgroups-*:
|
||||
|
||||
gatherstats -m 2010-01:2010-12 -s groups -c checkgroups
|
||||
|
||||
=head1 FILES
|
||||
|
||||
=over 4
|
||||
|
||||
=item F<gatherstats.pl>
|
||||
|
||||
The script itself.
|
||||
|
||||
=item F<NewsStats.pm>
|
||||
|
||||
Library functions for the NewsStats package.
|
||||
|
||||
=item F<newsstats.conf>
|
||||
|
||||
Runtime configuration file.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report any bugs or feature requests to the author or use the
|
||||
bug tracker at L<http://bugs.th-h.de/>!
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=over 2
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/README>
|
||||
|
||||
=item -
|
||||
|
||||
L<doc/INSTALL>
|
||||
|
||||
=back
|
||||
|
||||
This script is part of the B<NewsStats> package.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Thomas Hochstein <thh@inter.net>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=cut
|
|
@ -2,9 +2,9 @@
|
|||
#
|
||||
# Library functions for the NewsStats package.
|
||||
#
|
||||
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
||||
# Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
|
||||
#
|
||||
# This module can be redistributed and/or modified under the same terms under
|
||||
# This module can be redistributed and/or modified under the same terms under
|
||||
# which Perl itself is published.
|
||||
|
||||
package NewsStats;
|
||||
|
@ -16,8 +16,7 @@ our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
|
|||
require Exporter;
|
||||
@ISA = qw(Exporter);
|
||||
@EXPORT = qw(
|
||||
$MyVersion
|
||||
$PackageVersion
|
||||
$VERSION
|
||||
$FullPath
|
||||
$HomePath
|
||||
ShowVersion
|
||||
|
@ -35,6 +34,7 @@ require Exporter;
|
|||
ListNewsgroups
|
||||
ParseHierarchies
|
||||
ReadGroupList
|
||||
ParseHeaders
|
||||
OutputData
|
||||
FormatOutput
|
||||
SQLHierarchies
|
||||
|
@ -49,23 +49,23 @@ require Exporter;
|
|||
Output => [qw(OutputData FormatOutput)],
|
||||
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
|
||||
SQLSetBounds SQLBuildClause GetMaxLength)]);
|
||||
$VERSION = '0.01';
|
||||
our $PackageVersion = '0.01';
|
||||
$VERSION = '0.4.0';
|
||||
|
||||
use Data::Dumper;
|
||||
use File::Basename;
|
||||
use Cwd qw(realpath);
|
||||
|
||||
use Config::Auto;
|
||||
use DBI;
|
||||
|
||||
#####-------------------------------- Vars --------------------------------#####
|
||||
|
||||
# trim the path
|
||||
# save $0 in $FullPath
|
||||
our $FullPath = $0;
|
||||
our $HomePath = dirname($0);
|
||||
# strip filename and /bin or /install directory to create the $HomePath
|
||||
our $HomePath = dirname(realpath($0));
|
||||
$HomePath =~ s/\/(bin|install)//;
|
||||
# trim $0
|
||||
$0 =~ s%.*/%%;
|
||||
# set version string
|
||||
our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
|
||||
|
||||
#####------------------------------- Basics -------------------------------#####
|
||||
|
||||
|
@ -75,8 +75,8 @@ our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
|
|||
sub ShowVersion {
|
||||
################################################################################
|
||||
### display version and exit
|
||||
print "NewsStats v$PackageVersion\n$MyVersion\n";
|
||||
print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
|
||||
print "$0 from NewsStats v$VERSION\n";
|
||||
print "Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh\@thh.name>\n";
|
||||
print "This program is free software; you may redistribute it ".
|
||||
"and/or modify it under the same terms as Perl itself.\n";
|
||||
exit(100);
|
||||
|
@ -99,6 +99,8 @@ sub ReadConfig {
|
|||
### IN : $ConfFile: config filename
|
||||
### OUT: reference to a hash containing the configuration
|
||||
my ($ConfFile) = @_;
|
||||
# set default
|
||||
$ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile;
|
||||
# mandatory configuration options
|
||||
my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase',
|
||||
'DBTableRaw','DBTableGrps');
|
||||
|
@ -238,7 +240,7 @@ sub ReadGroupList {
|
|||
### ignoring everything after the first whitespace and so accepting files
|
||||
### in checkgroups format as well as (parts of) an INN active file)
|
||||
### IN : $Filename : file to read
|
||||
### OUT: \%ValidGroups: hash containing all valid newsgroups
|
||||
### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups
|
||||
my ($Filename) = @_;
|
||||
my %ValidGroups;
|
||||
open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
|
||||
|
@ -252,6 +254,43 @@ sub ReadGroupList {
|
|||
return \%ValidGroups;
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub ParseHeaders {
|
||||
################################################################################
|
||||
### return a hash of all headers (ignoring duplicate headers)
|
||||
### parsed from raw headers
|
||||
### -> taken and modified from pgpverify
|
||||
### -> Written April 1996, <tale@isc.org> (David C Lawrence)
|
||||
### -> Currently maintained by Russ Allbery <eagle@eyrie.org>
|
||||
### IN : $RawHeaders : raw headers as found in posting
|
||||
### OUT: %Headers : hash containing header contents,
|
||||
### keyed by lower-case header name
|
||||
my (%Header, $Label, $Value);
|
||||
foreach (@_) {
|
||||
s/\r?\n$//;
|
||||
|
||||
last if /^$/;
|
||||
|
||||
if (/^(\S+):[ \t](.+)/) {
|
||||
($Label, $Value) = ($1, $2);
|
||||
# discard all duplicate headers
|
||||
next if $Header{lc($Label)};
|
||||
$Header{lc($Label)} = $Value;
|
||||
} elsif (/^\s/) {
|
||||
# continuation lines
|
||||
if ($Label) {
|
||||
s/^\s+/ /;
|
||||
$Header{lc($Label)} .= $_;
|
||||
} else {
|
||||
warn (sprintf("Non-header line: %s\n",$_));
|
||||
}
|
||||
} else {
|
||||
warn (sprintf("Non-header line: %s\n",$_));
|
||||
}
|
||||
}
|
||||
return %Header;
|
||||
};
|
||||
|
||||
################################################################################
|
||||
|
||||
#####----------------------------- TimePeriods ----------------------------#####
|
||||
|
@ -269,12 +308,12 @@ sub GetTimePeriod {
|
|||
my ($Verbal, $SQL);
|
||||
# define a regular expression for a month
|
||||
my $REMonth = '\d{4}-\d{2}';
|
||||
|
||||
|
||||
# default to last month if option is not set
|
||||
if(!$Month) {
|
||||
$Month = &LastMonth;
|
||||
}
|
||||
|
||||
|
||||
# check for valid input
|
||||
if ($Month =~ /^$REMonth$/) {
|
||||
# single month (YYYY-MM)
|
||||
|
@ -293,7 +332,7 @@ sub GetTimePeriod {
|
|||
# invalid input
|
||||
return (undef,undef);
|
||||
}
|
||||
|
||||
|
||||
return ($Verbal,$SQL);
|
||||
};
|
||||
|
||||
|
@ -391,17 +430,17 @@ sub OutputData {
|
|||
### $GroupBy : primary sorting order (month or key)
|
||||
### $Precision: number of digits right of decimal point (0 or 2)
|
||||
### $ValidKeys: reference to a hash containing all valid keys
|
||||
### $LeadIn : print at start of output
|
||||
### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
|
||||
### $DBQuery : database query handle with executed query,
|
||||
### containing $Month, $Key, $Value
|
||||
### containing $Month, $Key, $Value
|
||||
### $PadField : padding length for key field (optional) for 'pretty'
|
||||
### $PadValue : padding length for value field (optional) for 'pretty'
|
||||
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
|
||||
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $LeadIn, $FileTempl,
|
||||
$DBQuery, $PadField, $PadValue) = @_;
|
||||
my %ValidKeys = %{$ValidKeys} if $ValidKeys;
|
||||
my ($FileName, $Handle, $OUT);
|
||||
our $LastIteration;
|
||||
|
||||
my ($LastIteration, $FileName, $Handle, $OUT);
|
||||
|
||||
# define output types
|
||||
my %LegalOutput;
|
||||
@LegalOutput{('dump','list','pretty')} = ();
|
||||
|
@ -433,14 +472,14 @@ sub OutputData {
|
|||
# safeguards for filename creation:
|
||||
# replace potential problem characters with '_'
|
||||
$FileName = sprintf('%s-%s',$FileTempl,$Caption);
|
||||
$FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
|
||||
$FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
|
||||
open ($OUT,">$FileName")
|
||||
or &Bleat(2,sprintf("Cannot open output file '%s': $!",
|
||||
$FileName));
|
||||
$Handle = $OUT;
|
||||
};
|
||||
print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
|
||||
$Precision, $PadField, $PadValue);
|
||||
print $Handle &FormatOutput($Format, $Comments, $LeadIn, $Caption,
|
||||
$Key, $Value, $Precision, $PadField, $PadValue, $LastIteration);
|
||||
$LastIteration = $Caption;
|
||||
};
|
||||
close $OUT if ($FileTempl);
|
||||
|
@ -459,24 +498,28 @@ sub FormatOutput {
|
|||
### $PadField : padding length for key field (optional) for 'pretty'
|
||||
### $PadValue : padding length for value field (optional) for 'pretty'
|
||||
### OUT: $Output: formatted output
|
||||
my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField,
|
||||
$PadValue) = @_;
|
||||
my ($Format, $Comments, $LeadIn, $Caption, $Key, $Value, $Precision, $PadField,
|
||||
$PadValue, $LastIteration) = @_;
|
||||
my ($Output);
|
||||
# keep last caption in mind
|
||||
our ($LastIteration);
|
||||
# create one line of output
|
||||
if ($Format eq 'dump') {
|
||||
# output as dump (key value)
|
||||
$Output = sprintf ("# %s:\n",$Caption)
|
||||
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
||||
if ($Caption and $Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
||||
$Output .= sprintf ("%s %u\n",$Key,$Value);
|
||||
} elsif ($Format eq 'list') {
|
||||
# output as list (caption key value)
|
||||
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
|
||||
if ($Caption) {
|
||||
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
|
||||
} else {
|
||||
$Output = sprintf ("%s %u\n",$Key,$Value);
|
||||
}
|
||||
} elsif ($Format eq 'pretty') {
|
||||
# output as a table
|
||||
$Output = sprintf ("# ----- %s:\n",$Caption)
|
||||
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
||||
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)) {
|
||||
$Output = $LeadIn;
|
||||
$Output .= sprintf ("# ----- %s:\n",$Caption) if $Caption;
|
||||
}
|
||||
# increase $PadValue for numbers with decimal point
|
||||
$PadValue += $Precision+1 if $Precision;
|
||||
# add padding if $PadField is set; $PadValue HAS to be set then
|
||||
|
@ -540,21 +583,22 @@ sub SQLSortOrder {
|
|||
### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
|
||||
### $OrderBy: secondary sort by month/newsgroups (default)
|
||||
### or number of 'postings'
|
||||
### $Type : newsgroup, host or client+version
|
||||
### OUT: a SQL ORDER BY clause
|
||||
my ($GroupBy,$OrderBy) = @_;
|
||||
my ($GroupBy,$OrderBy,$Type) = @_;
|
||||
my ($GroupSort,$OrderSort) = ('','');
|
||||
# $GroupBy (primary sorting)
|
||||
if (!$GroupBy) {
|
||||
$GroupBy = 'month';
|
||||
} else {
|
||||
($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
|
||||
if ($GroupBy =~ /group/i) {
|
||||
$GroupBy = 'newsgroup';
|
||||
if ($GroupBy =~ /name/i) {
|
||||
$GroupBy = $Type;
|
||||
} else {
|
||||
$GroupBy = 'month';
|
||||
}
|
||||
}
|
||||
my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
|
||||
my $Secondary = ($GroupBy eq 'month') ? $Type : 'month';
|
||||
# $OrderBy (secondary sorting)
|
||||
if (!$OrderBy) {
|
||||
$OrderBy = $Secondary;
|
||||
|
@ -590,44 +634,47 @@ sub SQLParseOrder {
|
|||
################################################################################
|
||||
sub SQLGroupList {
|
||||
################################################################################
|
||||
### explode list of newsgroups separated by : (with wildcards)
|
||||
### explode list of names separated by : (with wildcards)
|
||||
### to a SQL 'WHERE' expression
|
||||
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
|
||||
### IN : $Names: list of names, e.g. newsgroups (group.one.*:group.two:group.three.*)
|
||||
### $Type : newsgroup, host, client
|
||||
### OUT: SQL code to become part of a 'WHERE' clause,
|
||||
### list of newsgroups for SQL bindings
|
||||
my ($Newsgroups) = @_;
|
||||
### list of names for SQL bindings
|
||||
my ($Names,$Type) = @_;
|
||||
my $InvalidCharRegExp;
|
||||
# substitute '*' wildcard with SQL wildcard character '%'
|
||||
$Newsgroups =~ s/\*/%/g;
|
||||
return (undef,undef) if !CheckValidNewsgroups($Newsgroups);
|
||||
# just one newsgroup?
|
||||
return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
|
||||
my ($SQL,@WildcardGroups,@NoWildcardGroups);
|
||||
# list of newsgroups separated by ':'
|
||||
my @GroupList = split /:/, $Newsgroups;
|
||||
foreach (@GroupList) {
|
||||
$Names =~ s/\*/%/g;
|
||||
$InvalidCharRegExp = ',;' if $Type eq 'client';
|
||||
return (undef,undef) if !CheckValidNames($Names,$InvalidCharRegExp);
|
||||
# just one name/newsgroup?
|
||||
return (SQLGroupWildcard($Names,$Type),$Names) if $Names !~ /:/;
|
||||
my ($SQL,@WildcardNames,@NoWildcardNames);
|
||||
# list of names/newsgroups separated by ':'
|
||||
my @NameList = split /:/, $Names;
|
||||
foreach (@NameList) {
|
||||
if ($_ !~ /%/) {
|
||||
# add to list of newsgroup names WITHOUT wildcard
|
||||
push (@NoWildcardGroups,$_);
|
||||
# add to list of names/newsgroup names WITHOUT wildcard
|
||||
push (@NoWildcardNames,$_);
|
||||
} else {
|
||||
# add to list of newsgroup names WITH wildcard
|
||||
push (@WildcardGroups,$_);
|
||||
# add to list of names WITH wildcard
|
||||
push (@WildcardNames,$_);
|
||||
# add wildcard to SQL clause
|
||||
# 'OR' if SQL clause is not empty
|
||||
$SQL .= ' OR ' if $SQL;
|
||||
$SQL .= 'newsgroup LIKE ?'
|
||||
$SQL .= "$Type LIKE ?"
|
||||
}
|
||||
};
|
||||
if (scalar(@NoWildcardGroups)) {
|
||||
if (scalar(@NoWildcardNames)) {
|
||||
# add 'OR' if SQL clause is not empty
|
||||
$SQL .= ' OR ' if $SQL;
|
||||
if (scalar(@NoWildcardGroups) < 2) {
|
||||
# special case: just one newsgroup without wildcard
|
||||
$SQL .= 'newsgroup = ?';
|
||||
if (scalar(@NoWildcardNames) < 2) {
|
||||
# special case: just one name without wildcard
|
||||
$SQL .= "$Type = ?";
|
||||
} else {
|
||||
# create list of newsgroups to include: 'newsgroup IN (...)'
|
||||
$SQL .= 'newsgroup IN (';
|
||||
# create list of names to include: e.g. 'newsgroup IN (...)'
|
||||
$SQL .= "$Type IN (";
|
||||
my $SQLin;
|
||||
foreach (@NoWildcardGroups) {
|
||||
foreach (@NoWildcardNames) {
|
||||
$SQLin .= ',' if $SQLin;
|
||||
$SQLin .= '?';
|
||||
}
|
||||
|
@ -635,27 +682,28 @@ sub SQLGroupList {
|
|||
$SQL .= $SQLin .= ')';
|
||||
}
|
||||
}
|
||||
# add brackets '()' to SQL clause as needed (more than one wildcard group)
|
||||
if (scalar(@WildcardGroups)) {
|
||||
# add brackets '()' to SQL clause as needed (more than one wildcard name)
|
||||
if (scalar(@WildcardNames)) {
|
||||
$SQL = '(' . $SQL .')';
|
||||
}
|
||||
# rebuild @GroupList in (now) correct order
|
||||
@GroupList = (@WildcardGroups,@NoWildcardGroups);
|
||||
return ($SQL,@GroupList);
|
||||
# rebuild @NameList in (now) correct order
|
||||
@NameList = (@WildcardNames,@NoWildcardNames);
|
||||
return ($SQL,@NameList);
|
||||
};
|
||||
|
||||
################################################################################
|
||||
sub SQLGroupWildcard {
|
||||
################################################################################
|
||||
### build a valid SQL 'WHERE' expression with or without wildcards
|
||||
### IN : $Newsgroup: newsgroup expression, probably with wildcard
|
||||
### (group.name or group.name.%)
|
||||
### IN : $Name: expression, probably with wildcard
|
||||
### (group.name or group.name.%)
|
||||
### $Type: newsgroup, host, client
|
||||
### OUT: SQL code to become part of a 'WHERE' clause
|
||||
my ($Newsgroup) = @_;
|
||||
if ($Newsgroup !~ /%/) {
|
||||
return 'newsgroup = ?';
|
||||
my ($Name,$Type) = @_;
|
||||
if ($Name !~ /%/) {
|
||||
return "$Type = ?";
|
||||
} else {
|
||||
return 'newsgroup LIKE ?';
|
||||
return "$Type LIKE ?";
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -668,7 +716,7 @@ sub SQLSetBounds {
|
|||
### OUT: SQL code to become part of a WHERE or HAVING clause
|
||||
my ($Type,$LowBound,$UppBound) = @_;
|
||||
($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound);
|
||||
if($LowBound and $UppBound and $LowBound > $UppBound) {
|
||||
if($LowBound and defined($UppBound) and $LowBound > $UppBound) {
|
||||
&Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ".
|
||||
"$UppBound, exchanging boundaries.");
|
||||
($LowBound,$UppBound) = ($UppBound,$LowBound);
|
||||
|
@ -684,7 +732,7 @@ sub SQLSetBounds {
|
|||
} elsif ($Type eq 'sum') {
|
||||
$WhereHavingFunction = 'SUM(postings)'
|
||||
}
|
||||
$LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound);
|
||||
$LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if defined($LowBound);
|
||||
# set $UppBound to SQL statement:
|
||||
# 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <='
|
||||
if ($Type eq 'level') {
|
||||
|
@ -694,7 +742,7 @@ sub SQLSetBounds {
|
|||
} elsif ($Type eq 'sum') {
|
||||
$WhereHavingFunction = 'SUM(postings)'
|
||||
}
|
||||
$UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound);
|
||||
$UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if defined($UppBound);
|
||||
return ($LowBound,$UppBound);
|
||||
};
|
||||
|
||||
|
@ -757,18 +805,17 @@ sub SQLBuildClause {
|
|||
#####--------------------------- Verifications ----------------------------#####
|
||||
|
||||
################################################################################
|
||||
sub CheckValidNewsgroups {
|
||||
sub CheckValidNames {
|
||||
################################################################################
|
||||
### syntax check of newgroup list
|
||||
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
|
||||
### syntax check of a list
|
||||
### IN : $Names : list of names, e.g. newsgroups (group.one.*:group.two:group.three.*)
|
||||
### InvalidCharRegExp: regular expression for invalid characters
|
||||
### OUT: boolean
|
||||
my ($Newsgroups) = @_;
|
||||
my $InvalidCharRegExp = ',; ';
|
||||
return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1;
|
||||
my ($Names,$InvalidCharRegExp) = @_;
|
||||
$InvalidCharRegExp = ',; ' if (!$InvalidCharRegExp);
|
||||
return ($Names =~ /[$InvalidCharRegExp]/) ? 0 : 1;
|
||||
};
|
||||
|
||||
|
||||
#####------------------------------- done ---------------------------------#####
|
||||
1;
|
||||
|
||||
|
Loading…
Reference in a new issue