Compare commits

..

No commits in common. "5a6a3e58bff6a76dc723bac7b4c0a5275dfb8bdf" and "4cdb7718661c655f0ca73d739278b8c55e8121f2" have entirely different histories.

14 changed files with 193 additions and 2888 deletions

View file

@ -1,537 +0,0 @@
#! /usr/bin/perl
#
# cliservstats.pl
#
# This script will get statistical data on client (newsreader) and
# server (host) usage from a database.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
BEGIN {
use File::Basename;
# we're in .../bin, so our module is in ../lib
push(@INC, dirname($0).'/../lib');
}
use strict;
use warnings;
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
################################# Main program #################################
### read commandline options
my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat,
$OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy,
$OptReportType,$OptSums,$OptType,$UppBound,$OptConfFile);
GetOptions ('c|captions!' => \$OptCaptions,
'comments!' => \$OptComments,
'db=s' => \$OptDB,
'filetemplate=s' => \$OptFileTemplate,
'f|format=s' => \$OptFormat,
'g|group-by=s' => \$OptGroupBy,
'l|lower=i' => \$LowBound,
'm|month=s' => \$OptMonth,
'n|names=s' => \$OptNames,
'o|order-by=s' => \$OptOrderBy,
'r|report=s' => \$OptReportType,
's|sums!' => \$OptSums,
't|type=s' => \$OptType,
'u|upper=i' => \$UppBound,
'conffile=s' => \$OptConfFile,
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
# parse parameters
# $OptComments defaults to TRUE if --filetemplate is not used
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
# parse $OptType
if ($OptType) {
if ($OptType =~ /(host|server)s?/i) {
$OptType = 'host';
} elsif ($OptType =~ /(newsreader|client)s?/i) {
$OptType = 'client';
}
}
&Bleat(2, "Please use '--type server' or '-type newsreader'.") if !$OptType;
# parse $OptReportType
if ($OptReportType) {
if ($OptReportType =~ /sums?/i) {
$OptReportType = 'sum';
} else {
$OptReportType = 'default';
}
}
### read configuration
my %Conf = %{ReadConfig($OptConfFile)};
### set DBTable
if ($OptDB) {
$Conf{'DBTable'} = $OptDB;
}
elsif ($OptType eq 'host') {
$Conf{'DBTable'} = $Conf{'DBTableHosts'};
} else {
$Conf{'DBTable'} = $Conf{'DBTableClnts'};
}
### init database
my $DBHandle = InitDB(\%Conf,1);
### get time period and names, prepare SQL 'WHERE' clause
# get time period
# and set caption for output and expression for SQL 'WHERE' clause
my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
# bail out if --month is invalid
&Bleat(2,"--month option has an invalid format - ".
"please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
# get list of hosts and set expression for SQL 'WHERE' clause
# with placeholders as well as a list of names to bind to them
my ($SQLWhereNames,@SQLBindNames);
if ($OptNames) {
($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,$OptType);
# bail out if --names is invalid
&Bleat(2,"--names option has an invalid format!")
if !$SQLWhereNames;
}
### build SQL WHERE clause
my $ExcludeSums = $OptSums ? '' : sprintf("%s != 'ALL'",$OptType);
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
$ExcludeSums,
&SQLSetBounds('default',$LowBound,$UppBound));
### get sort order and build SQL 'ORDER BY' clause
# force to 'month' for $OptReportType 'sum'
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
# default to 'name' if $OptGroupBy is not set and
# just one name is requested, but more than one month
$OptGroupBy = 'name' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
and $OptNames and $OptNames !~ /[:*%]/);
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, $OptType);
# $GroupBy will contain 'month' or 'host'/'client' (parsed result of $OptGroupBy)
# set it to 'month' or 'key' for OutputData()
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
### get report type and build SQL 'SELECT' query
my $SQLSelect;
my $SQLGroupClause = '';
my $Precision = 0; # number of digits right of decimal point for output
if ($OptReportType and $OptReportType ne 'default') {
$SQLGroupClause = "GROUP BY $OptType";
# change $SQLOrderClause: replace everything before 'postings'
$SQLOrderClause =~ s/BY.+postings/BY postings/;
$SQLSelect = "'All months',$OptType,SUM(postings)";
# change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
$SQLOrderClause =~ s/postings/SUM(postings)/;
} else {
$SQLSelect = "month,$OptType,postings";
};
### get length of longest name delivered by query
### for formatting purposes
my $Field = ($GroupBy eq 'month') ? $OptType : 'month';
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
$Field,'postings',$SQLWhereClause,
'',@SQLBindNames);
### build and execute SQL query
my ($DBQuery);
# prepare query
$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
$SQLSelect,
$Conf{'DBDatabase'},$Conf{'DBTable'},
$SQLWhereClause,$SQLGroupClause,
$SQLOrderClause));
# execute query
$DBQuery->execute(@SQLBindNames)
or &Bleat(2,sprintf("Can't get %s data for %s from %s.%s: %s\n",
$OptType,$CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
$DBI::errstr));
### output results
# set default to 'pretty'
$OptFormat = 'pretty' if !$OptFormat;
# print captions if --caption is set
my $LeadIn;
if ($OptCaptions && $OptComments) {
# print time period with report type
my $CaptionReportType = '(number of postings for each month)';
if ($OptReportType and $OptReportType ne 'default') {
$CaptionReportType = '(number of all postings for that time period)';
}
$LeadIn .= sprintf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
# print name list if --names is set
$LeadIn .= sprintf("# ----- Names: %s\n",join(',',split(/:/,$OptNames)))
if $OptNames;
# print boundaries, if set
my $CaptionBoundary= '(counting only month fulfilling this condition)';
$LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
if ($LowBound or $UppBound);
# print primary and secondary sort order
$LeadIn .= sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
($GroupBy eq 'month') ? 'Months' : 'Names',
($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
}
# output data
&OutputData($OptFormat,$OptComments,$GroupBy,$Precision,'',$LeadIn,
$OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength);
### close handles
$DBHandle->disconnect;
__END__
################################ Documentation #################################
=head1 NAME
cliservstats - create reports on host or client usage
=head1 SYNOPSIS
B<cliservstats> B<-t> I<host|client> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<server(s)|client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS
See L<doc/README>.
=head1 DESCRIPTION
This script create reports on newsgroup usage (number of postings from
each host or using each client per month) taken from result tables
created by B<gatherstats.pl>.
=head2 Features and options
=head3 Time period and names
The time period to act on defaults to last month; you can assign another
time period or a single month (or drop all time constraints) via the
B<--month> option (see below).
B<cliservstats> will process all hosts or clients by default; you can
limit processing to only some hosts or clients by supplying a list of
those names by using the B<--names> option (see below).
=head3 Report type
You can choose between different B<--report> types: postings per month
or all postings summed up; for details, see below.
=head3 Upper and lower boundaries
Furthermore you can set an upper and/or lower boundary to exclude some
results from output via the B<--lower> and B<--upper> options,
respectively. By default, all hosts/clients with more and/or less
postings per month will be excluded from the result set (i.e. not
shown and not considered forsum reports).
=head3 Sorting and formatting the output
By default, all results are grouped by month; you can group results by
hosts/clients instead via the B<--group-by> option. Within those
groups, the list of hosts/clients (or months) is sorted alphabetically
(or chronologically, respectively) ascending. You can change that order
(and sort by number of postings) with the B<--order-by> option. For
details and exceptions, please see below.
The results will be formatted as a kind of table; you can change the
output format to a simple list or just a list of names and number of
postings with the B<--format> option. Captions will be added by means of
the B<--caption> option; all comments (and captions) can be supressed by
using B<--nocomments>.
Last but not least you can redirect all output to a number of files, e.g.
one for each month, by submitting the B<--filetemplate> option, see below.
=head2 Configuration
B<cliservstats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration file
submitted by the B<--conffile> option.
See doc/INSTALL for an overview of possible configuration options.
You can override some configuration options via the B<--db> option.
=head1 OPTIONS
=over 3
=item B<-V>, B<--version>
Print out version and copyright information and exit.
=item B<-h>, B<--help>
Print this man page and exit.
=item B<-t>, B<--type> I<host|client>
Create report for hosts (servers) or clients (newsreaders), using
I<DBTableHosts> or I<DBTableClnts> respectively.
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
Set processing period to a single month in YYYY-MM format or to a time
period between two month in YYYY-MM:YYYY-MM format (two month, separated
by a colon). By using the keyword I<all> instead, you can set no
processing period to process the whole database.
=item B<-n>, B<--names> I<name(s)>
Limit processing to a certain set of host or client names. I<names(s)>
can be a single name (eternal-september.org), a group of names
(*.inka.de) or a list of either of these, separated by colons, for
example
eternal-september.org:solani.org:*.inka.de
=item B<-s>, B<--sums|--nosums> (sum per month)
Include a "virtual" host named "ALL" for every month in output,
containing the sum of all detected hosts for that month.
=item B<-r>, B<--report> I<default|sums>
Choose the report type: I<default> or I<sums>
By default, B<cliservstats> will report the number of postings for each
host/client in each month. But it can also report the total sum of postings
per host/client for all months.
For report type I<sums>, the B<group-by> option has no meaning and
will be silently ignored (see below).
=item B<-l>, B<--lower> I<lower boundary>
Set the lower boundary. See below.
=item B<-l>, B<--upper> I<upper boundary>
Set the upper boundary.
By default, all hosts/clients with more postings per month than the
upper boundary and/or less postings per month than the lower boundary
will be excluded from further processing. For the default report that
means each month only hosts/clients with a number of postings between
the boundaries will be displayed. For the sums report, hosts/clients
with a number of postings exceeding the boundaries in all (!) months
will not be considered.
=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>
By default, all results are grouped by month, sorted chronologically in
ascending order, like this:
# ----- 2012-01:
arcor-online.net : 9379
individual.net : 19525
news.albasani.net: 9063
# ----- 2012-02:
arcor-online.net : 8606
individual.net : 16768
news.albasani.net: 7879
The results can be grouped by host/client instead via
B<--group-by> I<name>:
----- individual.net
2012-01: 19525
2012-02: 16768
----- arcor-online.net
2012-01: 9379
2012-02: 8606
----- news.albasani.net
2012-01: 9063
2012-02: 7879
By appending I<-desc> to the group-by option parameter, you can reverse
the sort order - e.g. B<--group-by> I<month-desc> will give:
# ----- 2012-02:
arcor-online.net : 8606
individual.net : 16768
news.albasani.net: 7879
# ----- 2012-01:
arcor-online.net : 9379
individual.net : 19525
news.albasani.net: 9063
Sums reports (see above) will always be grouped by months; this option
will therefore be ignored.
=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
Within each group (a single month or single host/client, see above),
the report will be sorted by name (or month) in ascending alphabetical
order by default. You can change the sort order to descending or sort
by number of postings instead.
=item B<-f>, B<--format> I<pretty|list|dump>
Select the output format, I<pretty> being the default:
# ----- 2012-01:
arcor-online.net : 9379
individual.net : 19525
# ----- 2012-02:
arcor-online.net : 8606
individual.net : 16768
I<list> format looks like this:
2012-01 arcor-online.net 9379
2012-01 individual.net 19525
2012-02 arcor-online.net 8606
2012-02 individual.net 16768
And I<dump> format looks like this:
# 2012-01:
arcor-online.net 9379
individual.net 19525
# 2012-02:
arcor-online.net 8606
individual.net 16768
You can remove the comments by using B<--nocomments>, see below.
=item B<-c>, B<--captions|--nocaptions>
Add captions to output, like this:
----- Report for 2012-01 to 2012-02 (number of postings for each month)
----- Names: individual.net
----- Threshold: 8000 => x (counting only month fulfilling this condition)
----- Grouped by Month (ascending), sorted by number of postings descending
False by default.
=item B<--comments|--nocomments>
Add comments (group headers) to I<dump> and I<pretty> output. True by default
as logn as B<--filetemplate> is not set.
Use I<--nocomments> to suppress anything except host/client names or months and
numbers of postings.
=item B<--filetemplate> I<filename template>
Save output to file(s) instead of dumping it to STDOUT. B<cliservstats> will
create one file for each month (or each host/client, accordant to the
setting of B<--group-by>, see above), with filenames composed by adding
year and month (or host/client names) to the I<filename template>, for
example with B<--filetemplate> I<stats>:
stats-2012-01
stats-2012-02
... and so on
=item B<--db> I<database table>
Override I<DBTableHosts> or I<DBTableClnts> from F<newsstats.conf>.
=item B<--conffile> I<filename>
Load configuration from I<filename> instead of F<newsstats.conf>.
=back
=head1 INSTALLATION
See L<doc/INSTALL>.
=head1 EXAMPLES
Show number of postings per group for lasth month in I<pretty> format:
cliservstats --type host
Show that report for January of 2010 and *.inka plus individual.net:
cliservstats --type host --month 2010-01 --names *.inka:individual.net:
Only show clients with 30 postings or less last month, ordered
by number of postings, descending, in I<pretty> format:
cliservstats --type client --upper 30 --order-by postings-desc
List number of postings per host for each month of 2010 and redirect
output to one file for each month, named hosts-2010-01 and so on, in
machine-readable form (without formatting):
cliservstats -t host -m 2010-01:2010-12 -f dump --filetemplate hosts
=head1 FILES
=over 4
=item F<bin/cliservstats.pl>
The script itself.
=item F<lib/NewsStats.pm>
Library functions for the NewsStats package.
=item F<etc/newsstats.conf>
Runtime configuration file.
=back
=head1 BUGS
Please report any bugs or feature requests to the author or use the
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
=head1 SEE ALSO
=over 2
=item -
L<doc/README>
=item -
l>doc/INSTALL>
=item -
gatherstats -h
=back
This script is part of the B<NewsStats> package.
=head1 AUTHOR
Thomas Hochstein <thh@thh.name>
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.
=cut

View file

@ -7,7 +7,7 @@
#
# It is part of the NewsStats package.
#
# Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
# Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
@ -20,7 +20,7 @@ BEGIN {
use strict;
use warnings;
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList ParseHeaders);
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList);
use DBI;
use Getopt::Long qw(GetOptions);
@ -31,21 +31,19 @@ Getopt::Long::config ('bundling');
# define types of information that can be gathered
# all / groups (/ clients / hosts)
my %LegalStats;
@LegalStats{('all','groups','hosts')} = ();
@LegalStats{('all','groups')} = ();
################################# Main program #################################
### read commandline options
my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
$OptHostsDB,$OptMID,$OptMonth,$OptRawDB,$OptStatsType,$OptTest,
$OptConfFile);
$OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest,$OptConfFile);
GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
'clientsdb=s' => \$OptClientsDB,
'd|debug+' => \$OptDebug,
'd|debug!' => \$OptDebug,
'groupsdb=s' => \$OptGroupsDB,
'hierarchy=s' => \$OptTLH,
'hostsdb=s' => \$OptHostsDB,
'mid=s' => \$OptMID,
'm|month=s' => \$OptMonth,
'rawdb=s' => \$OptRawDB,
's|stats=s' => \$OptStatsType,
@ -66,11 +64,6 @@ $ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
$ConfOverride{'TLH'} = $OptTLH if $OptTLH;
&OverrideConfig(\%Conf,\%ConfOverride);
# set --debug and --test if --mid is set
if ($OptMID) {
$OptDebug = 1; $OptTest = 1;
}
### get type of information to gather, defaulting to 'all'
$OptStatsType = 'all' if !$OptStatsType;
&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType))
@ -115,9 +108,6 @@ if ($Conf{'TLH'}) {
### init database
my $DBHandle = InitDB(\%Conf,1);
my $DBRaw = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
my $DBGrps = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
my $DBHosts = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
### get data for each month
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
@ -125,76 +115,30 @@ foreach my $Month (&ListMonth($Period)) {
print "---------- $Month ----------\n" if $OptDebug;
### GroupStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptMID,$OptTest,$OptDebug);
};
### HostStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'hosts') {
# define known hosts using subdomains
my @KnownHosts = qw(abavia.com aioe.org arcor-online.net arcor-ip.de astraweb.com read.cnntp.org
easynews.com eternal-september.org euro.net fernuni-hagen.de free.fr newsread.freenet.ag
googlegroups.com heirich.name news.neostrada.pl netcologne.de newsdawg.com newscene.com
news-service.com octanews.com readnews.com wieslauf.sub.de highway.telekom.at
united-newsserver.de xennanews.com xlned.com xsnews.nl news.xs4all.nl);
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptMID,$OptTest,$OptDebug,@KnownHosts);
};
};
### close handles
$DBHandle->disconnect;
################################# Subroutines ##################################
sub GroupStats {
### ----------------------------------------------------------------------------
### collect number of postings per group
### IN : $DBHandle : database handle
### $DBRaw : database table for raw data (to read from)
### $DBGrps : database table for groups data (to write to)
### $Month : current month to do
### $MID : specific Message-ID to fetch (testing purposes)
### $TLH : TLHs to collect
### $Checkgroupsfile : filename template for checkgroups file
### (expanded to $Checkgroupsfile-$Month)
### $Test : test mode
### $Debug : debug mode
### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$CheckgroupsFile,$MID,$Test,$Debug) = @_;
# read list of newsgroups from --checkgroups
# into a hash
my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$CheckgroupsFile,$Month))}
if $CheckgroupsFile;
my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))}
if $OptCheckgroupsFile;
my $DBQuery;
if (!$MID) {
### ----------------------------------------------
### get groups data (number of postings per group)
# get groups data from raw table for given month
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$Conf{'DBDatabase'},
$Conf{'DBTableRaw'}));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get groups data for %s from %s: ".
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,
$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get groups data for %s from %s: ".
"$DBI::errstr\n",$MID,
$DBRaw));
}
$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
# count postings per group
my %Postings;
while (($_) = $DBQuery->fetchrow_array) {
# get list of newsgroups and hierarchies from Newsgroups:
my %Newsgroups = ListNewsgroups($_,$TLH,
$CheckgroupsFile ? \%ValidGroups : '');
$OptCheckgroupsFile ? \%ValidGroups : '');
# count each newsgroup and hierarchy once
foreach (sort keys %Newsgroups) {
$Postings{$_}++;
@ -221,166 +165,40 @@ sub GroupStats {
};
# delete old data for that month
if (!$Test) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
$DBGrps), undef,$Month)
or &Bleat(2,sprintf("Can't delete old groups data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBGrps));
if (!$OptTest) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}),
undef,$Month)
or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
};
print "----- GroupStats -----\n" if $Debug;
print "----- GroupStats -----\n" if $OptDebug;
foreach my $Newsgroup (sort keys %Postings) {
print "$Newsgroup => $Postings{$Newsgroup}\n" if $Debug;
if (!$Test) {
print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug;
if (!$OptTest) {
# write to database
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ".
"(month,newsgroup,postings) ".
"VALUES (?, ?, ?)",$DBGrps));
"VALUES (?, ?, ?)",
$Conf{'DBDatabase'},
$Conf{'DBTableGrps'}));
$DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s: ".
"$DBI::errstr\n",$Month,$Newsgroup,$DBGrps));
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ".
"$DBI::errstr\n",$Month,$Newsgroup,
$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
$DBQuery->finish;
};
};
};
### ----------------------------------------------------------------------------
sub HostStats {
### ----------------------------------------------------------------------------
### collect number of postings per server
### IN : $DBHandle : database handle
### $DBRaw : database table for raw data (to read from)
### $DBHosts : database table for hosts data (to write to)
### $Month : current month to do
### $MID : specific Message-ID to fetch (testing purposes)
### $Test : test mode
### $Debug : debug mode
### @KnownHosts : list of known hosts with subdomains
### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBHosts,$Month,$MID,$Test,$Debug,@KnownHosts) = @_;
my (%Postings,$DBQuery);
if (!$MID) {
# get raw header data from raw table for given month
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups,headers FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get hosts data for %s from %s: ".
"$DBI::errstr\n",$MID,$DBRaw));
}
### ----------------------------------------------
print "----- HostStats -----\n" if $Debug;
### parse headers
while (my ($Newsgroups,$Headers) = $DBQuery->fetchrow_array) {
### skip postings with wrong TLH
# remove whitespace from contents of Newsgroups:
chomp($Newsgroups);
$Newsgroups =~ s/\s//;
my $GroupCount;
for (split /,/, $Newsgroups) {
# don't count newsgroup/hierarchy in wrong TLH
next if($TLH and !/^$TLH/);
$GroupCount++;
};
next if !$GroupCount;
my $Host;
my %Header = ParseHeaders(split(/\n/,$Headers));
# ([a-z0-9-_]+\.[a-z0-9-_.]+) tries to match a hostname
# Injection-Info
if($Header{'injection-info'}) {
($Host) = $Header{'injection-info'} =~ /^\s*([a-z0-9-_]+\.[a-z0-9-_.]+);/i;
# reset if IP address
undef($Host) if $Host && $Host !~ /[g-z]/i;
}
# X-Trace
if (!$Host && $Header{'x-trace'}) {
(undef, $Host) = $Header{'x-trace'} =~ /^(\s|\d)*([a-z0-9-_]+\.[a-z0-9-_.]+)/i;
# reset if IP address
undef($Host) if $Host && $Host !~ /[g-z]/i;
}
# Path
if (!$Host) {
if ($Header{'path'} =~ /!([^!]+)!.POSTED!/) {
$Host = "$1";
} elsif ($Header{'path'} =~ /([^!]+)!.POSTED.[^!]+!?/) {
$Host = "$1";
} else {
# iterate on the Path: header until we have a host name or no more
# path elements
while (!$Host && $Header{'path'} =~ /!/) {
($Host) = $Header{'path'} =~ /!?([a-z0-9-_]+\.[a-z0-9-_.]+)!!?[^!]+!?$/i;
undef($Host) if $Host && $Host =~ /\.MISMATCH/;
# remove last path element
$Header{'path'} =~ s/!!?[^!]+$//;
};
}
}
# trailing .POSTED
($Host) = $Host =~ /(\S+)\.POSTED$/ if $Host =~ /\.POSTED$/;
# special cases
$Host = 'news.highwinds-media.com' if $Host =~ /f(e|x)\d\d\.\S{3}\d?$/
or $Host =~ /(newsfe|fed)\d+\.(iad|ams2)$/;
$Host = 'newshosting.com' if $Host =~ /post\d*\.iad$/;
$Host = 'eternal-september.org' if $Host =~ /dont-email\.me$/;
# normalize hosts
foreach (@KnownHosts) {
if ($Host =~ /\.$_$/) {
($Host) = $_ ;
last;
}
}
# lowercase
$Host = lc($Host);
# count host
if ($Host) {
$Postings{$Host}++;
$Postings{'ALL'}++;
} else {
&Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
}
printf("%s: %s\n", $Header{'message-id'}, $Host) if ($MID or $Debug && $Debug >1);
};
# delete old data for that month
if (!$Test) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
$DBHosts),undef,$Month)
or &Bleat(2,sprintf("Can't delete old hosts data for %s from %s: ".
"$DBI::errstr\n",$Month,$DBHosts));
};
foreach my $Host (sort keys %Postings) {
print "$Host => $Postings{$Host}\n" if $Debug;
if (!$Test) {
# write to database
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
"(month,host,postings) ".
"VALUES (?, ?, ?)",$DBHosts));
$DBQuery->execute($Month, $Host, $Postings{$Host})
or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s: ".
"$DBI::errstr\n",$Month,$Host,$DBHosts));
$DBQuery->finish;
};
# other types of information go here - later on
};
};
### close handles
$DBHandle->disconnect;
__END__
################################ Documentation #################################
@ -413,7 +231,9 @@ below).
By default B<gatherstats> will process all types of information; you
can change that using the B<--stats> option and assigning the type of
information to process.
information to process. Currently that doesn't matter yet as only
processing of the number of postings per group per month is
implemented anyway.
Possible information types include:
@ -435,15 +255,6 @@ only once for de.alt.ALL and de.ALL.
Data is written to I<DBTableGrps> (see L<doc/INSTALL>); you can
override that default through the B<--groupsdb> option.
=item B<hosts> (postings from host per month)
B<gatherstats> will examine Injection-Info:, X-Trace: and Path:
headers and try to normalize them. Groups not in I<TLH> will be
ignored. The sum of all detected hosts will also saved for each month.
Data is written to I<DBTableHosts> (see L<doc/INSTALL>); you can
override that default through the B<--hostsdb> option.
=back
=head2 Configuration
@ -488,8 +299,9 @@ by a colon).
=item B<-s>, B<--stats> I<type>
Set processing type to one of I<all>, I<groups> or I<hosts>. Defaults
to all.
Set processing type to one of I<all> and I<groups>. Defaults to all
(and is currently rather pointless as only I<groups> has been
implemented).
=item B<-c>, B<--checkgroups> I<filename template>
@ -605,7 +417,7 @@ Thomas Hochstein <thh@thh.name>
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.

View file

@ -51,8 +51,10 @@ GetOptions ('b|boundary=s' => \$OptBoundType,
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
# parse parameters
# $OptComments defaults to TRUE if --filetemplate is not used
$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
# $OptComments defaults to TRUE
$OptComments = 1 if (!defined($OptComments));
# force --nocomments when --filetemplate is used
$OptComments = 0 if ($OptFileTemplate);
# parse $OptBoundType
if ($OptBoundType) {
if ($OptBoundType =~ /level/i) {
@ -108,7 +110,7 @@ my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
# with placeholders as well as a list of newsgroup to bind to them
my ($SQLWhereNewsgroups,@SQLBindNewsgroups);
if ($OptNewsgroups) {
($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups,'newsgroup');
($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups);
# bail out if --newsgroups is invalid
&Bleat(2,"--newsgroups option has an invalid format!")
if !$SQLWhereNewsgroups;
@ -141,7 +143,7 @@ $OptGroupBy = 'newsgroup' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
and $OptNewsgroups and $OptNewsgroups !~ /[:*%]/);
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'newsgroup');
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy);
# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy)
# set it to 'month' or 'key' for OutputData()
$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
@ -224,7 +226,6 @@ $DBQuery->execute(@SQLBindNewsgroups)
# set default to 'pretty'
$OptFormat = 'pretty' if !$OptFormat;
# print captions if --caption is set
my $LeadIn;
if ($OptCaptions && $OptComments) {
# print time period with report type
my $CaptionReportType= '(number of postings for each month)';
@ -234,9 +235,9 @@ if ($OptCaptions && $OptComments) {
$CaptionReportType= '(number of all postings for that time period)'
if $OptReportType eq 'sum';
}
$LeadIn .= sprintf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
# print newsgroup list if --newsgroups is set
$LeadIn .= sprintf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups)))
printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups)))
if $OptNewsgroups;
# print boundaries, if set
my $CaptionBoundary= '(counting only month fulfilling this condition)';
@ -245,12 +246,12 @@ if ($OptCaptions && $OptComments) {
$CaptionBoundary= '(on average)' if $OptBoundType eq 'average';
$CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum';
}
$LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
printf("# ----- Threshold: %s %s x %s %s %s\n",
$LowBound ? $LowBound : '',$LowBound ? '=>' : '',
$UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
if ($LowBound or $UppBound);
# print primary and secondary sort order
$LeadIn .= sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
printf("# ----- Grouped by %s (%s), sorted %s%s\n",
($GroupBy eq 'month') ? 'Months' : 'Newsgroups',
($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
@ -259,7 +260,7 @@ if ($OptCaptions && $OptComments) {
# output data
&OutputData($OptFormat,$OptComments,$GroupBy,$Precision,
$OptCheckgroupsFile ? $ValidGroups : '',$LeadIn,
$OptCheckgroupsFile ? $ValidGroups : '',
$OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength);
### close handles
@ -335,6 +336,7 @@ using B<--nocomments>.
Last but not least you can redirect all output to a number of files, e.g.
one for each month, by submitting the B<--filetemplate> option, see below.
Captions and comments are automatically disabled in this case.
=head2 Configuration
@ -587,11 +589,10 @@ False by default.
=item B<--comments|--nocomments>
Add comments (group headers) to I<dump> and I<pretty> output. True by default
as logn as B<--filetemplate> is not set.
Add comments (group headers) to I<dump> and I<pretty> output. True by default.
Use I<--nocomments> to suppress anything except newsgroup names/months and
numbers of postings.
numbers of postings. This is enforced when using B<--filetemplate>, see below.
=item B<--filetemplate> I<filename template>
@ -605,6 +606,8 @@ example with B<--filetemplate> I<stats>:
stats-2012-02
... and so on
B<--nocomments> is enforced, see above.
=item B<--groupsdb> I<database table>
Override I<DBTableGrps> from F<newsstats.conf>.

View file

@ -1,354 +0,0 @@
#!/usr/bin/perl
#
# postingstats.pl
#
# This script will create statistic postings from NewsStats output.
# It defaults to statistics for de.* posted to de.admin.lists, but
# defaults can be changed at ----- configuration -----.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2010-2012, 2025 Thomas Hochstein <thh@thh.name>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
#
# Usage:
# $~ groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups
# $~ cliservstats.pl -t server --nocomments --sums --format dump | postingstats.pl -t hosts
#
BEGIN {
use File::Basename;
# we're in .../bin, so our module is in ../lib
push(@INC, dirname($0).'/../lib');
}
use strict;
use warnings;
use NewsStats qw(:DEFAULT LastMonth);
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
use constant TABLEWIDTH => 28; # width of table without newsgroup name
##### ----- pre-config -----------------------------------------------
### read commandline options
my ($Month, $Type);
GetOptions ('m|month=s' => \$Month,
't|type=s' => \$Type,
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
$Month = &LastMonth if !$Month;
if ($Month !~ /^\d{4}-\d{2}$/) {
$Month = &LastMonth;
&Bleat(1,"--month option has an invalid format - set to $Month.");
};
# parse $Type
if (!$Type) {
# default
$Type = 'GroupStats';
} elsif ($Type =~ /(news)?groups?/i) {
$Type = 'GroupStats';
} elsif ($Type =~ /(host|server)s?/i) {
$Type = 'HostStats';
};
my $Timestamp = time;
##### ----- configuration --------------------------------------------
my $TLH = 'de';
my %Heading = ('GroupStats' => 'Postingstatistik fuer de.* im Monat '.$Month,
'HostStats' => 'Serverstatistik fuer de.* im Monat '.$Month
);
my %TH = ('counter' => 'Nr.',
'value' => 'Anzahl',
'percentage' => 'Prozent'
);
my %LeadIn = ('GroupStats' => <<GROUPSIN, 'HostStats' => <<HOSTSIN);
From: Thomas Hochstein <thh\@thh.name>
Newsgroups: local.test
Subject: Postingstatistik fuer de.* im Monat $Month
Message-ID: <destat-postings-$Month.$Timestamp\@mid.news.szaf.org>
Approved: thh\@thh.name
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
User-Agent: postingstats.pl/$VERSION (NewsStats)
GROUPSIN
From: Thomas Hochstein <thh\@thh.name>
Newsgroups: local.test
Subject: Serverstatistik fuer de.* im Monat $Month
Message-ID: <destat-hosts-$Month.$Timestamp\@mid.news.szaf.org>
Approved: thh\@thh.name
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
User-Agent: postingstats.pl/$VERSION (NewsStats)
HOSTSIN
my %LeadOut = ('GroupStats' => <<GROUPSOUT, 'HostStats' => <<HOSTSOUT);
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
richten werden nicht erfasst; Postings, die supersedet oder gecancelt
wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
Loeschnachricht) erreicht haben. Crosspostings werden in jeder Gruppe,
in die sie gerichtet sind, gezaehlt, aber bei Ermittlung der Summe be-
reinigt; daher ist die Postinganzahl fuer de.* gesamt niedriger als die
Summe der Postinganzahlen der Einzelgruppen.
Die Daten stehen graphisch aufbereitet unter <http://usenet.dex.de/> zur
Verfuegung.
GROUPSOUT
Alle Zahlen wurden ermittelt auf einem Newsserver mit redundanter Anbin-
dung fuer de.* unter Anwendung ueblicher Filtermassnahmen. Steuernach-
richten werden nicht erfasst; Postings, die supersedet oder gecancelt
wurden, bleiben erfasst, sofern sie das System ueberhaupt (und vor der
Loeschnachricht) erreicht haben.
HOSTSOUT
##### ----- subroutines ----------------------------------------------
sub Percentage {
# calculate percentage rate from base value and percentage
my ($Base,$Percentage) = @_;
return ($Percentage * 100 / $Base);
}
sub Divider {
# build a divider line of $Symbol as wide as the table is
my ($Symbol,$MaxLength) = @_;
return ':' . $Symbol x ($MaxLength+TABLEWIDTH) . ":\n";
}
##### ----- main loop ------------------------------------------------
my (%Value, $SumName, $SumTotal, $MaxLength);
$MaxLength = 0;
if ($Type eq 'GroupStats') {
$SumName = "$TLH.ALL";
$TH{'name'} = 'Newsgroup'
} elsif ($Type eq 'HostStats') {
$SumName = 'ALL';
$TH{'name'} = 'Server'
}
# read from STDIN
while(<>) {
my ($Name, $Value) = split;
$SumTotal = $Value if $Name eq $SumName;
next if $Name =~ /ALL$/;
$Value{$Name} = $Value;
$MaxLength = length($Name) if length($Name) > $MaxLength;
}
# print to STDOUT
my $PaddingLeft = ' ' x int((($MaxLength+TABLEWIDTH-2-length($Heading{$Type}))/2));
my $PaddingRight = $PaddingLeft;
$PaddingLeft .= ' ' if (length($Heading{$Type}) + (length($PaddingLeft) * 2) < $MaxLength+TABLEWIDTH);
my $Counter = 0;
print $LeadIn{$Type};
print &Divider('=',$MaxLength);
printf(": %s%s%s :\n",$PaddingLeft,$Heading{$Type},$PaddingRight);
print &Divider('=',$MaxLength);
printf(": %-3s : %-6s : %-7s : %-*s :\n",
substr($TH{'counter'},0,3),
substr($TH{'value'},0,6),
substr($TH{'percentage'},0,7),
$MaxLength,$TH{'name'});
print &Divider('-',$MaxLength);
foreach my $Name (sort { $Value{$b} <=> $Value {$a}} keys %Value) {
$Counter++;
printf(": %3u. : %6u : %6.2f%% : %-*s :\n",$Counter,$Value{$Name},&Percentage($SumTotal,$Value{$Name}),$MaxLength,$Name);
}
print &Divider('-',$MaxLength);
printf(": : %6u : %s : %-*s :\n",$SumTotal,'100.00%',$MaxLength,'');
print &Divider('=',$MaxLength);
print $LeadOut{$Type};
__END__
################################ Documentation #################################
=head1 NAME
postingstats - format and post reports
=head1 SYNOPSIS
B<postingstats> B<-t> I<groups|hosts> [B<-Vh> [B<-m> I<YYYY-MM>]
=head1 REQUIREMENTS
See L<doc/README>.
=head1 DESCRIPTION
This script will re-format reports on newsgroup usage created by
B<groupstats.pl> or B<cliservstats.pl> and create a message that can
be posted to Usenet.
=head2 Features and options
B<postingstats> will create a table with entries numbered from most
to least and percentages calculated from the sum total of all values.
It depends on a sorted list on STDIN in I<dump> format with I<sums>.
B<postingstats> needs a B<--type> and a B<--month> to create a caption
and select matching lead-ins and lead-outs. B<--type> is also needed
to catch the correct sum total from input.
It will default to posting statistics (number of postings per group)
and last month.
Output from B<postingstats> can be piped to any C<inews> implementation,
e.g. C<tinews.pl> from L<ftp://ftp.tin.org/pub/news/clients/tin/tools/tinews.pl>
(present in C</contrib/>).
=head2 Configuration
Configuration is done by changing the code in the
C<----- configuration -----> section.
=over 3
=item C<$TLH>
Top level hierarchy the report was created for. Used for display and
sum total.
=item C<%Heading>
Hash with keys for I<GroupStats> and I<HostStats>. Used to display a
heading.
=item C<%TH>
Hash with keys for I<counter>, I<value> and I<percentage>. Used to
create the table header for I<number>, I<quantity> and I<percentage>.
I<counter> must not be longer than 3 characters, I<value> no longer
than 6 characters and I<percentage> no longer than 7 characters.
Output will be truncated otherwise.
=item C<%LeadIn>
Hash with keys for I<GroupStats> and I<HostStats>. Used to create the
headers for our posting. Can contain other text that will be shown
before C<%Heading>.
=item C<%LeadOut>
Hash with keys for I<GroupStats> and I<HostStats>. Will be shown at the
end of our posting.
=back
=head1 OPTIONS
=over 3
=item B<-V>, B<--version>
Print out version and copyright information and exit.
=item B<-h>, B<--help>
Print this man page and exit.
=item B<-t>, B<--type> I<groups|hosts>
Set report type to posting statistics or hosts statistics accordingly.
=item B<-m>, B<--month> I<YYYY-MM>
Set month for display.
=back
=head1 INSTALLATION
See L<doc/INSTALL>.
=head1 USAGE
Create a posting from a posting statistics report for last month:
groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups
Create a posting from a posting statistics report for 2012-01:
groupstats.pl --nocomments --sums --format dump | postingstats.pl -t groups -m 2012-01
Create a posting from a host statistics report for last month:
cliservstats.pl -t server --nocomments --sums --format dump | postingstats.pl -t hosts
=head1 FILES
=over 4
=item F<bin/postingstats.pl>
The script itself.
=item F<lib/NewsStats.pm>
Library functions for the NewsStats package.
=item F<etc/newsstats.conf>
Runtime configuration file.
=back
=head1 BUGS
Please report any bugs or feature requests to the author or use the
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!
=head1 SEE ALSO
=over 2
=item -
L<doc/README>
=item -
l>doc/INSTALL>
=item -
groupstats -h
=item -
cliservstats -h
=back
This script is part of the B<NewsStats> package.
=head1 AUTHOR
Thomas Hochstein <thh@thh.name>
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2010-2012, 2025 Thomas Hochstein <thh@thh.name>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.
=cut

View file

@ -1,9 +0,0 @@
#!/bin/bash
# installation path is /srv/newsstats/, please adjust accordingly
if [[ $1 =~ [0-9]{4}-[0-9]{2} ]]; then
/srv/newsstats/bin/groupstats.pl --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
/srv/newsstats/bin/cliservstats.pl -t server --nocomments --sums --format dump --month $1 | /srv/newsstats/bin/postingstats.pl -t server --month $1 | /srv/newsstats/contrib/tinews.pl -X -Y
else
echo 'Input error, please use dopostingstats.sh YYYY-MM'
fi

File diff suppressed because it is too large Load diff

View file

@ -1,11 +0,0 @@
#!/bin/bash
# installation path is /srv/newsstats/, please adjust accordingly
# $1: newsgroup
echo "Stats for $1"
cd /srv/newsstats/
for year in {2012..2022}
do
echo -n "${year}: "
bin/groupstats.pl -m $year-01:$year-12 -r sums -n $1
done

View file

@ -1,28 +1,3 @@
NewsStats 0.3.0 (2025-05-18)
* Extract GroupStats (in gatherstats) to subroutine.
* Add ParseHeader() to library.
* Add HostStats to gatherstats.
* Add DBTableHosts structure to install script.
* Add cliservstats (for hosts and clients).
- Refactor SQL generators.
* Add --mid option to gatherstats for debugging purposes.
* Don't parse NNTP-Posting-Host to determine the server name.
* Add more known hosts.
* Implement hierarchy check on gatherstats.
* Add sums per month to HostStats.
* Add postingstats and refactor it:
- Make all text configurable (i18n).
- Generalize to make it usable for HostStats.
- Fallback to last month if no month is given.
- Add option handling, import VERSION, add POD.
* Update README, INSTALL and ChangeLog.
* Don't enforce --nocomment for --filetemplate, just default to it.
Change caption handling, update documentation accordingly.
* Fix call to GetMaxLength() in cliservstats.
* Fix typos in documentation, update sample config file.
* Move database creation from install/install.pl to bin/dbcreate.pl
* Add tinews.pl and some shell scripts to /contrib.
NewsStats 0.2.0 (2025-05-10)
* Redo directory structure:
- Move all scripts to /bin

View file

@ -1,4 +1,4 @@
NewsStats (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
NewsStats (c) 2010-2013 Thomas Hochstein <thh@thh.name>
NewsStats is a software package used to gather statistical information
from a live Usenet feed and for its subsequent examination.
@ -14,7 +14,7 @@ INSTALLATION INSTRUCTIONS
1) Install the scripts
* Download the current version of NewsStats from
<https://th-h.de/net/software/newsstats/>.
<http://th-h.de/download/scripts.php>.
* Untar it into a directory of your choice:
@ -57,9 +57,6 @@ INSTALLATION INSTRUCTIONS
* DBTableGrps = groups_de
Table holding data on postings per group.
* DBTableHosts = hosts_de
Table holding data on postings per server.
b) Optional configuration options
* TLH = de
@ -70,9 +67,9 @@ INSTALLATION INSTRUCTIONS
* Setup your database server with a username, password and
database matching the NewsStats configuration (see 2 a).
* Start the database creation script:
* Start the installation script:
# bin/dbcreate.pl
# install/install.pl
It will setup the necessary database tables and display some
information on the next steps.

View file

@ -1,4 +1,4 @@
NewsStats (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
NewsStats (c) 2010-2013 Thomas Hochstein <thh@thh.name>
NewsStats is a software package for gathering statistical data live
from a Usenet feed and subsequent examination.
@ -13,7 +13,7 @@ What's that?
There's a multitude of tools for the statistical examination of
newsgroups: number of postings per month or per person, longest
threads, and so on (see <https://th-h.de/net/usenet/stats/>
threads, and so on (see <http://th-h.de/infos/usenet/stats.php>
[German language] for an incomplete list). Most of them use a per-
newsgroup approach while NewsStats is hierarchy oriented.
@ -43,7 +43,7 @@ Prerequisites
- File::Basename
- Sys::Syslog
* Perl modules from CPAN
* Perl modules form CPAN
- Config::Auto
- Date::Format
- DBI
@ -56,26 +56,19 @@ Installation instructions
See INSTALL.
Documentation is in /doc, configuration in /etc, the NewsStats
module in /lib and most scripts in /bin, while /contrib has some
sample scripts that may have to be adjusted to work in your
configuration.
Getting Started
'feedlog.pl' will continuously feed raw data to your raw data
table. See the feedlog.pl man page for more information.
You can process that data via 'gatherstats.pl'; currently the
tabulation of postings per group and injection server per month is
supported. Tabulation of clients (newsreaders) is planned. See
the gatherstats.pl man page for more information.
You can process that data via 'gatherstats.pl'; currently only the
tabulation of postings per group and month is supported. More to
come. See the gatherstats.pl man page for more information.
Report generation is handled by specialised scripts for each
report type. Currently reports on the number of postings per group
and month and injection server and month are supported; you can
use 'groupstats.pl' and 'cliservstats.pl' for that. See the
groupstats.pl and cliservstats.pl man pages for more information.
report type. Currently only reports on the number of postings per
group and month are supported; you can use 'groupstats.pl' for
this. See the groupstats.pl man page for more information.
Reporting Bugs
@ -100,3 +93,4 @@ Author
Thomas Hochstein <thh@thh.name>
<https://th-h.de/>

View file

@ -28,7 +28,8 @@ NewsStats.
mentioned information; and you should be able to get the history of any
group.
- Add other reports
NewsStats should include some other kinds of reports (stats on used clients)
NewsStats should include some other kinds of reports (stats on used clients,
on postings hosts/servers, ...)
- Add tools for database management
NewsStats should offer tools e.g. to inject postings into the 'raw' database,
or to split databases.
@ -64,7 +65,7 @@ NewsStats.
+ gatherstats.pl
- Use hierarchy information (see GroupInfo above)
- Add gathering of other stats (clients, ...)
- Add gathering of other stats (clients, hosts, ...)
- better modularisation (code reuse for other reports!)
- Add / enhance / test error handling
- General tests and optimisations

View file

@ -12,8 +12,8 @@ DBDatabase = newsstats
#
DBTableRaw = raw_de
DBTableGrps = groups_de
DBTableHosts = hosts_de
#DBTableClnts =
#DBTableHosts =
### hierarchy configuration
TLH = de

View file

@ -1,19 +1,19 @@
#! /usr/bin/perl
#
# dbcreate.pl
# install.pl
#
# This script will create database tables as necessary.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
# Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
BEGIN {
use File::Basename;
# we're in .../bin, so our module is in ../lib
# we're in .../install, so our module is in ../lib
push(@INC, dirname($0).'/../lib');
}
use strict;
@ -46,7 +46,7 @@ my $DBCreate = <<SQLDB;
CREATE DATABASE IF NOT EXISTS `$Conf{'DBDatabase'}` DEFAULT CHARSET=utf8;
SQLDB
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS, 'DBTableHosts' => <<HOSTS);
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS);
--
-- Table structure for table DBTableRaw
--
@ -82,26 +82,10 @@ CREATE TABLE IF NOT EXISTS `$Conf{'DBTableGrps'}` (
`revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `month_newsgroup` (`month`,`newsgroup`),
KEY `month` (`month`),
KEY `newsgroup` (`newsgroup`)
KEY `newsgroup` (`newsgroup`),
KEY `postings` (`postings`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per newsgroup';
GRPS
--
-- Table structure for table DBTableHosts
--
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableHosts'}` (
`id` bigint(20) unsigned NOT NULL auto_increment,
`month` varchar(7) character set ascii NOT NULL,
`host` varchar(100) NOT NULL,
`postings` int(11) NOT NULL,
`revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `month_host` (`month`,`host`),
KEY `month` (`month`),
KEY `host` (`host`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per server';
HOSTS
##### --------------------------------------------------------------------------
##### Installation / upgrade instructions
@ -169,8 +153,8 @@ UPGRADE
if (!$OptUpdate) {
print "----------\nStarting database creation.\n";
# create database
# we can't use InitDB() as that will use the database name of
# the database that doesn't exist yet ...
# we can't use InitDB() as that will use a table name of
# the table that doesn't exist yet ...
my $DBHandle = DBI->connect(sprintf('DBI:%s:host=%s',$Conf{'DBDriver'},
$Conf{'DBHost'}), $Conf{'DBUser'},
$Conf{'DBPw'}, { PrintError => 0 });
@ -178,7 +162,7 @@ if (!$OptUpdate) {
$DBQuery->execute() or &Bleat(2, sprintf("Can't create database %s: %s%\n",
$Conf{'DBDatabase'}, $DBI::errstr));
printf("Database %s created succesfully.\n",$Conf{'DBDatabase'});
printf("Database table %s created succesfully.\n",$Conf{'DBDatabase'});
$DBHandle->disconnect;
};
@ -201,7 +185,7 @@ if (!$OptUpdate) {
} else {
##### upgrade mode
print "----------\nStarting upgrade process.\n";
my $PackageVersion = '0.03';
$PackageVersion = '0.03';
if ($OptUpdate < $PackageVersion) {
if ($OptUpdate < 0.02) {
# 0.01 -> 0.02
@ -231,7 +215,7 @@ sub CreateTable {
};
my $DBQuery = $DBHandle->prepare($DBCreate{$Table});
$DBQuery->execute() or
&Bleat(2, sprintf("Can't create table %s in database %s: %s\n",$Table,
&Bleat(2, sprintf("Can't create table %s in database %s: %s%\n",$Table,
$Conf{'DBDatabase'},$DBI::errstr));
printf("Database table %s.%s created succesfully.\n",
$Conf{'DBDatabase'},$Conf{$Table});
@ -261,11 +245,11 @@ __END__
=head1 NAME
dbcreate - database creation script
install - installation script
=head1 SYNOPSIS
B<dbcreate> [B<-Vh> [--update I<version>] [B<--conffile> I<filename>]
B<install> [B<-Vh> [--update I<version>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS
@ -273,12 +257,11 @@ See L<doc/README>.
=head1 DESCRIPTION
This script will create a database and database tables as necessary
and configured.
This script will create database tables as necessary and configured.
=head2 Configuration
B<dbcreate> will read its configuration from F<newsstats.conf> which should
B<install> will read its configuration from F<newsstats.conf> which should
be present in etc/ via Config::Auto or from a configuration file submitted
by the B<--conffile> option.
@ -310,7 +293,7 @@ Load configuration from I<filename> instead of F<newsstats.conf>.
=over 4
=item F<bin/dbcreate.pl>
=item F<install/install.pl>
The script itself.
@ -351,7 +334,7 @@ Thomas Hochstein <thh@thh.name>
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.

View file

@ -2,7 +2,7 @@
#
# Library functions for the NewsStats package.
#
# Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh@thh.name>
# Copyright (c) 2010-2013 Thomas Hochstein <thh@thh.name>
#
# This module can be redistributed and/or modified under the same terms under
# which Perl itself is published.
@ -34,7 +34,6 @@ require Exporter;
ListNewsgroups
ParseHierarchies
ReadGroupList
ParseHeaders
OutputData
FormatOutput
SQLHierarchies
@ -49,7 +48,7 @@ require Exporter;
Output => [qw(OutputData FormatOutput)],
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
SQLSetBounds SQLBuildClause GetMaxLength)]);
$VERSION = '0.3.0';
$VERSION = '0.2.0';
use Data::Dumper;
use File::Basename;
@ -77,7 +76,7 @@ sub ShowVersion {
################################################################################
### display version and exit
print "$0 from NewsStats v$VERSION\n";
print "Copyright (c) 2010-2013, 2025 Thomas Hochstein <thh\@thh.name>\n";
print "Copyright (c) 2010-2013 Thomas Hochstein <thh\@thh.name>\n";
print "This program is free software; you may redistribute it ".
"and/or modify it under the same terms as Perl itself.\n";
exit(100);
@ -255,42 +254,6 @@ sub ReadGroupList {
return \%ValidGroups;
};
################################################################################
sub ParseHeaders {
################################################################################
### return a hash of all headers (ignoring duplicate headers)
### parsed from raw headers
### -> taken and modified from pgpverify
### -> Written April 1996, <tale@isc.org> (David C Lawrence)
### -> Currently maintained by Russ Allbery <eagle@eyrie.org>
### IN : $RawHeaders : raw headers as found in posting
### OUT: %Headers : hash containing header contents,
### keyed by lower-case header name
my (%Header, $Label, $Value);
foreach (@_) {
s/\r?\n$//;
last if /^$/;
if (/^(\S+):[ \t](.+)/) {
($Label, $Value) = ($1, $2);
# discard all duplicate headers
next if $Header{lc($Label)};
$Header{lc($Label)} = $Value;
} elsif (/^\s/) {
# continuation lines
if ($Label) {
$Header{lc($Label)} .= "\n$_";
} else {
warn (sprintf("Non-header line: %s\n",$_));
}
} else {
warn (sprintf("Non-header line: %s\n",$_));
}
}
return %Header;
};
################################################################################
#####----------------------------- TimePeriods ----------------------------#####
@ -430,13 +393,12 @@ sub OutputData {
### $GroupBy : primary sorting order (month or key)
### $Precision: number of digits right of decimal point (0 or 2)
### $ValidKeys: reference to a hash containing all valid keys
### $LeadIn : print at start of output
### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
### $DBQuery : database query handle with executed query,
### containing $Month, $Key, $Value
### $PadField : padding length for key field (optional) for 'pretty'
### $PadValue : padding length for value field (optional) for 'pretty'
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $LeadIn, $FileTempl,
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
$DBQuery, $PadField, $PadValue) = @_;
my %ValidKeys = %{$ValidKeys} if $ValidKeys;
my ($FileName, $Handle, $OUT);
@ -479,8 +441,8 @@ sub OutputData {
$FileName));
$Handle = $OUT;
};
print $Handle &FormatOutput($Format, $Comments, $LeadIn, $Caption,
$Key, $Value, $Precision, $PadField, $PadValue);
print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
$Precision, $PadField, $PadValue);
$LastIteration = $Caption;
};
close $OUT if ($FileTempl);
@ -499,7 +461,7 @@ sub FormatOutput {
### $PadField : padding length for key field (optional) for 'pretty'
### $PadValue : padding length for value field (optional) for 'pretty'
### OUT: $Output: formatted output
my ($Format, $Comments, $LeadIn, $Caption, $Key, $Value, $Precision, $PadField,
my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField,
$PadValue) = @_;
my ($Output);
# keep last caption in mind
@ -515,10 +477,8 @@ sub FormatOutput {
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
} elsif ($Format eq 'pretty') {
# output as a table
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)) {
$Output = $LeadIn;
$Output .= sprintf ("# ----- %s:\n",$Caption);
}
$Output = sprintf ("# ----- %s:\n",$Caption)
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
# increase $PadValue for numbers with decimal point
$PadValue += $Precision+1 if $Precision;
# add padding if $PadField is set; $PadValue HAS to be set then
@ -582,22 +542,21 @@ sub SQLSortOrder {
### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
### $OrderBy: secondary sort by month/newsgroups (default)
### or number of 'postings'
### $Type : newsgroup, host, client
### OUT: a SQL ORDER BY clause
my ($GroupBy,$OrderBy,$Type) = @_;
my ($GroupBy,$OrderBy) = @_;
my ($GroupSort,$OrderSort) = ('','');
# $GroupBy (primary sorting)
if (!$GroupBy) {
$GroupBy = 'month';
} else {
($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
if ($GroupBy =~ /name/i) {
$GroupBy = $Type;
if ($GroupBy =~ /group/i) {
$GroupBy = 'newsgroup';
} else {
$GroupBy = 'month';
}
}
my $Secondary = ($GroupBy eq 'month') ? $Type : 'month';
my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
# $OrderBy (secondary sorting)
if (!$OrderBy) {
$OrderBy = $Secondary;
@ -633,45 +592,44 @@ sub SQLParseOrder {
################################################################################
sub SQLGroupList {
################################################################################
### explode list of names separated by : (with wildcards)
### explode list of newsgroups separated by : (with wildcards)
### to a SQL 'WHERE' expression
### IN : $Names: list of names, e.g. newsgroups (group.one.*:group.two:group.three.*)
### $Type : newsgroup, host, client
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
### OUT: SQL code to become part of a 'WHERE' clause,
### list of names for SQL bindings
my ($Names,$Type) = @_;
### list of newsgroups for SQL bindings
my ($Newsgroups) = @_;
# substitute '*' wildcard with SQL wildcard character '%'
$Names =~ s/\*/%/g;
return (undef,undef) if !CheckValidNames($Names);
# just one name/newsgroup?
return (SQLGroupWildcard($Names,$Type),$Names) if $Names !~ /:/;
my ($SQL,@WildcardNames,@NoWildcardNames);
# list of names/newsgroups separated by ':'
my @NameList = split /:/, $Names;
foreach (@NameList) {
$Newsgroups =~ s/\*/%/g;
return (undef,undef) if !CheckValidNewsgroups($Newsgroups);
# just one newsgroup?
return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
my ($SQL,@WildcardGroups,@NoWildcardGroups);
# list of newsgroups separated by ':'
my @GroupList = split /:/, $Newsgroups;
foreach (@GroupList) {
if ($_ !~ /%/) {
# add to list of names/newsgroup names WITHOUT wildcard
push (@NoWildcardNames,$_);
# add to list of newsgroup names WITHOUT wildcard
push (@NoWildcardGroups,$_);
} else {
# add to list of names WITH wildcard
push (@WildcardNames,$_);
# add to list of newsgroup names WITH wildcard
push (@WildcardGroups,$_);
# add wildcard to SQL clause
# 'OR' if SQL clause is not empty
$SQL .= ' OR ' if $SQL;
$SQL .= "$Type LIKE ?"
$SQL .= 'newsgroup LIKE ?'
}
};
if (scalar(@NoWildcardNames)) {
if (scalar(@NoWildcardGroups)) {
# add 'OR' if SQL clause is not empty
$SQL .= ' OR ' if $SQL;
if (scalar(@NoWildcardNames) < 2) {
# special case: just one name without wildcard
$SQL .= "$Type = ?";
if (scalar(@NoWildcardGroups) < 2) {
# special case: just one newsgroup without wildcard
$SQL .= 'newsgroup = ?';
} else {
# create list of names to include: e.g. 'newsgroup IN (...)'
$SQL .= "$Type IN (";
# create list of newsgroups to include: 'newsgroup IN (...)'
$SQL .= 'newsgroup IN (';
my $SQLin;
foreach (@NoWildcardNames) {
foreach (@NoWildcardGroups) {
$SQLin .= ',' if $SQLin;
$SQLin .= '?';
}
@ -679,28 +637,27 @@ sub SQLGroupList {
$SQL .= $SQLin .= ')';
}
}
# add brackets '()' to SQL clause as needed (more than one wildcard name)
if (scalar(@WildcardNames)) {
# add brackets '()' to SQL clause as needed (more than one wildcard group)
if (scalar(@WildcardGroups)) {
$SQL = '(' . $SQL .')';
}
# rebuild @NameList in (now) correct order
@NameList = (@WildcardNames,@NoWildcardNames);
return ($SQL,@NameList);
# rebuild @GroupList in (now) correct order
@GroupList = (@WildcardGroups,@NoWildcardGroups);
return ($SQL,@GroupList);
};
################################################################################
sub SQLGroupWildcard {
################################################################################
### build a valid SQL 'WHERE' expression with or without wildcards
### IN : $Name: expression, probably with wildcard
### IN : $Newsgroup: newsgroup expression, probably with wildcard
### (group.name or group.name.%)
### $Type: newsgroup, host, client
### OUT: SQL code to become part of a 'WHERE' clause
my ($Name,$Type) = @_;
if ($Name !~ /%/) {
return "$Type = ?";
my ($Newsgroup) = @_;
if ($Newsgroup !~ /%/) {
return 'newsgroup = ?';
} else {
return "$Type LIKE ?";
return 'newsgroup LIKE ?';
}
};
@ -802,14 +759,14 @@ sub SQLBuildClause {
#####--------------------------- Verifications ----------------------------#####
################################################################################
sub CheckValidNames {
sub CheckValidNewsgroups {
################################################################################
### syntax check of a list
### IN : $Names: list of names, e.g. newsgroups (group.one.*:group.two:group.three.*)
### syntax check of newgroup list
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
### OUT: boolean
my ($Names) = @_;
my ($Newsgroups) = @_;
my $InvalidCharRegExp = ',; ';
return ($Names =~ /[$InvalidCharRegExp]/) ? 0 : 1;
return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1;
};