From 66a175c7f8d8a35b7c015b68c897eecec84244e7 Mon Sep 17 00:00:00 2001
From: Thomas Hochstein
Date: Fri, 30 May 2025 19:48:02 +0200
Subject: [PATCH] Add clientstats (for clients).

Signed-off-by: Thomas Hochstein
---
 bin/clientstats.pl | 607 +++++++++++++++++++++++++++++++++++++++++++++
 doc/ChangeLog      |   1 +
 lib/NewsStats.pm   |  10 +-
 3 files changed, 615 insertions(+), 3 deletions(-)
 create mode 100644 bin/clientstats.pl

diff --git a/bin/clientstats.pl b/bin/clientstats.pl
new file mode 100644
index 0000000..0f7ae20
--- /dev/null
+++ b/bin/clientstats.pl
@@ -0,0 +1,607 @@
+#! /usr/bin/perl
+#
+# clientstats.pl
+#
+# This script will get statistical data on newsreader (client) usage
+# from a database.
+#
+# It is part of the NewsStats package.
+#
+# Copyright (c) 2025 Thomas Hochstein
+#
+# It can be redistributed and/or modified under the same terms under
+# which Perl itself is published.
+
+BEGIN {
+  use File::Basename;
+  # we're in .../bin, so our module is in ../lib
+  push(@INC, dirname($0).'/../lib');
+}
+use strict;
+use warnings;
+
+use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
+
+use DBI;
+use Getopt::Long qw(GetOptions);
+Getopt::Long::config ('bundling');
+
+################################# Main program #################################
+
+### read commandline options
+my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat,
+    $OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy,
+    $OptReportType,$OptSums,$UppBound,$OptVersions,$OptConfFile);
+GetOptions ('c|captions!'    => \$OptCaptions,
+            'comments!'      => \$OptComments,
+            'db=s'           => \$OptDB,
+            'filetemplate=s' => \$OptFileTemplate,
+            'f|format=s'     => \$OptFormat,
+            'g|group-by=s'   => \$OptGroupBy,
+            'l|lower=i'      => \$LowBound,
+            'm|month=s'      => \$OptMonth,
+            'n|names=s'      => \$OptNames,
+            'o|order-by=s'   => \$OptOrderBy,
+            'r|report=s'     => \$OptReportType,
+            's|sums!'        => \$OptSums,
+            'u|upper=i'      => \$UppBound,
+            'v|versions!'    => \$OptVersions,
+            'conffile=s'     => \$OptConfFile,
+            'h|help'         => \&ShowPOD,
+            'V|version'      => \&ShowVersion) or exit 1;
+# parse parameters
+# $OptComments defaults to TRUE if --filetemplate is not used
+$OptComments = 1 if (!$OptFileTemplate && !defined($OptComments));
+# parse $OptReportType
+if ($OptReportType) {
+  if ($OptReportType =~ /sums?/i) {
+    $OptReportType = 'sum';
+  } else {
+    $OptReportType = 'default';
+  }
+}
+
+### read configuration
+my %Conf = %{ReadConfig($OptConfFile)};
+
+### set DBTable
+$Conf{'DBTable'} = $Conf{'DBTableClnts'};
+$Conf{'DBTable'} = $OptDB if $OptDB;
+
+### init database
+my $DBHandle = InitDB(\%Conf,1);
+
+### get time period and names, prepare SQL 'WHERE' clause
+# get time period
+# and set caption for output and expression for SQL 'WHERE' clause
+my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
+# bail out if --month is invalid
+&Bleat(2,"--month option has an invalid format - ".
+         "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
+# get list of clients and set expression for SQL 'WHERE' clause
+# with placeholders as well as a list of names to bind to them
+my ($SQLWhereNames,@SQLBindNames);
+if ($OptNames) {
+  ($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,'client');
+  # bail out if --names is invalid
+  &Bleat(2,"--names option has an invalid format!")
+    if !$SQLWhereNames;
+}
+
+### build SQL WHERE clause
+my $ExcludeSums = $OptSums ? '' : "client != 'ALL'";
+my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
+                                    $ExcludeSums,"version = 'ALL'",
+                                    &SQLSetBounds('default',$LowBound,$UppBound));
+
+### get sort order and build SQL 'ORDER BY' clause
+# force to 'month' for $OptReportType 'sum'
+$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
+# default to 'name' if $OptGroupBy is not set and
+# just one name is requested, but more than one month
+$OptGroupBy = 'name' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
+                         and $OptNames and $OptNames !~ /[:*%]/);
+# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
+# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
+my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'client, version');
+# $GroupBy will contain 'month' or 'client, version' (parsed result of $OptGroupBy)
+# set it to 'month' or 'key' for OutputData()
+$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
+
+### get report type and build SQL 'SELECT' query
+# (the untruncated client name is fetched as 4th column for the version lookup)
+my $SQLSelect;
+my $SQLGroupClause = '';
+
+if ($OptReportType and $OptReportType ne 'default') {
+  $SQLGroupClause = "GROUP BY client, version";
+  # change $SQLOrderClause: replace everything before 'postings'
+  $SQLOrderClause =~ s/BY.+postings/BY postings/;
+  $SQLSelect = "'All months',LEFT(client,40),SUM(postings),client";
+  # change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
+  $SQLOrderClause =~ s/postings/SUM(postings)/;
+} else {
+  $SQLSelect = "month,LEFT(client,40),postings,client";
+};
+
+### get length of longest name delivered by query
+### for formatting purposes
+my $Field = ($GroupBy eq 'month') ? 'LEFT(client,40)' : 'month';
+my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
+                                              $Field,'postings',$SQLWhereClause,
+                                              '',@SQLBindNames);
+
+### build and execute SQL query
+my ($DBQuery);
+# prepare query
+$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
+                                      $SQLSelect,
+                                      $Conf{'DBDatabase'},$Conf{'DBTable'},
+                                      $SQLWhereClause,$SQLGroupClause,
+                                      $SQLOrderClause));
+# execute query
+$DBQuery->execute(@SQLBindNames)
+  or &Bleat(2,sprintf("Can't get client data for %s from %s.%s: %s\n",
+                      $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
+                      $DBI::errstr));
+
+### output results
+# set default to 'pretty'
+$OptFormat = 'pretty' if !$OptFormat;
+# print captions if --caption is set
+my $LeadIn;
+if ($OptCaptions && $OptComments) {
+  # print time period with report type
+  my $CaptionReportType = '(number of postings for each month)';
+  if ($OptReportType and $OptReportType ne 'default') {
+    $CaptionReportType = '(number of all postings for that time period)';
+  }
+  $LeadIn .= sprintf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
+  # print name list if --names is set
+  $LeadIn .= sprintf("# ----- Names: %s\n",join(',',split(/:/,$OptNames)))
+    if $OptNames;
+  # print boundaries, if set
+  my $CaptionBoundary= '(counting only months fulfilling this condition)';
+  $LeadIn .= sprintf("# ----- Threshold: %s %s x %s %s %s\n",
+                     $LowBound ? $LowBound : '',$LowBound ? '=>' : '',
+                     $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
+    if ($LowBound or $UppBound);
+  # print primary and secondary sort order
+  $LeadIn .= sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
+                     ($GroupBy eq 'month') ? 'Months' : 'Names',
+                     ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
+                     ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
+                     ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
+}
+
+# output data
+# (changed code copy from NewsStats::OutputData)
+my ($LastIteration, $FileName, $Handle, $OUT);
+
+# define output types
+my %LegalOutput;
+@LegalOutput{('dump','list','pretty')} = ();
+# bail out if format is unknown
+&Bleat(2,"Unknown output type '$OptFormat'!") if !exists($LegalOutput{$OptFormat});
+
+while (my ($Month, $Key, $Value, $Client) = $DBQuery->fetchrow_array) {
+  # $Client is the full client name (needed for the version lookup),
+  # while $Key holds only its first 40 characters for display
+  # care for correct sorting order and abstract from month and keys:
+  # $Caption will be $Month or $Key, according to sorting order,
+  # and $Key will be $Key or $Month, respectively
+  my $Caption;
+  if ($GroupBy eq 'key') {
+    $Caption = $Key;
+    $Key     = $Month;
+  } else {
+    $Caption = $Month;
+  }
+  # set output file handle
+  if (!$OptFileTemplate) {
+    $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
+  } elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
+    close $OUT if defined($OUT);
+    # safeguards for filename creation:
+    # replace potential problem characters with '_'
+    $FileName = sprintf('%s-%s',$OptFileTemplate,$Caption);
+    $FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
+    open ($OUT,'>',$FileName)
+      or &Bleat(2,sprintf("Cannot open output file '%s': $!",
+                          $FileName));
+    $Handle = $OUT;
+  };
+  print $Handle &FormatOutput($OptFormat, $OptComments, $LeadIn, $Caption,
+                              $Key, $Value, 0, $MaxLength, $MaxValLength, $LastIteration);
+  # output client versions
+  if ($OptVersions) {
+    ### get client versions
+    # version rows belong to one month each, so stick to the month of the
+    # current row unless the report sums up all months ('All months')
+    my $SumReport     = ($OptReportType and $OptReportType ne 'default');
+    my $SQLWhereMonth = $SumReport ? '' : 'month = '.$DBHandle->quote($Month);
+    # separate WHERE clause without 'ALL' version; the client name is
+    # arbitrary text, so let the database driver quote it
+    my $SQLVersWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
+                                            $ExcludeSums,"version != 'ALL'",
+                                            'client = '.$DBHandle->quote($Client),
+                                            $SQLWhereMonth,
+                                            &SQLSetBounds('default',$LowBound,$UppBound));
+
+    # get length of longest version delivered by query
+    # for formatting purposes
+    my ($VersMaxLength,$VersMaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
+                                                          'version','postings',$SQLVersWhereClause,
+                                                          '',@SQLBindNames);
+    if ($VersMaxLength) {
+      # add length of '- '
+      $VersMaxLength += 2;
+      # set to length of longest client, if longer
+      $VersMaxLength = $MaxLength if $MaxLength > $VersMaxLength;
+      $VersMaxValLength = $MaxValLength if $MaxValLength > $VersMaxValLength;
+    }
+
+    # prepare query
+    my $DBVersQuery = $DBHandle->prepare(sprintf('SELECT version,%s FROM %s.%s %s %s %s',
+                                                  $SumReport ? 'SUM(postings)' : 'postings',
+                                                  $Conf{'DBDatabase'},$Conf{'DBTable'},
+                                                  $SQLVersWhereClause,$SQLGroupClause,
+                                                  $SQLOrderClause));
+    # execute query
+    $DBVersQuery->execute(@SQLBindNames)
+      or &Bleat(2,sprintf("Can't get version data for %s from %s.%s: %s\n",
+                          $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
+                          $DBI::errstr));
+    # output versions
+    while (my ($Version, $Postings) = $DBVersQuery->fetchrow_array) {
+      $Version = '- ' . $Version;
+      print $Handle &FormatOutput($OptFormat, $OptComments, $LeadIn, '',
+                                  $Version, $Postings, 0, $VersMaxLength, $VersMaxValLength,
+                                  '');
+    }
+  }
+  $LastIteration = $Caption;
+};
+close $OUT if defined($OUT);
+
+### close handles
+$DBHandle->disconnect;
+
+__END__
+
+################################ Documentation #################################
+
+=head1 NAME
+
+clientstats - create reports on client usage
+
+=head1 SYNOPSIS
+
+B<clientstats> [B<-Vhcsv> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]
+
+=head1 REQUIREMENTS
+
+See L<doc/INSTALL>.
+
+=head1 DESCRIPTION
+
+This script creates reports on client usage (number of postings
+using each client per month) taken from result tables created by
+B<gatherstats>.
+
+With B<-v> / B<--versions>, each client line is followed by a breakdown
+of its postings by client version.
+
+=head2 Features and options
+
+=head3 Time period and names
+
+The time period to act on defaults to last month; you can assign another
+time period or a single month (or drop all time constraints) via the
+B<--month> option (see below).
+ +B<clientstats> will process all clients by default; you can limit +processing to only some clients by supplying a list of those names by +using the B<--names> option (see below). + +=head3 Report type + +You can choose between different B<--report> types: postings per month +or all postings summed up; for details, see below. + +=head3 Upper and lower boundaries + +Furthermore you can set an upper and/or lower boundary to exclude some +results from output via the B<--lower> and B<--upper> options, +respectively. By default, all clients with more postings per month than +the upper boundary and/or fewer than the lower boundary will be excluded +from the result set (i.e. not shown and not considered for sum reports). + +=head3 Sorting and formatting the output + +By default, all results are grouped by month; you can group results by +clients instead via the B<--group-by> option. Within those groups, +the list of clients (or months) is sorted alphabetically +(or chronologically, respectively) ascending. You can change that order +(and sort by number of postings) with the B<--order-by> option. For +details and exceptions, please see below. + +The results will be formatted as a kind of table; you can change the +output format to a simple list or just a list of names and number of +postings with the B<--format> option. Captions will be added by means of +the B<--captions> option; all comments (and captions) can be suppressed by +using B<--nocomments>. + +Last but not least you can redirect all output to a number of files, e.g. +one for each month, by submitting the B<--filetemplate> option, see below. + +=head2 Configuration + +B<clientstats> will read its configuration from F +which should be present in etc/ via Config::Auto or from a configuration file +submitted by the B<--conffile> option. + +See doc/INSTALL for an overview of possible configuration options. + +You can override some configuration options via the B<--db> option. + +=head1 OPTIONS + +=over 3 + +=item B<-V>, B<--version> + +Print out version and copyright information and exit. 
+ +=item B<-h>, B<--help> + +Print this man page and exit. + +=item B<-m>, B<--month> I + +Set processing period to a single month in YYYY-MM format or to a time +period between two months in YYYY-MM:YYYY-MM format (two months, separated +by a colon). By using the keyword I instead, you can set no +processing period to process the whole database. + +=item B<-n>, B<--names> I + +Limit processing to a certain set of client names. I +can be a single name (eternal-september.org), a group of names +(*.inka.de) or a list of either of these, separated by colons, for +example + + eternal-september.org:solani.org:*.inka.de + +=item B<-s>, B<--sums|--nosums> (sum per month) + +Include "virtual" clients named "ALL" for every month in output, +containing the sum of all detected clients for that month. + +=item B<-r>, B<--report> I + +Choose the report type: I or I + +By default, B<clientstats> will report the number of postings for each +client in each month. But it can also report the total sum of postings +per client for all months. + +For report type I, the B option has no meaning and +will be silently ignored (see below). + +=item B<-l>, B<--lower> I + +Set the lower boundary. See below. + +=item B<-u>, B<--upper> I + +Set the upper boundary. + +By default, all clients with more postings per month than the +upper boundary and/or fewer postings per month than the lower boundary +will be excluded from further processing. For the default report that +means each month only clients with a number of postings between +the boundaries will be displayed. For the sums report, clients +with a number of postings exceeding the boundaries in all (!) months +will not be considered. 
+ +=item B<-g>, B<--group-by> I + +By default, all results are grouped by month, sorted chronologically in +ascending order, like this: + + # ----- 2012-01: + arcor-online.net : 9379 + individual.net : 19525 + news.albasani.net: 9063 + # ----- 2012-02: + arcor-online.net : 8606 + individual.net : 16768 + news.albasani.net: 7879 + +The results can be grouped by client instead via +B<--group-by> I: + + ----- individual.net + 2012-01: 19525 + 2012-02: 16768 + ----- arcor-online.net + 2012-01: 9379 + 2012-02: 8606 + ----- news.albasani.net + 2012-01: 9063 + 2012-02: 7879 + +By appending I<-desc> to the group-by option parameter, you can reverse +the sort order - e.g. B<--group-by> I will give: + + # ----- 2012-02: + arcor-online.net : 8606 + individual.net : 16768 + news.albasani.net: 7879 + # ----- 2012-01: + arcor-online.net : 9379 + individual.net : 19525 + news.albasani.net: 9063 + +Sums reports (see above) will always be grouped by months; this option +will therefore be ignored. + +=item B<-o>, B<--order-by> I + +Within each group (a single month or single client, see above), +the report will be sorted by name (or month) in ascending alphabetical +order by default. You can change the sort order to descending or sort +by number of postings instead. + +=item B<-f>, B<--format> I + +Select the output format, I being the default: + + # ----- 2012-01: + arcor-online.net : 9379 + individual.net : 19525 + # ----- 2012-02: + arcor-online.net : 8606 + individual.net : 16768 + +I format looks like this: + + 2012-01 arcor-online.net 9379 + 2012-01 individual.net 19525 + 2012-02 arcor-online.net 8606 + 2012-02 individual.net 16768 + +And I format looks like this: + + # 2012-01: + arcor-online.net 9379 + individual.net 19525 + # 2012-02: + arcor-online.net 8606 + individual.net 16768 + +You can remove the comments by using B<--nocomments>, see below. 
+ +=item B<-c>, B<--captions|--nocaptions> + +Add captions to output, like this: + + ----- Report for 2012-01 to 2012-02 (number of postings for each month) + ----- Names: individual.net + ----- Threshold: 8000 => x (counting only months fulfilling this condition) + ----- Grouped by Months (ascending), sorted by number of postings descending + +False by default. + +=item B<--comments|--nocomments> + +Add comments (group headers) to I<dump> and I<pretty> output. True by default +as long as B<--filetemplate> is not set. + +Use I<--nocomments> to suppress anything except client names or months and +numbers of postings. + +=item B<--filetemplate> I + +Save output to file(s) instead of dumping it to STDOUT. B<clientstats> +will create one file for each month (or each client, according to the +setting of B<--group-by>, see above), with filenames composed by adding +year and month (or client names) to the I, for +example with B<--filetemplate> I<stats>: + + stats-2012-01 + stats-2012-02 + ... and so on + +=item B<--db> I + +Override I or I from F. + +=item B<--conffile> I + +Load configuration from I instead of F. + +=back + +=head1 INSTALLATION + +See L. + +=head1 EXAMPLES + +Show number of postings per client for last month in I<pretty> format: + + clientstats + +Show that report for January of 2010 and *.inka plus individual.net: + + clientstats --month 2010-01 --names *.inka:individual.net: + +Only show clients with 30 postings or less last month, ordered +by number of postings, descending, in I<pretty> format: + + clientstats --upper 30 --order-by postings-desc + +List number of postings per client for each month of 2010 and redirect +output to one file for each month, named hosts-2010-01 and so on, in +machine-readable form (without formatting): + + clientstats -m 2010-01:2010-12 -f dump --filetemplate hosts + + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file. 
+ +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +L + +=item - + +L + +=item - + +gatherstats -h + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2025 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/doc/ChangeLog b/doc/ChangeLog index 4a54296..309ff60 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -6,6 +6,7 @@ NewsStats 0.4.0 (unreleased) * ParseHeader: re-merge continuation lines. * Add ClientStats to gatherstats. * Move cliservstats to hoststats. + * Add clientstats (for clients). NewsStats 0.3.0 (2025-05-18) * Extract GroupStats (in gatherstats) to subroutine. diff --git a/lib/NewsStats.pm b/lib/NewsStats.pm index 35b59c0..8a5dfde 100644 --- a/lib/NewsStats.pm +++ b/lib/NewsStats.pm @@ -506,16 +506,20 @@ sub FormatOutput { if ($Format eq 'dump') { # output as dump (key value) $Output = sprintf ("# %s:\n",$Caption) - if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); + if ($Caption and $Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); $Output .= sprintf ("%s %u\n",$Key,$Value); } elsif ($Format eq 'list') { # output as list (caption key value) - $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); + if ($Caption) { + $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); + } else { + $Output = sprintf ("%s %u\n",$Key,$Value); + } } elsif ($Format eq 'pretty') { # output as a table if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)) { $Output = $LeadIn; - $Output .= sprintf ("# ----- %s:\n",$Caption); + $Output .= sprintf ("# ----- %s:\n",$Caption) if $Caption; } # increase $PadValue for numbers with decimal point $PadValue += $Precision+1 if $Precision;