From edd250f2652311094fff5ef352702af44f3a8a1b Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Wed, 2 May 2012 18:11:43 +0200 Subject: [PATCH 01/26] Complete rewrite of groupstats.pl and NewsStats.pm. * Switch to Getopt::Long, change coding style; limit line length. * Replace 'die' and 'warn' by calls to &Bleat(). * Completely redo options and processing: - merge -m/-p/-a into --month - replace -i/-q/-d with - much more powerful - --group-by/--order-by - replace -t/-l with - much more powerful - --lower/--upper/--boundary - remove -b and replace it with --report Fixes #33. * Add new report types, boundaries and sorting options: - report types 'average' and 'sums' - boundaries 'average' and 'sums' - upper and/or lower boundary - sort output independently Issue #35. Fixes #34, #38. * Add possibility to cross-check newsgroups against checkgroups file. * Complete rewrite of groupstats.pl internal logic: - modularize construction fo SQL queries - remove unnecessary special cases - refactor code into NewsStats.pm functions as much as possible Issue #37. Fixes #36. * Rework output formats, fix padding problem by making use of modularized SQL queries. Fixes #15, #32. * Add some more consistency checks. Issue #12. * Redo documentation. * Update TODO list. Signed-off-by: Thomas Hochstein --- NewsStats.pm | 493 +++++++++++++++++++++++-------- doc/TODO | 15 +- groupstats.pl | 804 +++++++++++++++++++++++++++++--------------------- 3 files changed, 845 insertions(+), 467 deletions(-) diff --git a/NewsStats.pm b/NewsStats.pm index cc3dd83..a04ac0f 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -2,7 +2,7 @@ # # Library functions for the NewsStats package. # -# Copyright (c) 2010 Thomas Hochstein +# Copyright (c) 2010-2012 Thomas Hochstein # # This module can be redistributed and/or modified under the same terms under # which Perl itself is published. @@ -16,18 +16,20 @@ our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); require Exporter; @ISA = qw(Exporter); @EXPORT = qw( - $MySelf $MyVersion $PackageVersion - ReadOptions + $FullPath + $HomePath + ShowVersion + ShowPOD ReadConfig OverrideConfig InitDB + Bleat ); @EXPORT_OK = qw( GetTimePeriod LastMonth - CheckMonth SplitPeriod ListMonth ListNewsgroups @@ -35,58 +37,47 @@ require Exporter; OutputData FormatOutput SQLHierarchies + SQLSortOrder SQLGroupList + SQLSetBounds + SQLBuildClause GetMaxLength ); -%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)], +%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod + ListMonth)], Output => [qw(OutputData FormatOutput)], - SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLength)]); + SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList + SQLSetBounds SQLBuildClause GetMaxLength)]); $VERSION = '0.01'; our $PackageVersion = '0.01'; use Data::Dumper; use File::Basename; -use Getopt::Std; use Config::Auto; use DBI; #####-------------------------------- Vars --------------------------------##### -our $MySelf = fileparse($0, '.pl'); -our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)"; +# trim the path +our $FullPath = $0; +our $HomePath = dirname($0); +$0 =~ s%.*/%%; +# set version string +our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; #####------------------------------- Basics -------------------------------##### -################################################################################ -sub ReadOptions { -################################################################################ -### read commandline options and act on standard options -h and -V -### IN : $Params: list of legal commandline paramaters (without -h and -V) -### OUT: a hash containing the commandline options - $Getopt::Std::STANDARD_HELP_VERSION = 1; - - my ($Params) = @_; - my %Options; - - getopts('Vh'.$Params, \%Options); - - # -V: display version - &ShowVersion if ($Options{'V'}); - - # -h: feed myself to perldoc - &ShowPOD if ($Options{'h'}); - - return %Options; -}; ################################################################################ ################################################################################ sub ShowVersion { ################################################################################ ### display version and exit - print "NewsStats v$PackageVersion\n$MyVersion\nCopyright (c) 2010 Thomas Hochstein \n"; - print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n"; + print "NewsStats v$PackageVersion\n$MyVersion\n"; + print "Copyright (c) 2010-2012 Thomas Hochstein \n"; + print "This program is free software; you may redistribute it ". + "and/or modify it under the same terms as Perl itself.\n"; exit(100); }; ################################################################################ @@ -95,7 +86,7 @@ sub ShowVersion { sub ShowPOD { ################################################################################ ### feed myself to perldoc and exit - exec('perldoc', $0); + exec('perldoc', $FullPath); exit(100); }; ################################################################################ @@ -112,7 +103,7 @@ sub ReadConfig { ################################################################################ ################################################################################ -sub OverrideConfig { +sub OverrideConfig { ################################################################################ ### override configuration values ### IN : $ConfigR : reference to configuration hash @@ -120,7 +111,8 @@ sub OverrideConfig { my ($ConfigR,$OverrideR) = @_; my %Override = %$OverrideR; # Config hash empty? - warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1); + &Bleat(1,"Empty configuration hash passed to OverrideConfig()") + if ( keys %$ConfigR < 1); # return if no overrides return if (keys %Override < 1 or keys %$ConfigR < 1); foreach my $Key (keys %Override) { @@ -138,15 +130,35 @@ sub InitDB { ### OUT: DBHandle my ($ConfigR,$Die) = @_; my %Conf = %$ConfigR; - my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 }); + my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', + $Conf{'DBDriver'},$Conf{'DBDatabase'}, + $Conf{'DBHost'}), $Conf{'DBUser'}, + $Conf{'DBPw'}, { PrintError => 0 }); if (!$DBHandle) { - die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die); - warn("$MySelf: W: $DBI::errstr\n"); + &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); + &Bleat(1,$DBI::errstr); }; return $DBHandle; }; ################################################################################ +################################################################################ +sub Bleat { +################################################################################ +### print warning or error messages and terminate in case of error +### IN : $Level : 1 = warning, 2 = error +### $Message: warning or error message + my ($Level,$Message) = @_; + if ($Level == 1) { + warn "$0 W: $Message\n" + } elsif ($Level == 2) { + die "$0 E: $Message\n" + } else { + print "$0: $Message\n" + } +}; +################################################################################ + #####------------------------------ GetStats ------------------------------##### ################################################################################ @@ -172,7 +184,7 @@ sub ListNewsgroups { next if($TLH and !/^$TLH/); # don't count invalid newsgroups if(%ValidGroups and !defined($ValidGroups{$_})) { - warn (sprintf("DROPPED: %s\n",$_)); + &Bleat(1,sprintf("DROPPED: %s",$_)); next; } # add original newsgroup to %Newsgroups @@ -216,7 +228,7 @@ sub ReadGroupList { ### OUT: \%ValidGroups: hash containing all valid newsgroups my ($Filename) = @_; my %ValidGroups; - open (my $LIST,"<$Filename") or die "$MySelf: E: Cannot read $Filename: $!\n"; + open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); while (<$LIST>) { s/^(\S+).*$/$1/; chomp; @@ -233,29 +245,42 @@ sub ReadGroupList { ################################################################################ sub GetTimePeriod { ################################################################################ -### get a time period to act on, in order of preference: by default the -### last month; or a month submitted by -m YYYY-MM; or a time period submitted -### by -p YYYY-MM:YYYY-MM -### IN : $Month,$Period: contents of -m and -p -### OUT: $StartMonth, $EndMonth (identical if period is just one month) - my ($Month,$Period) = @_; - # exit if -m is set and not like YYYY-MM - die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month); - # warn if -m and -p is set - warn "$MySelf: W: Time period assigned by '-p' takes precendece over month assigned by '-m'.\n" if ($Month && $Period); - # default: set -m to last month - $Month = &LastMonth if (!defined($Month) and !defined($Period)); - # set $StartMonth, $EndMonth - my ($StartMonth, $EndMonth); - if ($Period) { - # -p: get date range - ($StartMonth, $EndMonth) = &SplitPeriod($Period); - die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth); +### get a time period to act on from --month option; +### if empty, default to last month +### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' +### OUT: $Verbal,$SQL: verbal description and WHERE-clause +### of the chosen time period + my ($Month) = @_; + # define result variables + my ($Verbal, $SQL); + # define a regular expression for a month + my $REMonth = '\d{4}-\d{2}'; + + # default to last month if option is not set + if(!$Month) { + $Month = &LastMonth; + } + + # check for valid input + if ($Month =~ /^$REMonth$/) { + # single month (YYYY-MM) + ($Month) = &CheckMonth($Month); + $Verbal = $Month; + $SQL = sprintf("month = '%s'",$Month); + } elsif ($Month =~ /^$REMonth:$REMonth$/) { + # time period (YYYY-MM:YYYY-MM) + $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); + $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); + } elsif ($Month =~ /^all$/i) { + # special case: ALL + $Verbal = 'all time'; + $SQL = ''; } else { - # set $StartMonth = $EndMonth = $Month if -p is not set - $StartMonth = $EndMonth = $Month; - }; - return ($StartMonth, $EndMonth); + # invalid input + return (undef,undef); + } + + return ($Verbal,$SQL); }; ################################################################################ @@ -278,12 +303,23 @@ sub LastMonth { ################################################################################ sub CheckMonth { ################################################################################ -### check if input is a valid month in YYYY-MM form -### IN : $Month: month -### OUT: TRUE / FALSE - my ($Month) = @_; - return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/); - return 1; +### check if input (in YYYY-MM form) is valid with MM between 01 and 12; +### otherwise, fix it +### IN : @Month: array of month +### OUT: @Month: a valid month + my (@Month) = @_; + foreach my $Month (@Month) { + my ($OldMonth) = $Month; + my ($CalMonth) = substr ($Month, -2); + if ($CalMonth < 1 or $CalMonth > 12) { + $CalMonth = '12' if $CalMonth > 12; + $CalMonth = '01' if $CalMonth < 1; + substr($Month, -2) = $CalMonth; + &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". + "and '12'), set to '%s'.",$OldMonth,$Month)); + } + } + return @Month; }; ################################################################################ @@ -291,10 +327,10 @@ sub SplitPeriod { ################################################################################ ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month ### IN : $Period: time period -### OUT: $StartMonth, Â$EndMonth +### OUT: $StartMonth, $EndMonth my ($Period) = @_; - return (undef,undef) if $Period !~ /^\d{4}-\d{2}:\d{4}-\d{2}$/; my ($StartMonth, $EndMonth) = split /:/, $Period; + ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); # switch parameters as necessary if ($EndMonth gt $StartMonth) { return ($StartMonth, $EndMonth); @@ -310,7 +346,8 @@ sub ListMonth { ### IN : $StartMonth, $EndMonth ### OUT: @Months: array containing all months from $StartMonth to $EndMonth my ($StartMonth, $EndMonth) = @_; - return (undef,undef) if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/); + return (undef,undef) + if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/); # return if $StartMonth = $EndMonth return ($StartMonth) if ($StartMonth eq $EndMonth); # set $Year, $Month from $StartMonth @@ -335,64 +372,96 @@ sub ListMonth { sub OutputData { ################################################################################ ### read database query results from DBHandle and print results with formatting -### IN : $Format : format specifier -### $FileName: file name template (-f): filename-YYYY-MM -### $DBQuery : database query handle with executed query, +### IN : $Format : format specifier +### $Comments : print or suppress all comments for machine-readable output +### $GroupBy : primary sorting order (month or key) +### $Precision: number of digits right of decimal point (0 or 2) +### $ValidKeys: reference to a hash containing all valid keys +### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM +### $DBQuery : database query handle with executed query, ### containing $Month, $Key, $Value -### $PadGroup: padding length for newsgroups field (optional) for 'pretty' - my ($Format, $FileName, $DBQuery, $PadGroup) = @_; - my ($Handle, $OUT); +### $PadGroup : padding length for key field (optional) for 'pretty' + my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, + $DBQuery, $PadGroup) = @_; + my %ValidKeys = %{$ValidKeys} if $ValidKeys; + my ($FileName, $Handle, $OUT); our $LastIteration; + + # define output types + my %LegalOutput; + @LegalOutput{('dump',,'list','pretty')} = (); + # bail out if format is unknown + &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); + while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { + # don't display invalid keys + if(%ValidKeys and !defined($ValidKeys{$Key})) { + # FIXME + # &Bleat(1,sprintf("DROPPED: %s",$Key)); + next; + }; + # care for correct sorting order and abstract from month and keys: + # $Caption will be $Month or $Key, according to sorting order, + # and $Key will be $Key or $Month, respectively + my $Caption; + if ($GroupBy eq 'key') { + $Caption = $Key; + $Key = $Month; + } else { + $Caption = $Month; + } # set output file handle - if (!$FileName) { + if (!$FileTempl) { $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT - } elsif (!defined($LastIteration) or $LastIteration ne $Month) { + } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { close $OUT if ($LastIteration); - open ($OUT,sprintf('>%s-%s',$FileName,$Month)) or die sprintf("$MySelf: E: Cannot open output file '%s-%s': $!\n",$FileName,$Month); + # safeguards for filename creation: + # replace potential problem characters with '_' + $FileName = sprintf('%s-%s',$FileTempl,$Caption); + $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; + open ($OUT,">$FileName") + or &Bleat(2,sprintf("Cannot open output file '%s': $!", + $FileName)); $Handle = $OUT; }; - print $Handle &FormatOutput($Format, $Month, $Key, $Value, $PadGroup); - $LastIteration = $Month; + print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, + $Precision, $PadGroup); + $LastIteration = $Caption; }; - close $OUT if ($FileName); + close $OUT if ($FileTempl); }; ################################################################################ sub FormatOutput { ################################################################################ ### format information for output according to format specifier -### IN : $Format : format specifier -### $Month : month (as YYYY-MM) -### $Key : newsgroup, client, ... -### $Value : number of postings with that attribute -### $PadGroup: padding length for key field (optional) for 'pretty' +### IN : $Format : format specifier +### $Comments : print or suppress all comments for machine-readable output +### $Caption : month (as YYYY-MM) or $Key, according to sorting order +### $Key : newsgroup, client, ... or $Month, as above +### $Value : number of postings with that attribute +### $Precision: number of digits right of decimal point (0 or 2) +### $PadGroup : padding length for key field (optional) for 'pretty' ### OUT: $Output: formatted output - my ($Format, $Month, $Key, $Value, $PadGroup) = @_; - - # define output types - my %LegalOutput; - @LegalOutput{('dump','dumpgroup','list','pretty')} = (); - # bail out if format is unknown - die "$MySelf: E: Unknown output type '$Format'!\n" if !exists($LegalOutput{$Format}); - + my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadGroup) = @_; my ($Output); - # keep last month in mind + # keep last caption in mind our ($LastIteration); + # create one line of output if ($Format eq 'dump') { - # output as dump (ng nnnnn) - $Output = sprintf ("%s %u\n",$Key,$Value); - } elsif ($Format eq 'dumpgroup') { - # output as dump (YYYY-NN: nnnnn) - $Output = sprintf ("%s: %5u\n",$Month,$Value); + # output as dump (key value) + $Output = sprintf ("# %s:\n",$Caption) + if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); + $Output .= sprintf ("%s %u\n",$Key,$Value); } elsif ($Format eq 'list') { - # output as list (YYYY-NN: ng nnnnn) - $Output = sprintf ("%s: %s %u\n",$Month,$Key,$Value); + # output as list (caption key value) + $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); } elsif ($Format eq 'pretty') { - # output as table - $Output = sprintf ("----- %s:\n",$Month) if (!defined($LastIteration) or $Month ne $LastIteration); - $LastIteration = $Month; - $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%5u\n",$PadGroup) : "%s %u\n",$Key,$Value); + # output as a table + $Output = sprintf ("# ----- %s:\n",$Caption) + if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); + $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%10.*f\n",$PadGroup) : + "%s %.*f\n",$Key,$Precision,$Value); }; return $Output; }; @@ -408,46 +477,226 @@ sub SQLHierarchies { ### IN : $ShowHierarchies: boolean value ### OUT: SQL code my ($ShowHierarchies) = @_; - return $ShowHierarchies ? '' : "AND newsgroup NOT LIKE '%.ALL'"; + return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; }; ################################################################################ sub GetMaxLength { ################################################################################ ### get length of longest field in future query result -### IN : $DBHandle : database handel -### $Table : table to query -### $Field : field to check -### $WhereClause: WHERE clause -### @BindVars : bind variables for WHERE clause +### IN : $DBHandle : database handel +### $Table : table to query +### $Field : field to check +### $WhereClause : WHERE clause +### $HavingClause: HAVING clause +### @BindVars : bind variables for WHERE clause ### OUT: $Length: length of longest instnace of $Field - my ($DBHandle,$Table,$Field,$WhereClause,@BindVars) = @_; - my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) FROM %s WHERE %s",$Field,$Table,$WhereClause)); - $DBQuery->execute(@BindVars) or warn sprintf("$MySelf: W: Can't get field length for %s from table %s: $DBI::errstr\n",$Field,$Table); + my ($DBHandle,$Table,$Field,$WhereClause,$HavingClause,@BindVars) = @_; + my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) ". + "FROM %s %s %s",$Field,$Table, + $WhereClause,$HavingClause ? + 'GROUP BY newsgroup' . $HavingClause . + ' ORDER BY LENGTH(newsgroup) '. + 'DESC LIMIT 1': '')); + $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". + "for '%s' from table '%s': ". + "$DBI::errstr",$Field,$Table)); my ($Length) = $DBQuery->fetchrow_array; return $Length; }; +################################################################################ +sub SQLSortOrder { +################################################################################ +### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and +### $OptOrderBy (secondary sorting), both ascending or descending; +### descending sorting order is done by adding '-desc' +### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' +### $OrderBy: secondary sort by month/newsgroups (default) +### or number of 'postings' +### OUT: a SQL ORDER BY clause + my ($GroupBy,$OrderBy) = @_; + my ($GroupSort,$OrderSort) = ('',''); + # $GroupBy (primary sorting) + if (!$GroupBy) { + $GroupBy = 'month'; + } else { + ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); + if ($GroupBy =~ /group/i) { + $GroupBy = 'newsgroup'; + } else { + $GroupBy = 'month'; + } + } + my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; + # $OrderBy (secondary sorting) + if (!$OrderBy) { + $OrderBy = $Secondary; + } else { + ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); + if ($OrderBy =~ /posting/i) { + $OrderBy = "postings $OrderSort, $Secondary"; + } else { + $OrderBy = "$Secondary $OrderSort"; + } + } + return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); +}; + +################################################################################ +sub SQLParseOrder { +################################################################################ +### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. +### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' +### IN : $OrderOption: order option (see above) +### OUT: parameter to sort by, +### sort order ('DESC' or nothing, meaning 'ASC') + my ($OrderOption) = @_; + my $SortOrder = ''; + if ($OrderOption =~ s/-?desc$//i) { + $SortOrder = 'DESC'; + } else { + $OrderOption =~ s/-?asc$//i + } + return ($OrderOption,$SortOrder); +}; + ################################################################################ sub SQLGroupList { ################################################################################ -### explode list of newsgroups separated by : (with wildcards) to a SQL WHERE -### clause +### explode list of newsgroups separated by : (with wildcards) +### to a SQL 'WHERE' expression ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) -### OUT: SQL code, list of newsgroups +### OUT: SQL code to become part of a 'WHERE' clause, +### list of newsgroups for SQL bindings my ($Newsgroups) = @_; + # substitute '*' wildcard with SQL wildcard character '%' $Newsgroups =~ s/\*/%/g; - return ('newsgroup LIKE ?', $Newsgroups) if $Newsgroups !~ /:/; + # just one newsgroup? + return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; + # list of newsgroups separated by ':' my $SQL = '('; my @GroupList = split /:/, $Newsgroups; foreach (@GroupList) { $SQL .= ' OR ' if $SQL gt '('; - $SQL .= 'newsgroup LIKE ?'; + $SQL .= SQLGroupWildcard($_); }; $SQL .= ')'; return ($SQL,@GroupList); }; +################################################################################ +sub SQLGroupWildcard { +################################################################################ +### build a valid SQL 'WHERE' expression with or without wildcards +### IN : $Newsgroup: newsgroup expression, probably with wildcard +### (group.name or group.name.%) +### OUT: SQL code to become part of a 'WHERE' clause + my ($Newsgroup) = @_; + # FIXME: check for validity + if ($Newsgroup !~ /%/) { + return 'newsgroup = ?'; + } else { + return 'newsgroup LIKE ?'; + } +}; + +################################################################################ +sub SQLSetBounds { +################################################################################ +### set upper and/or lower boundary (number of postings) +### IN : $Type: 'level', 'average', 'sum' or 'default' +### $LowBound,$UppBound: lower/upper boundary, respectively +### OUT: SQL code to become part of a WHERE or HAVING clause + my ($Type,$LowBound,$UppBound) = @_; + ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); + if($LowBound and $UppBound and $LowBound > $UppBound) { + &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". + "$UppBound, exchanging boundaries."); + ($LowBound,$UppBound) = ($UppBound,$LowBound); + } + # default to 'default' + my $WhereHavingFunction = 'postings'; + # set $LowBound to SQL statement: + # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' + if ($Type eq 'level') { + $WhereHavingFunction = 'MIN(postings)' + } elsif ($Type eq 'average') { + $WhereHavingFunction = 'AVG(postings)' + } elsif ($Type eq 'sum') { + $WhereHavingFunction = 'SUM(postings)' + } + $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound); + # set $LowBound to SQL statement: + # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' + if ($Type eq 'level') { + $WhereHavingFunction = 'MAX(postings)' + } elsif ($Type eq 'average') { + $WhereHavingFunction = 'AVG(postings)' + } elsif ($Type eq 'sum') { + $WhereHavingFunction = 'SUM(postings)' + } + $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound); + return ($LowBound,$UppBound); +}; + +################################################################################ +sub SQLCheckNumber { +################################################################################ +### check if input is a valid positive integer; otherwise, make it one +### IN : @Numbers: array of parameters +### OUT: @Numbers: a valid positive integer + my (@Numbers) = @_; + foreach my $Number (@Numbers) { + if ($Number and $Number < 0) { + &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); + $Number = -$Number; + } + $Number = '' if ($Number and $Number !~ /^\d+$/); + } + return @Numbers; +}; + +################################################################################ +sub SQLBuildClause { +################################################################################ +### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause +### from multiple expressions which *may* be empty +### IN : $Type: 'where', 'having', 'group' or 'order' +### @Expressions: array of expressions +### OUT: $SQLClause: a SQL clause + my ($Type,@Expressions) = @_; + my ($SQLClause,$Separator,$Statement); + # set separator ('AND' or ',') + if ($Type eq 'where' or $Type eq 'having') { + $Separator = 'AND'; + } else { + $Separator = ','; + } + # set statement + if ($Type eq 'where') { + $Statement = 'WHERE'; + } elsif ($Type eq 'order') { + $Statement = 'ORDER BY'; + } elsif ($Type eq 'having') { + $Statement = 'HAVING'; + } else { + $Statement = 'GROUP BY'; + } + # build query from expressions with separators + foreach my $Expression (@Expressions) { + if ($Expression) { + $SQLClause .= " $Separator " if ($SQLClause); + $SQLClause .= $Expression; + } + } + # add statement in front if not already present + $SQLClause = " $Statement " . $SQLClause + if ($SQLClause and $SQLClause !~ /$Statement/); + return $SQLClause; +}; + + #####------------------------------- done ---------------------------------##### 1; diff --git a/doc/TODO b/doc/TODO index 52c60ac..ce6f795 100644 --- a/doc/TODO +++ b/doc/TODO @@ -29,6 +29,9 @@ Bug numbers refer to the Mantis issue tracker at . - Add other reports NewsStats should include some other kinds of reports (stats on used clients, on postings hosts/servers, ...) + - Add tools for database management + NewsStats should offer tools e.g. to inject postings into the 'raw' database, + or to split databases. * Individual improvements + NewsStats.pm @@ -43,10 +46,15 @@ Bug numbers refer to the Mantis issue tracker at . - General tests and optimisations + feedlog.pl + - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm) + - Gracefully handle interruptions to database connection, at least log + lost postings (Bug #30). + - Buffer output that cannot be written to database (Bug #31). - Add / enhance / test error handling - General tests and optimisations + gatherstats.pl + - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm) - Use hierarchy information (see GroupInfo above) - Add gathering of other stats (clients, hosts, ...) - better modularisation (code reuse for other reports!) @@ -54,13 +62,6 @@ Bug numbers refer to the Mantis issue tracker at . - General tests and optimisations + groupstats.pl - - output: fix formatting - GetMaxLength does not work for special queries - like -l or -b - - beautify output (formats / captions) - - -a: improve code - it doesn't make that much sense to get first/last - month from database just to query that same database with a time period - that equals no time period ... this part of the WHERE clause should be - dropped instead - better modularisation (code reuse for other reports!) - Add / enhance / test error handling - General tests and optimisations diff --git a/groupstats.pl b/groupstats.pl index bed36e2..14d19c6 100755 --- a/groupstats.pl +++ b/groupstats.pl @@ -7,7 +7,7 @@ # # It is part of the NewsStats package. # -# Copyright (c) 2010 Thomas Hochstein +# Copyright (c) 2010-2012 Thomas Hochstein # # It can be redistributed and/or modified under the same terms under # which Perl itself is published. @@ -19,226 +19,229 @@ BEGIN { } use strict; -use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); +use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList); use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); ################################# Main program ################################# ### read commandline options -my %Options = &ReadOptions('m:p:an:o:t:l:b:iscqdf:g:'); +my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments, + $OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth, + $OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound); +GetOptions ('b|boundary=s' => \$OptBoundType, + 'c|captions!' => \$OptCaptions, + 'checkgroups=s' => \$OptCheckgroupsFile, + 'comments!' => \$OptComments, + 'filetemplate=s' => \$OptFileTemplate, + 'f|format=s' => \$OptFormat, + 'g|group-by=s' => \$OptGroupBy, + 'groupsdb=s' => \$OptGroupsDB, + 'l|lower=i' => \$LowBound, + 'm|month=s' => \$OptMonth, + 'n|newsgroups=s' => \$OptNewsgroups, + 'o|order-by=s' => \$OptOrderBy, + 'r|report=s' => \$OptReportType, + 's|sums!' => \$OptSums, + 'u|upper=i' => \$UppBound, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; +# parse parameters +# $OptComments defaults to TRUE +$OptComments = 1 if (!defined($OptComments)); +# force --nocomments when --filetemplate is used +$OptComments = 0 if ($OptFileTemplate); +# parse $OptBoundType +if ($OptBoundType) { + if ($OptBoundType =~ /level/i) { + $OptBoundType = 'level'; + } elsif ($OptBoundType =~ /av(era)?ge?/i) { + $OptBoundType = 'average'; + } elsif ($OptBoundType =~ /sums?/i) { + $OptBoundType = 'sum'; + } else { + $OptBoundType = 'default'; + } +} +# parse $OptReportType +if ($OptReportType) { + if ($OptReportType =~ /av(era)?ge?/i) { + $OptReportType = 'average'; + } elsif ($OptReportType =~ /sums?/i) { + $OptReportType = 'sum'; + } else { + $OptReportType = 'default'; + } +} +# read list of newsgroups from --checkgroups +# into a hash reference +my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile; ### read configuration -my %Conf = %{ReadConfig('newsstats.conf')}; +my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; ### override configuration via commandline options my %ConfOverride; -$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; +$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; &OverrideConfig(\%Conf,\%ConfOverride); -### check for incompatible command line options -# you can't mix '-t', '-b' and '-l' -# -b/-l take preference over -t, and -b takes preference over -l -# you can't use '-f' with '-b' or '-l' -if ($Options{'b'} or $Options{'l'}) { - if ($Options{'f'}) { - # drop -f - warn ("$MySelf: W: You cannot save the report to monthly files when using top lists (-b) or levels (-l). Filename template '-f $Options{'f'}' was ignored.\n"); - undef($Options{'f'}); - }; - if ($Options{'t'}) { - # drop -t - warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n"); - undef($Options{'t'}); - }; - if ($Options{'b'} and $Options{'l'}) { - # drop -l - warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n"); - undef($Options{'l'}); - }; - # -q/-d don't work with -b or -l - warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'}; - warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'}; -}; - -### check output type -# default output type to 'pretty' -$Options{'o'} = 'pretty' if !$Options{'o'}; -# fail if more than one newsgroup is combined with 'dumpgroup' type -die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/); -# accept 'dumpgroup' only with -n -if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) { - $Options{'o'} = 'dump'; - warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n"); -}; -# set output type to 'pretty' for -l -if ($Options{'l'} and $Options{'o'} ne 'pretty') { - $Options{'o'} = 'pretty'; - warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n"); -}; -# set output type to 'dump' for -f -if ($Options{'f'} and $Options{'o'} ne 'dump') { - $Options{'o'} = 'dump'; - warn ("$MySelf: W: Output type forced to '-o dump' due to usage of '-f'.\n"); -}; - ### init database my $DBHandle = InitDB(\%Conf,1); -### get time period -my ($StartMonth,$EndMonth); -# if '-a' is set, set start/end month from database -# FIXME - it doesn't make that much sense to get first/last month from database to query it -# with a time period that equals no time period ... -if ($Options{'a'}) { - undef($Options{'m'}); - undef($Options{'p'}); - my $DBQuery = $DBHandle->prepare(sprintf("SELECT MIN(month),MAX(month) FROM %s.%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - $DBQuery->execute or die sprintf("$MySelf: E: Can't get MIN/MAX month from %s.%s: %s\n",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); - ($StartMonth,$EndMonth) = $DBQuery->fetchrow_array; -} else { - ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); -}; -# if -p or -a are set: drop -m -undef $Options{'m'} if ($Options{'p'} or $Options{'a'}); -# if time period is more than one month: force output type to '-o pretty' or '-o dumpgroup' -if ($Options{'o'} eq 'dump' and ($Options{'p'} or $Options{'a'})) { - if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) { - # just one newsgroup is defined - warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n"); - $Options{'o'} = 'dumpgroup'; - } elsif (!defined($Options{'f'})) { - # more than one newsgroup - and no file output - warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n"); - $Options{'o'} = 'pretty'; - } -}; +### get time period and newsgroups, prepare SQL 'WHERE' clause +# get time period +# and set caption for output and expression for SQL 'WHERE' clause +my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth); +# bail out if --month is invalid +&Bleat(2,"--month option has an invalid format - ". + "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod; +# get list of newsgroups and set expression for SQL 'WHERE' clause +# with placeholders as well as a list of newsgroup to bind to them +my ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups) + if $OptNewsgroups;; -### create report -# get list of newsgroups (-n) -my ($QueryGroupList,$QueryThreshold,@GroupList,@Params); -my $Newsgroups = $Options{'n'}; -if ($Newsgroups) { - # explode list of newsgroups for WHERE clause - ($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups); +### build SQL WHERE clause (and HAVING clause, if needed) +my ($SQLWhereClause,$SQLHavingClause); +# $OptBoundType 'level' +if ($OptBoundType and $OptBoundType ne 'default') { + $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, + $SQLWhereNewsgroups,&SQLHierarchies($OptSums)); + $SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType, + $LowBound,$UppBound)); +# $OptBoundType 'threshold' / 'default' or none } else { - # set to dummy value (always true) - $QueryGroupList = 1; -}; - -# manage thresholds -if (defined($Options{'t'})) { - if ($Options{'i'}) { - # -i: list groups below threshold - $QueryThreshold .= ' postings < ?'; - } else { - # default: list groups above threshold - $QueryThreshold .= ' postings > ?'; - }; - # push threshold to Params - push @Params,$Options{'t'}; -} else { - # set to dummy value (always true) - $QueryThreshold = 1; + $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, + $SQLWhereNewsgroups,&SQLHierarchies($OptSums), + &SQLSetBounds('default',$LowBound,$UppBound)); } -# construct WHERE clause -# $QueryGroupList is "list of newsgroup" (or 1), -# $QueryThreshold is threshold definition (or 1), -# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL) -# according to setting of -s -my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',$QueryGroupList,$QueryThreshold,&SQLHierarchies($Options{'s'})); +### get sort order and build SQL 'ORDER BY' clause +# default to 'newsgroup' for $OptBoundType 'level' or 'average' +$OptGroupBy = 'newsgroup' if (!$OptGroupBy and + $OptBoundType and $OptBoundType ne 'default'); +# force to 'month' for $OptReportType 'average' or 'sum' +$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default'); +# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause +my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy); +# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy) +# set it to 'month' or 'key' for OutputData() +$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key'; -# get length of longest newsgroup delivered by query for formatting purposes -# FIXME -my $MaxLength = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,(@GroupList,@Params)); +### get report type and build SQL 'SELECT' query +my $SQLSelect; +my $SQLGroupClause = ''; +my $Precision = 0; # number of digits right of decimal point for output +if ($OptReportType and $OptReportType ne 'default') { + $SQLGroupClause = 'GROUP BY newsgroup'; + # change $SQLOrderClause: replace everything before 'postings' + $SQLOrderClause =~ s/BY.+postings/BY postings/; + if ($OptReportType eq 'average') { + $SQLSelect = "'All months',newsgroup,AVG(postings)"; + $Precision = 2; + # change $SQLOrderClause: replace 'postings' with 'AVG(postings)' + $SQLOrderClause =~ s/postings/AVG(postings)/; + } elsif ($OptReportType eq 'sum') { + $SQLSelect = "'All months',newsgroup,SUM(postings)"; + # change $SQLOrderClause: replace 'postings' with 'SUM(postings)' + $SQLOrderClause =~ s/postings/SUM(postings)/; + } + } else { + $SQLSelect = 'month,newsgroup,postings'; +}; -my ($OrderClause,$DBQuery); -# -b (best of / top list) defined? -if (!defined($Options{'b'}) and !defined($Options{'l'})) { - # default: neither -b nor -l - # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC" - # according to -q and -d - $OrderClause = 'newsgroup'; - $OrderClause = 'postings' if $Options{'q'}; - $OrderClause .= ' DESC' if $Options{'d'}; - # prepare query: get number of postings per group from groups table for given months and newsgroups - $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); -} elsif ($Options{'b'}) { - # -b is set (then -l can't be!) - # set sorting order (-i): top or flop list? - if ($Options{'i'}) { - $OrderClause = 'postings'; - } else { - $OrderClause = 'postings DESC'; - }; - # set -b to 10 if < 1 (Top 10) - $Options{'b'} = 10 if $Options{'b'} !~ /^\d*$/ or $Options{'b'} < 1; - # push LIMIT to Params - push @Params,$Options{'b'}; - # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT - $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); -} else { - # -l must be set now, as all other cases have been taken care of - # which kind of level (-i): more than -l x or less than -l x? - my ($Level); - if ($Options{'i'}) { - $Level = '<'; - } else { - $Level = '>'; - }; - # prepare and execute query: get list of newsgroups meeting level condition - $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level)); - $DBQuery->execute($StartMonth,$EndMonth,@GroupList,$Options{'l'}) - or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); +### get length of longest newsgroup name delivered by query +### for formatting purposes +my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; +my $MaxLength = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'}, + $Field,$SQLWhereClause,$SQLHavingClause, + @SQLBindNewsgroups); + +### build and execute SQL query +my ($DBQuery); +# special query preparation for $OptBoundType 'level', 'average' or 'sums' +if ($OptBoundType and $OptBoundType ne 'default') { + # prepare and execute first query: + # get list of newsgroups meeting level conditions + $DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '. + 'GROUP BY newsgroup %s', + $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $SQLWhereClause,$SQLHavingClause)); + $DBQuery->execute(@SQLBindNewsgroups) + or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", + $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $DBI::errstr)); # add newsgroups to a comma-seperated list ready for IN(...) query my $GroupList; while (my ($Newsgroup) = $DBQuery->fetchrow_array) { - $GroupList .= ',' if (defined($GroupList) and $GroupList ne ''); + $GroupList .= ',' if $GroupList; $GroupList .= "'$Newsgroup'"; }; - $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$GroupList,$WhereClause)); -}; + # enhance $WhereClause + if ($GroupList) { + $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause, + sprintf('newsgroup IN (%s)',$GroupList)); + } else { + # condition cannot be satisfied; + # force query to fail by adding '0=1' + $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1'); + } +} + +# prepare query +$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s', + $SQLSelect, + $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $SQLWhereClause,$SQLGroupClause,$ + SQLOrderClause)); # execute query -$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params) - or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); +$DBQuery->execute(@SQLBindNewsgroups) + or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", + $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $DBI::errstr)); -# output results -# reset caption (-c) if -f is set -undef($Options{'c'}) if $Options{'f'}; -# print caption (-c) with time period if -m or -p is set -if ($Options{'c'}) { - if ($Options{'m'}) { - printf ("----- Report for %s\n",$StartMonth); - } else { - printf ("----- Report from %s to %s %s\n",$StartMonth,$EndMonth,$Options{'a'} ? '(all months)' : ''); - }; -}; -# print caption (-c) with newsgroup list if -n is set -printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; -# print caption (-c) with threshold if -t is set, taking -i in account -printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'}; -if (!defined($Options{'b'}) and !defined($Options{'l'})) { - # default: neither -b nor -l - &OutputData($Options{'o'},$Options{'f'},$DBQuery,$MaxLength); -} elsif ($Options{'b'}) { - # -b is set (then -l can't be!) - # we have to read in the query results ourselves, as they do not have standard layout - while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { - # we just assign "top x" or "bottom x" instead of a month for the caption and force an output type of pretty - print &FormatOutput('pretty', ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); - }; -} else { - # -l must be set now, as all other cases have been taken care of - # print caption (-c) with level, taking -i in account - printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'}) if $Options{'c'}; - # we have to read in the query results ourselves, as they do not have standard layout - while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { - # we just switch $Newsgroups and $Month for output generation - print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); - }; -}; +### output results +# set default to 'pretty' +$OptFormat = 'pretty' if !$OptFormat; +# print captions if --caption is set +if ($OptCaptions && $OptComments) { + # print time period with report type + my $CaptionReportType= '(number of postings for each month)'; + if ($OptReportType and $OptReportType ne 'default') { + $CaptionReportType= '(average number of postings for each month)' + if $OptReportType eq 'average'; + $CaptionReportType= '(number of all postings for that time period)' + if $OptReportType eq 'sum'; + } + printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType); + # print newsgroup list if --newsgroups is set + printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups))) + if $OptNewsgroups; + # print boundaries, if set + my $CaptionBoundary= '(counting only month fulfilling this condition)'; + if ($OptBoundType and $OptBoundType ne 'default') { + $CaptionBoundary= '(every single month)' if $OptBoundType eq 'level'; + $CaptionBoundary= '(on average)' if $OptBoundType eq 'average'; + $CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum'; + } + printf("# ----- Threshold: %s %s x %s %s %s\n", + $LowBound ? $LowBound : '',$LowBound ? '=>' : '', + $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary) + if ($LowBound or $UppBound); + # print primary and secondary sort order + printf("# ----- Grouped by %s (%s), sorted %s%s\n", + ($GroupBy eq 'month') ? 'Months' : 'Newsgroups', + ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending', + ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '', + ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending'); +} + +# output data +&OutputData($OptFormat,$OptComments,$GroupBy,$Precision, + $OptCheckgroupsFile ? $ValidGroups : '', + $OptFileTemplate,$DBQuery,$MaxLength); ### close handles $DBHandle->disconnect; @@ -253,60 +256,67 @@ groupstats - create reports on newsgroup usage =head1 SYNOPSIS -B [B<-Vhiscqd>] [B<-m> I | B<-p> I | B<-a>] [B<-n> I] [B<-t> I] [B<-l> I] [B<-b> I] [B<-o> I] [B<-f> I] [B<-g> I] +B [B<-Vhcs> B<--comments>] [B<-m> I[:I] | I] [B<-n> I] [B<--checkgroups> I] [B<-r> I] [B<-l> I] [B<-u> I] [B<-b> I] [B<-g> I] [B<-o> I] [B<-f> I] [B<--filetemplate> I] [B<--groupsdb> I] =head1 REQUIREMENTS -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -DBI - -=back +See L. =head1 DESCRIPTION This script create reports on newsgroup usage (number of postings per group per month) taken from result tables created by -F. +B. -The time period to act on defaults to last month; you can assign -another month via the B<-m> switch or a time period via the B<-p> -switch; the latter takes preference. +=head2 Features and options + +=head3 Time period and newsgroups + +The time period to act on defaults to last month; you can assign another +time period or a single month (or drop all time constraints) via the +B<--month> option (see below). B will process all newsgroups by default; you can limit -that to only some newsgroups by supplying a list of those groups via -B<-n> (see below). You can include hierarchy levels in the output by -adding the B<-s> switch (see below). +processing to only some newsgroups by supplying a list of those groups via +B<--newsgroups> option (see below). You can include hierarchy levels in +the output by adding the B<--sums> switch (see below). Optionally +newsgroups not present in a checkgroups file can be excluded from output, +sse B<--checkgroups> below. -Furthermore you can set a threshold via B<-t> so that only newsgroups -with more postings per month will be included in the report. You can -invert that by the B<-i> switch so only newsgroups with less than -I postings per month will be included. +=head3 Report type -You can sort the output by number of postings per month instead of the -default (alphabetical list of newsgroups) by using B<-q>; you can -reverse the sorting order (from highest to lowest or in reversed -alphabetical order) by using B<-d>. +You can choose between different B<--report> types: postings per month, +average postings per month or all postings summed up; for details, see +below. -Furthermore, you can create a list of newsgroups that had consistently -more (or less) than x postings per month during the whole report -period by using B<-l> (together with B as needed). +=head3 Upper and lower boundaries -Last but not least you can create a "best of" list of the top x -newsgroups via B<-b> (or a "worst of" list by adding B). +Furthermore you can set an upper and/or lower boundary to exclude some +results from output via the B<--lower> and B<--upper> options, +respectively. By default, all newsgroups with more and/or less postings +per month will be excluded from the result set (i.e. not shown and not +considered for average and sum reports). You can change the meaning of +those boundaries with the B<--boundary> option. For details, please see +below. -By default, B will dump an alphabetical list of newsgroups, -one per line, followed by the number of postings in that group, for -every month. You can change the output format by using B<-o> (see -below). Captions can be added by setting the B<-c> switch. +=head3 Sorting and formatting the output + +By default, all results are grouped by month; you can group results by +newsgroup instead via the B<--groupy-by> option. Within those groups, the +list of newsgroups (or months) is sorted alphabetically (or +chronologically, respectively) ascending. You can change that order (and +sort by number of postings) with the B<--order-by> option. For details and +exceptions, please see below. + +The results will be formatted as a kind of table; you can change the +output format to a simple list or just a list of newsgroups and number of +postings with the B<--format> option. Captions will be added by means of +the B<--caption> option; all comments (and captions) can be supressed by +using B<--nocomments>. + +Last but not least you can redirect all output to a number of files, e.g. +one for each month, by submitting the B<--filetemplate> option, see below. +Captions and comments are automatically disabled in this case. =head2 Configuration @@ -315,37 +325,28 @@ which should be present in the same directory via Config::Auto. See doc/INSTALL for an overview of possible configuration options. -You can override configuration options via the B<-g> switch. +You can override some configuration options via the B<--groupsdb> option. =head1 OPTIONS =over 3 -=item B<-V> (version) +=item B<-V>, B<--version> -Print out version and copyright information on B and exit. +Print out version and copyright information and exit. -=item B<-h> (help) +=item B<-h>, B<--help> Print this man page and exit. -=item B<-m> I (month) +=item B<-m>, B<--month> I -Set processing period to a month in YYYY-MM format. Ignored if B<-p> -or B<-a> is set. +Set processing period to a single month in YYYY-MM format or to a time +period between two month in YYYY-MM:YYYY-MM format (two month, separated +by a colon). By using the keyword I instead, you can set no +processing period to process the whole database. -=item B<-p> I (period) - -Set processing period to a time period between two month, each in -YYYY-MM format, separated by a colon. Overrides B<-m>. Ignored if -B<-a> is set. - -=item B<-a> (all) - -Set no processing period (process whole database). Overrides B<-m> -and B<-p>. - -=item B<-n> I (newsgroups) +=item B<-n>, B<--newsgroups> I Limit processing to a certain set of newsgroups. I can be a single newsgroup name (de.alt.test), a newsgroup hierarchy @@ -354,41 +355,7 @@ example de.test:de.alt.test:de.newusers.* -=item B<-t> I (threshold) - -Only include newsgroups with more than I postings per -month. Can be inverted by the B<-i> switch so that only newsgroups -with less than I postings will be included. - -This setting will be ignored if B<-l> or B<-b> is set. - -=item B<-l> I (level) - -Only include newsgroups with more than I postings per -month, every month during the whole reporting period. Can be inverted -by the B<-i> switch so that only newsgroups with less than I -postings every single month will be included. Output will be ordered -by newsgroup name, followed by month. - -This setting will be ignored if B<-b> is set. Overrides B<-t> and -can't be used together with B<-q>, B<-d> or B<-f>. - -=item B<-b> I (best of) - -Create a list of the I newsgroups with the most postings over the -whole reporting period. Can be inverted by the B<-i> switch so that a -list of the I newsgroups with the least postings over the whole -period is generated. Output will be ordered by sum of postings. - -Overrides B<-t> and B<-l> and can't be used together with B<-q>, B<-d> -or B<-f>. Output format is set to I (see below). - -=item B<-i> (invert) - -Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower -threshold or level or generate a "bottom list" instead of a top list. - -=item B<-s> (sum per hierarchy level) +=item B<-s>, B<--sums|--nosums> (sum per hierarchy level) Include "virtual" groups for every hierarchy level in output, for example: @@ -399,62 +366,219 @@ example: See the B man page for details. -=item B<-o> I (output format) +=item B<--checkgroups> I -Set output format. Default is I, which will print a header for -each new month, followed by an alphabetical list of newsgroups, each -on a new line, followed by the number of postings in that month. -B will try to align newsgroup names and posting counts. -Usage of B<-b> will force this format; it cannot be used together with -B<-f>. +Restrict output to those newgroups present in a file in checkgroups format +(one newgroup name per line; everything after the first whitespace on each +line is ignored). All other newsgroups will be removed from output. -I format is used to create an easily parsable output consisting -of an alphabetical list of newsgroups, each on a new line, followed by -the number of postings in that month, without any alignment. This -default format can't be used with time periods of more than one month. -Usage of B<-f> will force this format. +=item B<-r>, B<--report> I -I format is like I, but will print the month in front of -the newsgroup name. +Choose the report type: I, I or I -I format can only be use with a group list (see B<-n>) of -exactly one newsgroup and is like I, but will output months, -followed by the number of postings. +By default, B will report the number of postings for each +newsgroup in each month. But it can also report the average number of +postings per group for all months or the total sum of postings per group +for all months. -=item B<-c> (captions) +For report types I and I, the B option has no +meaning and will be silently ignored (see below). -Add captions to output (reporting period, newsgroups list, threshold -and so on). +=item B<-l>, B<--lower> I -This setting will be ignored if B<-f> is set. +Set the lower boundary. See B<--boundary> below. -=item B<-q> (quantity of postings) +=item B<-l>, B<--upper> I -Sort by number of postings instead of by newsgroup names. +Set the upper boundary. See B<--boundary> below. -Cannot be used with B<-l> or B<-b>. +=item B<-b>, B<--boundary> I -=item B<-d> (descending) +Set the boundary type to one of I, I, I or +I. -Change sort order to descending. +By default, all newsgroups with more postings per month than the upper +boundary and/or less postings per month than the lower boundary will be +excluded from further processing. For the default report that means each +month only newsgroups with a number of postings between the boundaries +will be displayed. For the other report types, newsgroups with a number of +postings exceeding the boundaries in all (!) months will not be +considered. -Cannot be used with B<-l> or B<-b>. +For example, lets take a list of newsgroups like this: -=item B<-f> I (output file) + ----- 2012-01: + de.comp.datenbanken.misc 6 + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + ----- 2012-02: + de.comp.datenbanken.misc 8 + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + ----- 2012-03: + de.comp.datenbanken.misc 24 + de.comp.datenbanken.ms-access 83 + de.comp.datenbanken.mysql 36 -Save output to file instead of dumping it to STDOUT. B -will create one file for each month, with filenames composed by -adding year and month to the I, for example -with B<-f> I: +With C, +you'll get the following result: - stats-2010-01 - stats-2010-02 + ----- All months: + de.comp.datenbanken.ms-access 293 + de.comp.datenbanken.mysql 124 + +de.comp.datenbanken.misc has not been considered even though it has 38 +postings in total, because it has less than 25 postings in every single +month. If you want to list all newsgroups with more than 25 postings U, you'll have to set the boundary type to I, see below. + +A boundary type of I will show only those newsgroups - at all - +that satisfy the boundaries in each and every single month. With the above +list of newsgroups and +C, +you'll get this result: + + ----- All months: + de.comp.datenbanken.ms-access 293 + +de.comp.datenbanken.mysql has not been considered because it had less than +25 postings in 2012-02. + +You can use that to get a list of newsgroups that have more (or less) then +x postings during the whole reporting period. + +A boundary type of I will show only those newsgroups - at all -that +satisfy the boundaries on average. With the above list of newsgroups and +C, +you'll get this result: + + ----- All months: + de.comp.datenbanken.ms-access 293 + de.comp.datenbanken.mysql 145 + +The average number of postings in the three groups is: + + de.comp.datenbanken.misc 12.67 + de.comp.datenbanken.ms-access 97.67 + de.comp.datenbanken.mysql 48.33 + +Last but not least, a boundary type of I will show only those +newsgroups - at all - that satisfy the boundaries with the total sum of +all postings during the reporting period. With the above list of +newsgroups and +C, +you'll finally get this result: + + ----- All months: + de.comp.datenbanken.misc 38 + de.comp.datenbanken.ms-access 293 + de.comp.datenbanken.mysql 145 + + +=item B<-g>, B<--group-by> I + +By default, all results are grouped by month, sorted chronologically in +ascending order, like this: + + ----- 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + ----- 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + +The results can be grouped by newsgroups instead via +B<--group-by> I: + + ----- de.comp.datenbanken.ms-access: + 2012-01 84 + 2012-02 126 + ----- de.comp.datenbanken.mysql: + 2012-01 88 + 2012-02 21 + +By appending I<-desc> to the group-by option parameter, you can reverse +the sort order - e.g. B<--group-by> I will give: + + ----- 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + ----- 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + +Average and sums reports (see above) will always be grouped by months; +this option will therefore be ignored. + +=item B<-o>, B<--order-by> I + +Within each group (a single month or single newsgroup, see above), the +report will be sorted by newsgroup names in ascending alphabetical order +by default. You can change the sort order to descending or sort by number +of postings instead. + +=item B<-f>, B<--format> I + +Select the output format, I being the default: + + ----- 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + ----- 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + +I format looks like this: + + 2012-01 de.comp.datenbanken.ms-access 84 + 2012-01 de.comp.datenbanken.mysql 88 + 2012-02 de.comp.datenbanken.ms-access 126 + 2012-02 de.comp.datenbanken.mysql 21 + +And I format looks like this: + + # 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + # 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + +You can remove the comments by using B<--nocomments>, see below. + +=item B<-c>, B<--captions|--nocaptions> + +Add captions to output, like this: + + ----- Report for 2012-01 to 2012-02 (number of postings for each month) + ----- Newsgroups: de.comp.datenbanken.* + ----- Threshold: 10 => x <= 20 (on average) + ----- Grouped by Newsgroups (ascending), sorted by number of postings descending + +False by default. + +=item B<--comments|--nocomments> + +Add comments (group headers) to I and I output. True by default. + +Use I<--nocomments> to suppress anything except newsgroup names/months and +numbers of postings. This is enforced when using B<--filetemplate>, see below. + +=item B<--filetemplate> I + +Save output to file(s) instead of dumping it to STDOUT. B will +create one file for each month (or each newsgroup, accordant to the +setting of B<--group-by>, see above), with filenames composed by adding +year and month (or newsgroup names) to the I, for +example with B<--filetemplate> I: + + stats-2012-01 + stats-2012-02 ... and so on -This setting will be ignored if B<-l> or B<-b> is set. Output format -is set to I (see above). +B<--nocomments> is enforced, see above. -=item B<-g> I (postings per group table) +=item B<--groupsdb> I Override I from F. @@ -462,36 +586,40 @@ Override I from F. =head1 INSTALLATION -See doc/INSTALL. +See L. =head1 EXAMPLES -Show number of postings per group for lasth month in I format: +Show number of postings per group for lasth month in I format: groupstats Show that report for January of 2010 and de.alt.* plus de.test, including display of hierarchy levels: - groupstats -m 2010-01 -n de.alt.*:de.test -s + groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums -Show that report for the year of 2010 in I format: - - groupstats -p 2010-01:2010-12 -o pretty - -Only show newsgroups with less than 30 postings last month, ordered +Only show newsgroups with 30 postings or less last month, ordered by number of postings, descending, in I format: - groupstats -iqdt 30 -o pretty + groupstats --upper 30 --order-by postings-desc -Show top 10 for the first half-year of of 2010 in I format: +Show the total of all postings for the year of 2010 for all groups that +had 30 postings or less in every single month in that year, ordered by +number of postings in descending order: - groupstats -p 2010-01:2010-06 -b 10 -o pretty + groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc -Report all groups that had less than 30 postings every singele month -in the year of 2010 (I format is forced) +The same for the average number of postings in the year of 2010: + + groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc + +List number of postings per group for eacht month of 2010 and redirect +output to one file for each month, namend stats-2010-01 and so on, in +machine-readable form (without formatting): + + groupstats -m 2010-01:2010-12 -f dump --filetemplate stats - groupstats -p 2010-01:2010-12 -il 30 =head1 FILES @@ -507,7 +635,7 @@ Library functions for the NewsStats package. =item F -Runtime configuration file for B. +Runtime configuration file. =back @@ -522,11 +650,11 @@ bug tracker at L! =item - -doc/README +L =item - -doc/INSTALL +l>doc/INSTALL> =item - @@ -542,7 +670,7 @@ Thomas Hochstein =head1 COPYRIGHT AND LICENSE -Copyright (c) 2010 Thomas Hochstein +Copyright (c) 2010-2012 Thomas Hochstein This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. From 880c3eb2270c235048aedfa6becfc0bc0e7b8ed8 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Mon, 7 May 2012 20:29:25 +0200 Subject: [PATCH 02/26] Adapt gatherstats.pl to new coding style. * Switch to Getopt::Long, change coding style; limit line length. * Replace 'die' and 'warn' by calls to &Bleat(). * Completely changed options due to new GetOpt::Long processing. - merged -m/-p into --month * Adapt to changes in NewsStats.pm * Redo documentation. * Update TODO. Signed-off-by: Thomas Hochstein --- NewsStats.pm | 14 ++-- doc/TODO | 2 +- gatherstats.pl | 212 +++++++++++++++++++++++++++---------------------- 3 files changed, 127 insertions(+), 101 deletions(-) diff --git a/NewsStats.pm b/NewsStats.pm index a04ac0f..bdfdcf5 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -343,13 +343,13 @@ sub SplitPeriod { sub ListMonth { ################################################################################ ### return a list of months (YYYY-MM) between start and end month -### IN : $StartMonth, $EndMonth -### OUT: @Months: array containing all months from $StartMonth to $EndMonth - my ($StartMonth, $EndMonth) = @_; - return (undef,undef) - if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/); - # return if $StartMonth = $EndMonth - return ($StartMonth) if ($StartMonth eq $EndMonth); +### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') +### OUT: @Months: array containing all months from $MonthExpression enumerated + my ($MonthExpression )= @_; + # return if single month + return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); + # parse $MonthExpression + my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); # set $Year, $Month from $StartMonth my ($Year, $Month) = split /-/, $StartMonth; # define @Months diff --git a/doc/TODO b/doc/TODO index ce6f795..a30cdcd 100644 --- a/doc/TODO +++ b/doc/TODO @@ -42,6 +42,7 @@ Bug numbers refer to the Mantis issue tracker at . names - would be nice. + install/install.pl + - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm) - Add / enhance / test error handling - General tests and optimisations @@ -54,7 +55,6 @@ Bug numbers refer to the Mantis issue tracker at . - General tests and optimisations + gatherstats.pl - - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm) - Use hierarchy information (see GroupInfo above) - Add gathering of other stats (clients, hosts, ...) - better modularisation (code reuse for other reports!) diff --git a/gatherstats.pl b/gatherstats.pl index e9ae0f8..b570cd8 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -7,7 +7,7 @@ # # It is part of the NewsStats package. # -# Copyright (c) 2010 Thomas Hochstein +# Copyright (c) 2010-2012 Thomas Hochstein # # It can be redistributed and/or modified under the same terms under # which Perl itself is published. @@ -22,37 +22,56 @@ use strict; use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList); use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); ################################# Definitions ################################## # define types of information that can be gathered # all / groups (/ clients / hosts) -my %LegalTypes; -@LegalTypes{('all','groups')} = (); +my %LegalStats; +@LegalStats{('all','groups')} = (); ################################# Main program ################################# ### read commandline options -my %Options = &ReadOptions('dom:p:t:l:n:r:g:c:s:'); +my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH, + $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest); +GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile, + 'clientsdb=s' => \$OptClientsDB, + 'd|debug!' => \$OptDebug, + 'groupsdb=s' => \$OptGroupsDB, + 'hierarchy=s' => \$OptTLH, + 'hostsdb=s' => \$OptHostsDB, + 'm|month=s' => \$OptMonth, + 'rawdb=s' => \$OptRawDB, + 's|stats=s' => \$OptStatsType, + 't|test!' => \$OptTest, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; ### read configuration -my %Conf = %{ReadConfig('newsstats.conf')}; +my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; ### override configuration via commandline options my %ConfOverride; -$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'}; -$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; -$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'}; -$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'}; -$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'}; +$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB; +$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; +$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB; +$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB; +$ConfOverride{'TLH'} = $OptTLH if $OptTLH; &OverrideConfig(\%Conf,\%ConfOverride); ### get type of information to gather, defaulting to 'all' -$Options{'t'} = 'all' if !$Options{'t'}; -die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}}); +$OptStatsType = 'all' if !$OptStatsType; +&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType)) + if !exists($LegalStats{$OptStatsType}); -### get time period (-m or -p) -my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); +### get time period from --month +# get verbal description of time period, drop SQL code +my ($Period) = &GetTimePeriod($OptMonth); +&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ". + "'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time'); ### reformat $Conf{'TLH'} my $TLH; @@ -67,45 +86,55 @@ if ($Conf{'TLH'}) { # strip whitespace $TLH =~ s/\s//g; # check for illegal characters - die "$MySelf: E: Config error - illegal characters in TLH definition\n" if ($TLH !~ /^[a-zA-Z0-9:]+$/); + &Bleat(2,'Config error - illegal characters in TLH definition!') + if ($TLH !~ /^[a-zA-Z0-9:]+$/); if ($TLH =~ /:/) { - # reformat $TLH form a:b to (a)|(b) + # reformat $TLH from a:b to (a)|(b), + # e.g. replace '.' by '|' $TLH =~ s/:/)|(/g; $TLH = '(' . $TLH . ')'; }; }; -### read newsgroups list from -l -my %ValidGroups = %{&ReadGroupList($Options{'l'})} if $Options{'l'}; +# read list of newsgroups from --checkgroups +# into a hash +my %ValidGroups = %{ReadGroupList($OptCheckgroupsFile)} if $OptCheckgroupsFile; ### init database my $DBHandle = InitDB(\%Conf,1); ### get data for each month -warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'}; -foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { +&Bleat(1,'Test mode. Database is not updated.') if $OptTest; +foreach my $Month (&ListMonth($Period)) { - print "---------- $Month ----------\n" if $Options{'d'}; + print "---------- $Month ----------\n" if $OptDebug; - if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') { + if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') { ### ---------------------------------------------- ### get groups data (number of postings per group) # get groups data from raw table for given month - my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); - $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'}); + my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ". + "WHERE day LIKE ? AND NOT disregard", + $Conf{'DBDatabase'}, + $Conf{'DBTableRaw'})); + $DBQuery->execute($Month.'-%') + or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ". + "$DBI::errstr\n",$Month, + $Conf{'DBDatabase'},$Conf{'DBTableRaw'})); # count postings per group my %Postings; while (($_) = $DBQuery->fetchrow_array) { # get list oft newsgroups and hierarchies from Newsgroups: - my %Newsgroups = ListNewsgroups($_,$TLH,$Options{'l'} ? \%ValidGroups : ''); + my %Newsgroups = ListNewsgroups($_,$TLH, + $OptCheckgroupsFile ? \%ValidGroups : ''); # count each newsgroup and hierarchy once foreach (sort keys %Newsgroups) { $Postings{$_}++; }; }; - # add valid but empty groups if -l is set + # add valid but empty groups if --checkgroups is set if (%ValidGroups) { foreach (sort keys %ValidGroups) { if (!defined($Postings{$_})) { @@ -116,19 +145,29 @@ foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { }; # delete old data for that month - if (!$Options{'o'}) { - $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}),undef,$Month) - or warn sprintf("$MySelf: E: Can't delete old groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); + if (!$OptTest) { + $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?", + $Conf{'DBDatabase'},$Conf{'DBTableGrps'}), + undef,$Month) + or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ". + "$DBI::errstr\n",$Month, + $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); }; - print "----- GroupStats -----\n" if $Options{'d'}; + print "----- GroupStats -----\n" if $OptDebug; foreach my $Newsgroup (sort keys %Postings) { - print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'}; - if (!$Options{'o'}) { + print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug; + if (!$OptTest) { # write to database - $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - # $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); + $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ". + "(month,newsgroup,postings) ". + "VALUES (?, ?, ?)", + $Conf{'DBDatabase'}, + $Conf{'DBTableGrps'})); + $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) + or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ". + "$DBI::errstr\n",$Month,$Newsgroup, + $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); $DBQuery->finish; }; }; @@ -150,43 +189,31 @@ gatherstats - process statistical data from a raw source =head1 SYNOPSIS -B [B<-Vhdo>] [B<-m> I] [B<-p> I] [B<-t> I] [B<-l> I] [B<-n> I] [B<-r> I] [B<-g> I] [B<-c> I] [B<-s> I] +B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] =head1 REQUIREMENTS -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -DBI - -=back +See L. =head1 DESCRIPTION This script will extract and process statistical information from a database table which is fed from F for a given time period and write its results to (an)other database table(s). Entries marked -with I<'disregard'> in the database will be ignored; currently, you have -to set this flag yourself, using your database management tools. You -can exclude erroneous entries that way (e.g. automatic reposts (think -of cancels flood and resurrectors); spam; ...). +with I<'disregard'> in the database will be ignored; currently, you +have to set this flag yourself, using your database management tools. +You can exclude erroneous entries that way (e.g. automatic reposts +(think of cancels flood and resurrectors); spam; ...). The time period to act on defaults to last month; you can assign -another month via the B<-m> switch or a time period via the B<-p> -switch; the latter takes preference. +another time period or a single month via the B<--month> option (see +below). By default B will process all types of information; you -can change that using the B<-t> switch and assigning the type of -information to process. Currently only processing of the number of -postings per group per month is implemented anyway, so that doesn't -matter yet. +can change that using the B<--stats> option and assigning the type of +information to process. Currently that doesn't matter yet as only +processing of the number of postings per group per month is +implemented anyway. Possible information types include: @@ -205,59 +232,58 @@ respectively. A crossposting to de.alt.test and de.alt.admin, on the other hand, will be counted for de.alt.test and de.alt.admin each, but only once for de.alt.ALL and de.ALL. -Data is written to I (see doc/INSTALL). +Data is written to I (see L); you can +override that default through the B<--groupsdb> option. =back =head2 Configuration -F will read its configuration from F +B will read its configuration from F which should be present in the same directory via Config::Auto. -See doc/INSTALL for an overview of possible configuration options. +See L for an overview of possible configuration options. -You can override configuration options via the B<-n>, B<-r>, B<-g>, -B<-c> and B<-s> switches, respectively. +You can override configuration options via the B<--hierarchy>, +B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options, +respectively. =head1 OPTIONS =over 3 -=item B<-V> (version) +=item B<-V>, B<--version> -Print out version and copyright information on B and exit. +Print out version and copyright information and exit. -=item B<-h> (help) +=item B<-h>, B<--help> Print this man page and exit. -=item B<-d> (debug) +=item B<-d>, B<--debug> Output debugging information to STDOUT while processing (number of postings per group). -=item B<-o> (output only) +=item B<-t>, B<--test> -Do not write results to database. You should use B<-d> in conjunction -with B<-o> ... everything else seems a bit pointless. +Do not write results to database. You should use B<--debug> in +conjunction with B<--test> ... everything else seems a bit pointless. -=item B<-m> I (month) +=item B<-m>, B<--month> I -Set processing period to a month in YYYY-MM format. Ignored if B<-p> -is set. +Set processing period to a single month in YYYY-MM format or to a time +period between two month in YYYY-MM:YYYY-MM format (two month, separated +by a colon). -=item B<-p> I (period) -Set processing period to a time period between two month, each in -YYYY-MM format, separated by a colon. Overrides B<-m>. - -=item B<-t> I (type) +=item B<-s>, B<--stats> I Set processing type to one of I and I. Defaults to all (and is currently rather pointless as only I has been implemented). -=item B<-l> I (check against list) +=item B<-c>, B<--checkgroups> I Check each group against a list of valid newsgroups read from I, one group on each line and ignoring everything after the @@ -268,23 +294,23 @@ Newsgroups not found in I will be dropped (and logged to STDERR), and newsgroups found in I but having no postings will be added with a count of 0 (and logged to STDERR). -=item B<-n> I (newsgroup hierarchy) +=item B<--hierarchy> I (newsgroup hierarchy) Override I from F. -=item B<-r> I
(raw data table) +=item B<--rawdb> I
(raw data table) Override I from F. -=item B<-g> I
(postings per group table) +=item B<--groupsdb> I
(postings per group table) Override I from F. -=item B<-c> I
(client data table) +=item B<--clientsdb> I
(client data table) Override I from F. -=item B<-s> I
(server/host data table) +=item B<--hostsdb> I
(host data table) Override I from F. @@ -292,7 +318,7 @@ Override I from F. =head1 INSTALLATION -See doc/INSTALL. +See L. =head1 EXAMPLES @@ -302,16 +328,16 @@ Process all types of information for lasth month: Do a dry run, showing results of processing: - gatherstats -do + gatherstats --debug --test Process all types of information for January of 2010: - gatherstats -m 2010-01 + gatherstats --month 2010-01 Process only number of postings for the year of 2010, checking against checkgroups-2010.txt: - gatherstats -p 2010-01:2010-12 -t groups -l checkgroups-2010.txt + gatherstats -m 2010-01:2010-12 -s groups -c checkgroups-2010.txt =head1 FILES @@ -327,7 +353,7 @@ Library functions for the NewsStats package. =item F -Runtime configuration file for B. +Runtime configuration file. =back @@ -342,11 +368,11 @@ bug tracker at L! =item - -doc/README +L =item - -doc/INSTALL +L =back @@ -358,7 +384,7 @@ Thomas Hochstein =head1 COPYRIGHT AND LICENSE -Copyright (c) 2010 Thomas Hochstein +Copyright (c) 2010-2012 Thomas Hochstein This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. From f05ec4aeaa9fd6507c2d9da53c356316b28705db Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 27 May 2012 14:30:14 +0200 Subject: [PATCH 03/26] Adapt feedlog.pl to new coding style. * Switch to Getopt::Long, change coding style; limit line length. * Completely changed options due to new GetOpt::Long processing. * Adapt to changes in NewsStats.pm * Redo documentation. * Update TODO. Signed-off-by: Thomas Hochstein --- doc/TODO | 1 - feedlog.pl | 76 ++++++++++++++++++++++++++---------------------------- 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/doc/TODO b/doc/TODO index a30cdcd..6af50ca 100644 --- a/doc/TODO +++ b/doc/TODO @@ -47,7 +47,6 @@ Bug numbers refer to the Mantis issue tracker at . - General tests and optimisations + feedlog.pl - - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm) - Gracefully handle interruptions to database connection, at least log lost postings (Bug #30). - Buffer output that cannot be written to database (Bug #31). diff --git a/feedlog.pl b/feedlog.pl index 61311ac..1752758 100755 --- a/feedlog.pl +++ b/feedlog.pl @@ -7,7 +7,7 @@ # # It is part of the NewsStats package. # -# Copyright (c) 2010 Thomas Hochstein +# Copyright (c) 2010-2012 Thomas Hochstein # # It can be redistributed and/or modified under the same terms under # which Perl itself is published. @@ -25,18 +25,24 @@ use Sys::Syslog qw(:standard :macros); use Date::Format; use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); ################################# Main program ################################# ### read commandline options -my %Options = &ReadOptions('qd'); +my ($OptDebug,$OptQuiet); +GetOptions ('d|debug!' => \$OptDebug, + 'q|test!' => \$OptQuiet, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; ### read configuration -my %Conf = %{ReadConfig('newsstats.conf')}; +my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; ### init syslog -openlog($MySelf, 'nofatal,pid', LOG_NEWS); -syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'}; +openlog($0, 'nofatal,pid', LOG_NEWS); +syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet; ### init database my $DBHandle = InitDB(\%Conf,0); @@ -44,7 +50,11 @@ if (!$DBHandle) { syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); while (1) {}; # go into endless loop to suppress further errors and respawning }; -my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); +my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid, + timestamp,token,size,peer,path, + newsgroups,headers) + VALUES (?,?,?,?,?,?,?,?,?,?)", + $Conf{'DBDatabase'},$Conf{'DBTableRaw'})); ### main loop while (<>) { @@ -72,17 +82,21 @@ while (<>) { my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp); # write to database - if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) { + if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, + $Path, $Newsgroups, $Headers)) { syslog(LOG_ERR, 'Database error: %s', $DBI::errstr); }; $DBQuery->finish; - warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers) if $Options{'d'}; + warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n". + "Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n", + $Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, + $Newsgroups, $Headers) if $OptDebug; } ### close handles $DBHandle->disconnect; -syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'}; +syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet; closelog(); __END__ @@ -99,23 +113,7 @@ B [B<-Vhdq>] =head1 REQUIREMENTS -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -Date::Format - -=item - - -DBI - -=back +See L. =head1 DESCRIPTION @@ -131,29 +129,29 @@ terminating would only result in a rapid respawn. =head2 Configuration -F will read its configuration from F which +B will read its configuration from F which should be present in the same directory via Config::Auto. -See doc/INSTALL for an overview of possible configuration options. +See L for an overview of possible configuration options. =head1 OPTIONS =over 3 -=item B<-V> (version) +=item B<-V>, B<--version> -Print out version and copyright information on B and exit. +Print out version and copyright information and exit. -=item B<-h> (help) +=item B<-h>, B<--help> Print this man page and exit. -=item B<-d> (debug) +=item B<-d>, B<--debug> Output debugging information to STDERR while parsing STDIN. You'll find that information most probably in your B F file. -=item B<-q> (quiet) +=item B<-q>, B<--quiet> Suppress logging to syslog. @@ -161,7 +159,7 @@ Suppress logging to syslog. =head1 INSTALLATION -See doc/INSTALL. +See L =head1 EXAMPLES @@ -172,7 +170,7 @@ Set up a feed like that in your B F file: :!*,de.* :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl -See doc/INSTALL for further information. +See L for further information. =head1 FILES @@ -188,7 +186,7 @@ Library functions for the NewsStats package. =item F -Runtime configuration file for B. +Runtime configuration file. =back @@ -203,11 +201,11 @@ bug tracker at L! =item - -doc/README +L =item - -doc/INSTALL +L =back @@ -219,7 +217,7 @@ Thomas Hochstein =head1 COPYRIGHT AND LICENSE -Copyright (c) 2010 Thomas Hochstein +Copyright (c) 2010-2012 Thomas Hochstein This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. From 1fa947994652e526566d286e1527e2b58860cc83 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 27 May 2012 14:32:58 +0200 Subject: [PATCH 04/26] Adapt install.pl to new coding style. * Switch to Getopt::Long, change coding style; limit line length. * Replace 'die' and 'warn' by calls to &Bleat(). * Completely changed options due to new GetOpt::Long processing. * Adapt to changes in NewsStats.pm * Redo documentation. * Update TODO. Signed-off-by: Thomas Hochstein --- doc/TODO | 1 - install/install.pl | 72 ++++++++++++++++++++++++---------------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/doc/TODO b/doc/TODO index 6af50ca..7d6e794 100644 --- a/doc/TODO +++ b/doc/TODO @@ -42,7 +42,6 @@ Bug numbers refer to the Mantis issue tracker at . names - would be nice. + install/install.pl - - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm) - Add / enhance / test error handling - General tests and optimisations diff --git a/install/install.pl b/install/install.pl index b998280..61e8ef1 100755 --- a/install/install.pl +++ b/install/install.pl @@ -6,7 +6,7 @@ # # It is part of the NewsStats package. # -# Copyright (c) 2010 Thomas Hochstein +# Copyright (c) 2010-2012 Thomas Hochstein # # It can be redistributed and/or modified under the same terms under # which Perl itself is published. @@ -24,11 +24,16 @@ use NewsStats qw(:DEFAULT); use Cwd; use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); ################################# Main program ################################# ### read commandline options -my %Options = &ReadOptions('u:'); +my ($OptUpdate); +GetOptions ('u|update=s' => \$OptUpdate, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; ### change working directory to .. (as we're in .../install) chdir dirname($0).'/..'; @@ -36,7 +41,7 @@ my $Path = cwd(); ### read configuration print("Reading configuration.\n"); -my %Conf = %{ReadConfig('newsstats.conf')}; +my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; ##### -------------------------------------------------------------------------- ##### Database table definitions @@ -131,7 +136,7 @@ INSTALL my $Upgrade = <table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')}; +my %TablesInDB = + %{$DBHandle->table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')}; -if (!$Options{'u'}) { +if (!$OptUpdate) { ##### installation mode print "----------\nStarting database table generation.\n"; # check for tables and create them, if they don't exist yet @@ -162,8 +168,8 @@ if (!$Options{'u'}) { ##### upgrade mode print "----------\nStarting upgrade process.\n"; $PackageVersion = '0.03'; - if ($Options{'u'} < $PackageVersion) { - if ($Options{'u'} < 0.02) { + if ($OptUpdate < $PackageVersion) { + if ($OptUpdate < 0.02) { # 0.01 -> 0.02 # &DoMySQL('...;'); # print "v0.02: Database upgrades ...\n"; @@ -185,19 +191,23 @@ exit(0); sub CreateTable { my $Table = shift; if (defined($TablesInDB{$Conf{$Table}})) { - printf("Database table %s.%s already exists, skipping ....\n",$Conf{'DBDatabase'},$Conf{$Table}); + printf("Database table %s.%s already exists, skipping ....\n", + $Conf{'DBDatabase'},$Conf{$Table}); return; }; my $DBQuery = $DBHandle->prepare($DBCreate{$Table}); - $DBQuery->execute() or die sprintf("$MySelf: E: Can't create table %s in database %s: %s%\n",$Table,$Conf{'DBDatabase'},$DBI::errstr); - printf("Database table %s.%s created succesfully.\n",$Conf{'DBDatabase'},$Conf{$Table}); + $DBQuery->execute() or + &Bleat(2, sprintf("Can't create table %s in database %s: %s%\n",$Table, + $Conf{'DBDatabase'},$DBI::errstr)); + printf("Database table %s.%s created succesfully.\n", + $Conf{'DBDatabase'},$Conf{$Table}); return; }; sub DoMySQL { my $SQL = shift; my $DBQuery = $DBHandle->prepare($SQL); - $DBQuery->execute() or warn sprintf("$MySelf: E: Database error: %s\n",$DBI::errstr); + $DBQuery->execute() or &Bleat(1, sprintf("Database error: %s\n",$DBI::errstr)); return; }; @@ -221,23 +231,11 @@ install - installation script =head1 SYNOPSIS -B [B<-Vh>] +B [B<-Vh> [--update I] =head1 REQUIREMENTS -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -DBI - -=back +See L. =head1 DESCRIPTION @@ -245,23 +243,27 @@ This script will create database tables as necessary and configured. =head2 Configuration -F will read its configuration from F via +B will read its configuration from F via Config::Auto. -See doc/INSTALL for an overview of possible configuration options. +See L for an overview of possible configuration options. =head1 OPTIONS =over 3 -=item B<-V> (version) +=item B<-V>, B<--version> -Print out version and copyright information on B and exit. +Print out version and copyright information and exit. -=item B<-h> (help) +=item B<-h>, B<--help> Print this man page and exit. +=item B<-u>, B<--update> I + +Don't do a fresh install, but update from I. + =back =head1 FILES @@ -278,7 +280,7 @@ Library functions for the NewsStats package. =item F -Runtime configuration file for B. +Runtime configuration file. =back @@ -293,11 +295,11 @@ bug tracker at L! =item - -doc/README +L =item - -doc/INSTALL +L =back @@ -309,7 +311,7 @@ Thomas Hochstein =head1 COPYRIGHT AND LICENSE -Copyright (c) 2010 Thomas Hochstein +Copyright (c) 2010-2012 Thomas Hochstein This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. From 43a0fc776902f3a7b3ea019e94b67cb7e4500039 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 27 May 2012 13:56:06 +0200 Subject: [PATCH 05/26] Fix parsing of more than one TLH in config. The code introduced in 17ffbebad562acd3af71328cdbf187297b5a9e6d did not check the correct variable for being an array. Improve an unrelated comment, too. Signed-off-by: Thomas Hochstein --- gatherstats.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gatherstats.pl b/gatherstats.pl index b570cd8..d2d4faa 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -78,7 +78,7 @@ my $TLH; if ($Conf{'TLH'}) { # $Conf{'TLH'} is parsed as an array by Config::Auto; # make a flat list again, separated by : - if (ref($TLH) eq 'ARRAY') { + if (ref($Conf{'TLH'}) eq 'ARRAY') { $TLH = join(':',@{$Conf{'TLH'}}); } else { $TLH = $Conf{'TLH'}; @@ -90,7 +90,7 @@ if ($Conf{'TLH'}) { if ($TLH !~ /^[a-zA-Z0-9:]+$/); if ($TLH =~ /:/) { # reformat $TLH from a:b to (a)|(b), - # e.g. replace '.' by '|' + # e.g. replace ':' by ')|(' $TLH =~ s/:/)|(/g; $TLH = '(' . $TLH . ')'; }; From 7773fb6d8f26f2d2331a4bb394ed164160a9a210 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 27 May 2012 13:58:32 +0200 Subject: [PATCH 06/26] Match TLHs correctly, not only partially. The TLH was checked to match the beginning of the newsgroup name, not the whole TLH part. So the TLH "de" would match not only "de.test", but also "denver.test", which was not the desired outcome. Signed-off-by: Thomas Hochstein --- gatherstats.pl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gatherstats.pl b/gatherstats.pl index d2d4faa..6e41485 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -85,9 +85,15 @@ if ($Conf{'TLH'}) { } # strip whitespace $TLH =~ s/\s//g; + # add trailing dots if none are present yet + # (using negative look-behind assertions) + $TLH =~ s/(? Date: Sun, 27 May 2012 14:00:14 +0200 Subject: [PATCH 07/26] Allow more characters in TLH definitions. TLH may now also contain literal dots '.', allowing for using second or third level hierarchies as "TLH". To faciliate that, '+' and '-' will be allowed, too. Signed-off-by: Thomas Hochstein --- gatherstats.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatherstats.pl b/gatherstats.pl index 6e41485..0a5a9a2 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -91,7 +91,7 @@ if ($Conf{'TLH'}) { $TLH =~ s/(? Date: Sun, 27 May 2012 15:31:49 +0200 Subject: [PATCH 08/26] Remove call to &Bleat where not appropriate. Some warn()ings are used for debugging purposes. Signed-off-by: Thomas Hochstein --- NewsStats.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NewsStats.pm b/NewsStats.pm index bdfdcf5..f0a4066 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -184,7 +184,7 @@ sub ListNewsgroups { next if($TLH and !/^$TLH/); # don't count invalid newsgroups if(%ValidGroups and !defined($ValidGroups{$_})) { - &Bleat(1,sprintf("DROPPED: %s",$_)); + warn (sprintf("DROPPED: %s\n",$_)); next; } # add original newsgroup to %Newsgroups From 7662b1065e85874ceee234701d93aa50a1cde408 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 27 May 2012 15:33:11 +0200 Subject: [PATCH 09/26] Be more fault-tolerant when reading checkgroups. * Accept lines starting with whitespace. * Drop empty "groups", i.e. lines containing only whitespace. Signed-off-by: Thomas Hochstein --- NewsStats.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NewsStats.pm b/NewsStats.pm index f0a4066..428719e 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -230,8 +230,9 @@ sub ReadGroupList { my %ValidGroups; open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); while (<$LIST>) { - s/^(\S+).*$/$1/; + s/^\s*(\S+).*$/$1/; chomp; + next if /^$/; $ValidGroups{$_} = '1'; }; close $LIST; From 93c8eae2edcb7ccb4a3fe25817908fdb0723f324 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 27 May 2012 15:53:29 +0200 Subject: [PATCH 10/26] Change interpretation of --checkgroups to template In most hierarchies, the list of valid newsgroups will change over time, so you'll have to use another checkgroups file for each month. gatherstats will now understand the value of --checkgroups to be a template and amend it with each month it is processing. Documentation changed accordingly. Signed-off-by: Thomas Hochstein --- gatherstats.pl | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/gatherstats.pl b/gatherstats.pl index 0a5a9a2..2bc426b 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -102,10 +102,6 @@ if ($Conf{'TLH'}) { }; }; -# read list of newsgroups from --checkgroups -# into a hash -my %ValidGroups = %{ReadGroupList($OptCheckgroupsFile)} if $OptCheckgroupsFile; - ### init database my $DBHandle = InitDB(\%Conf,1); @@ -116,6 +112,11 @@ foreach my $Month (&ListMonth($Period)) { print "---------- $Month ----------\n" if $OptDebug; if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') { + # read list of newsgroups from --checkgroups + # into a hash + my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))} + if $OptCheckgroupsFile; + ### ---------------------------------------------- ### get groups data (number of postings per group) # get groups data from raw table for given month @@ -195,7 +196,7 @@ gatherstats - process statistical data from a raw source =head1 SYNOPSIS -B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] +B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] =head1 REQUIREMENTS @@ -289,15 +290,23 @@ Set processing type to one of I and I. Defaults to all (and is currently rather pointless as only I has been implemented). -=item B<-c>, B<--checkgroups> I +=item B<-c>, B<--checkgroups> I -Check each group against a list of valid newsgroups read from -I, one group on each line and ignoring everything after the -first whitespace (so you can use a file in checkgroups format or (part -of) your INN active file). +Check each group against a list of valid newsgroups read from a file, +one group on each line and ignoring everything after the first +whitespace (so you can use a file in checkgroups format or (part of) +your INN active file). -Newsgroups not found in I will be dropped (and logged to -STDERR), and newsgroups found in I but having no postings +The filename is taken from I, amended by each B<-- +month> B is processing, so that + + gatherstats -m 2010-01:2010-12 -c checkgroups + +will check against F for January 2010, against +F for February 2010 and so on. + +Newsgroups not found in the checkgroups file will be dropped (and +logged to STDERR), and newsgroups found there but having no postings will be added with a count of 0 (and logged to STDERR). =item B<--hierarchy> I (newsgroup hierarchy) @@ -341,9 +350,9 @@ Process all types of information for January of 2010: gatherstats --month 2010-01 Process only number of postings for the year of 2010, -checking against checkgroups-2010.txt: +checking against checkgroups-*: - gatherstats -m 2010-01:2010-12 -s groups -c checkgroups-2010.txt + gatherstats -m 2010-01:2010-12 -s groups -c checkgroups =head1 FILES From b5125b1099cf5cf12beb0520d5896b9a1d7850ae Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 11 Aug 2013 01:47:32 +0200 Subject: [PATCH 11/26] Add empty 'virtual' .ALL hierarchies as needed. When using a --checkgroups file while tabulating, valid but empty groups will be added with a posting count of zero as needed. If all groups in a sub-hierarchy are empty, the virtual '.ALL' group for that sub-hierarchy was not created, though. If local.test.dummy and local.test.binary were both empty, both groups were added with a posting count of '0', but local.test.ALL was not. Now we loop through all hierarchy elements using ParseHierarchies and add empty .ALL hierarchies as needed. Fixes #49. Also fixing a typo in some comment. :-) Signed-off-by: Thomas Hochstein --- gatherstats.pl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/gatherstats.pl b/gatherstats.pl index 2bc426b..ae7d65d 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -132,7 +132,7 @@ foreach my $Month (&ListMonth($Period)) { # count postings per group my %Postings; while (($_) = $DBQuery->fetchrow_array) { - # get list oft newsgroups and hierarchies from Newsgroups: + # get list of newsgroups and hierarchies from Newsgroups: my %Newsgroups = ListNewsgroups($_,$TLH, $OptCheckgroupsFile ? \%ValidGroups : ''); # count each newsgroup and hierarchy once @@ -145,12 +145,19 @@ foreach my $Month (&ListMonth($Period)) { if (%ValidGroups) { foreach (sort keys %ValidGroups) { if (!defined($Postings{$_})) { - $Postings{$_} = 0 ; - warn (sprintf("ADDED: %s as empty group\n",$_)); + # expand newsgroup with hierarchies + my @Newsgroups = ParseHierarchies($_); + # add each empty newsgroup and empty hierarchies, too, as needed + foreach (@Newsgroups) { + if (!defined($Postings{$_})) { + $Postings{$_} = 0; + warn (sprintf("ADDED: %s as empty group\n",$_)); + }; + }; } }; }; - + # delete old data for that month if (!$OptTest) { $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?", From 98563c619eeb66fee046c6acb3dfc0f14c442e8d Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sat, 13 Oct 2012 01:27:54 +0200 Subject: [PATCH 12/26] Refactor database initialisation in feedlog.pl. * Move database initialisation to a separate function. * (Re-)try to connect every five seconds (instead of going into an endless loop) and log successful (re-)connections. * Log postings that are dropped due to database failures to syslog (Message-ID) for recovery. * If the connection to the database is lost, try to recover it (every five seconds) and try again to write the pending data. * Input will be buffered automatically by INN until feedlog is able to process it (see man 5 newsfeeds). Fixes #30, #31. Signed-off-by: Thomas Hochstein --- doc/TODO | 3 --- feedlog.pl | 63 ++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/doc/TODO b/doc/TODO index 7d6e794..6f1a61b 100644 --- a/doc/TODO +++ b/doc/TODO @@ -46,9 +46,6 @@ Bug numbers refer to the Mantis issue tracker at . - General tests and optimisations + feedlog.pl - - Gracefully handle interruptions to database connection, at least log - lost postings (Bug #30). - - Buffer output that cannot be written to database (Bug #31). - Add / enhance / test error handling - General tests and optimisations diff --git a/feedlog.pl b/feedlog.pl index 1752758..636de83 100755 --- a/feedlog.pl +++ b/feedlog.pl @@ -28,6 +28,42 @@ use DBI; use Getopt::Long qw(GetOptions); Getopt::Long::config ('bundling'); +################################# Subroutines ################################## + +sub PrepareDB { +### initialise database connection, prepare statement +### and catch errors +### IN : \%Conf : reference to configuration hash +### OUT: $DBHandle: database handle +### $DBQuery : prepared statement + our ($DBHandle, $DBQuery, $OptQuiet); + my ($ConfigR) = @_; + my %Conf = %$ConfigR; + # drop current database connection - hard, if necessary + if ($DBHandle) { + $DBHandle->disconnect; + undef $DBHandle; + }; + # connect to database; try again every 5 seconds + while (!$DBHandle) { + $DBHandle = InitDB($ConfigR,0); + if (!$DBHandle) { + syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); + sleep(5); + } else {; + syslog(LOG_NOTICE, "Database connection (re-)established successfully.") if !$OptQuiet; + } + }; + $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid, + timestamp,token,size,peer,path, + newsgroups,headers) + VALUES (?,?,?,?,?,?,?,?,?,?)", + $Conf{'DBDatabase'}, + $Conf{'DBTableRaw'})); + return ($DBHandle,$DBQuery); +} + + ################################# Main program ################################# ### read commandline options @@ -45,16 +81,7 @@ openlog($0, 'nofatal,pid', LOG_NEWS); syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet; ### init database -my $DBHandle = InitDB(\%Conf,0); -if (!$DBHandle) { - syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); - while (1) {}; # go into endless loop to suppress further errors and respawning -}; -my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid, - timestamp,token,size,peer,path, - newsgroups,headers) - VALUES (?,?,?,?,?,?,?,?,?,?)", - $Conf{'DBDatabase'},$Conf{'DBTableRaw'})); +my ($DBHandle,$DBQuery) = PrepareDB(\%Conf); ### main loop while (<>) { @@ -84,7 +111,21 @@ while (<>) { # write to database if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) { - syslog(LOG_ERR, 'Database error: %s', $DBI::errstr); + syslog(LOG_ERR, 'Database error %s while processing %s: %s', + $DBI::err, $Mid, $DBI::errstr); + # if "MySQL server has gone away", try to recover + if ($DBI::err == 2006) { + # try to reconnect to database + ($DBHandle,$DBQuery) = PrepareDB(\%Conf); + # try to repeat the write attempt as before + if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, + $Path, $Newsgroups, $Headers)) { + syslog(LOG_ERR, '%s was dropped and lost.',$Mid); + }; + # otherwise log missing posting + } else { + syslog(LOG_ERR, '%s was dropped and lost.',$Mid); + }; }; $DBQuery->finish; From da6fc073eed91506a94ae96471b19b6f22b72cb0 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Wed, 12 Jan 2011 16:39:10 +0100 Subject: [PATCH 13/26] Handle undefined previous version when installing. $OptUpdate is undefined when not upgrading, so don't prepare an upgrade notice to avoid calling an undefined variable. Signed-off-by: Thomas Hochstein --- install/install.pl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/install/install.pl b/install/install.pl index 61e8ef1..8ab7798 100755 --- a/install/install.pl +++ b/install/install.pl @@ -134,7 +134,9 @@ Enjoy! -thh INSTALL -my $Upgrade = < Date: Wed, 12 Jan 2011 16:41:21 +0100 Subject: [PATCH 14/26] Add database creation to installer. It's not enough to create tables, one should create the database first if it is still missing ... Signed-off-by: Thomas Hochstein --- install/install.pl | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/install/install.pl b/install/install.pl index 8ab7798..374d74b 100755 --- a/install/install.pl +++ b/install/install.pl @@ -47,6 +47,10 @@ my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; ##### Database table definitions ##### -------------------------------------------------------------------------- +my $DBCreate = < < <connect(sprintf('DBI:%s:host=%s',$Conf{'DBDriver'}, + $Conf{'DBHost'}), $Conf{'DBUser'}, + $Conf{'DBPw'}, { PrintError => 0 }); + my $DBQuery = $DBHandle->prepare($DBCreate); + $DBQuery->execute() or &Bleat(2, sprintf("Can't create database %s: %s%\n", + $Conf{'DBDatabase'}, $DBI::errstr)); + + printf("Database table %s created succesfully.\n",$Conf{'DBDatabase'}); + $DBHandle->disconnect; +}; + ### DB init, read list of tables print "Reading database information.\n"; my $DBHandle = InitDB(\%Conf,1); @@ -158,7 +179,6 @@ my %TablesInDB = if (!$OptUpdate) { ##### installation mode - print "----------\nStarting database table generation.\n"; # check for tables and create them, if they don't exist yet foreach my $Table (keys %DBCreate) { &CreateTable($Table); From 7b310df13f064955ef771d9657997dedd2498cfb Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 11 Aug 2013 22:51:11 +0200 Subject: [PATCH 15/26] Add some TODO entries. Signed-off-by: Thomas Hochstein --- doc/TODO | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/TODO b/doc/TODO index 6f1a61b..36b6b50 100644 --- a/doc/TODO +++ b/doc/TODO @@ -12,6 +12,10 @@ Bug numbers refer to the Mantis issue tracker at . - Add a test suite There is currently no kind of test suite or regression tests. Something like that is badly needed. + - separate directories (bin, config, ...) + - make NewsStats installable in accordance with LFS and customary procedures, + i.e. scripts to /usr/local/news/bin or /usr/lib/news/bin, configuration + to /usr/local/news/etc or /etc/news and so on * Additional features - Add hierarchy information (GroupInfo - Bugs #19 #20 #21 #22 #23 #24 #25 #26) @@ -42,6 +46,8 @@ Bug numbers refer to the Mantis issue tracker at . names - would be nice. + install/install.pl + - Check installer & fix bugs to get a working version ... finally + - Read current version from a file dropped and updated by installer - Add / enhance / test error handling - General tests and optimisations From 1703b8e3b454713d4f873acdb5cb400db490a152 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 1 Sep 2013 17:52:21 +0200 Subject: [PATCH 16/26] Get empty 'virtual' hierarchies working. Commit b5125b1099cf5cf12beb0520d5896b9a1d7850ae was broken. We didn't add empty .ALL hierarchies as needed; we added empty (non-existant) hierarchies without appended '.ALL', and didn't add the original empty group we started with. (What's more, gatherstats didn't even start any more due to missing ex- and import of &ParseHierarchies from NewsStats.pm.) Fixes #52 (and some more breakage). Signed-off-by: Thomas Hochstein --- NewsStats.pm | 1 + gatherstats.pl | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/NewsStats.pm b/NewsStats.pm index 428719e..bfcb37b 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -33,6 +33,7 @@ require Exporter; SplitPeriod ListMonth ListNewsgroups + ParseHierarchies ReadGroupList OutputData FormatOutput diff --git a/gatherstats.pl b/gatherstats.pl index ae7d65d..4c539f1 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -19,7 +19,7 @@ BEGIN { } use strict; -use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList); +use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList); use DBI; use Getopt::Long qw(GetOptions); @@ -145,13 +145,15 @@ foreach my $Month (&ListMonth($Period)) { if (%ValidGroups) { foreach (sort keys %ValidGroups) { if (!defined($Postings{$_})) { - # expand newsgroup with hierarchies - my @Newsgroups = ParseHierarchies($_); - # add each empty newsgroup and empty hierarchies, too, as needed - foreach (@Newsgroups) { - if (!defined($Postings{$_})) { - $Postings{$_} = 0; - warn (sprintf("ADDED: %s as empty group\n",$_)); + # add current newsgroup as empty group + $Postings{$_} = 0; + warn (sprintf("ADDED: %s as empty group\n",$_)); + # add empty hierarchies for current newsgroup as needed + foreach (ParseHierarchies($_)) { + my $Hierarchy = $_ . '.ALL'; + if (!defined($Postings{$Hierarchy})) { + $Postings{$Hierarchy} = 0; + warn (sprintf("ADDED: %s as empty group\n",$Hierarchy)); }; }; } From 512781bd9259ec07dd7f2958b0074f2f49573970 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Mon, 2 Sep 2013 06:44:32 +0200 Subject: [PATCH 17/26] Fix config path detection for install.pl. Make use of $Path - which is set and checked to display a correct 'newsfeeds' example - to load our configuration from the correct location. Signed-off-by: Thomas Hochstein --- install/install.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/install/install.pl b/install/install.pl index 374d74b..4069bf5 100755 --- a/install/install.pl +++ b/install/install.pl @@ -36,12 +36,12 @@ GetOptions ('u|update=s' => \$OptUpdate, 'V|version' => \&ShowVersion) or exit 1; ### change working directory to .. (as we're in .../install) -chdir dirname($0).'/..'; +chdir dirname($FullPath).'/..'; my $Path = cwd(); ### read configuration print("Reading configuration.\n"); -my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; +my %Conf = %{ReadConfig($Path.'/newsstats.conf')}; ##### -------------------------------------------------------------------------- ##### Database table definitions From 10459ac8c7e23e8f594209091e9c92bcf1e57df7 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Mon, 2 Sep 2013 08:55:13 +0200 Subject: [PATCH 18/26] Create better newsgroup lists for SQL clause. Build a 'IN(...)' list for single newsgroup names without wildcards. Create SQL clause with a mix of wildcards and wildcard-less group names. More code for a better query ... Fixes #37. Signed-off-by: Thomas Hochstein --- NewsStats.pm | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/NewsStats.pm b/NewsStats.pm index bfcb37b..b0094dc 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -576,14 +576,46 @@ sub SQLGroupList { $Newsgroups =~ s/\*/%/g; # just one newsgroup? return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; + my ($SQL,@WildcardGroups,@NoWildcardGroups); # list of newsgroups separated by ':' - my $SQL = '('; my @GroupList = split /:/, $Newsgroups; foreach (@GroupList) { - $SQL .= ' OR ' if $SQL gt '('; - $SQL .= SQLGroupWildcard($_); + if ($_ !~ /%/) { + # add to list of newsgroup names WITHOUT wildcard + push (@NoWildcardGroups,$_); + } else { + # add to list of newsgroup names WITH wildcard + push (@WildcardGroups,$_); + # add wildcard to SQL clause + # 'OR' if SQL clause is not empty + $SQL .= ' OR ' if $SQL; + $SQL .= 'newsgroup LIKE ?' + } }; - $SQL .= ')'; + if (scalar(@NoWildcardGroups)) { + # add 'OR' if SQL clause is not empty + $SQL .= ' OR ' if $SQL; + if (scalar(@NoWildcardGroups) < 2) { + # special case: just one newsgroup without wildcard + $SQL .= 'newsgroup = ?'; + } else { + # create list of newsgroups to include: 'newsgroup IN (...)' + $SQL .= 'newsgroup IN ('; + my $SQLin; + foreach (@NoWildcardGroups) { + $SQLin .= ',' if $SQLin; + $SQLin .= '?'; + } + # add list to SQL clause + $SQL .= $SQLin .= ')'; + } + } + # add brackets '()' to SQL clause as needed (more than one wildcard group) + if (scalar(@WildcardGroups)) { + $SQL = '(' . $SQL .')'; + } + # rebuild @GroupList in (now) correct order + @GroupList = (@WildcardGroups,@NoWildcardGroups); return ($SQL,@GroupList); }; From db7696e550695219f00b24b19020f0c163e8c171 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Mon, 2 Sep 2013 10:40:17 +0200 Subject: [PATCH 19/26] Add some basic validation to config parser. We check for empty mandatory options for starters. Fixes #13 ... so we can release RSN :) Signed-off-by: Thomas Hochstein --- NewsStats.pm | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/NewsStats.pm b/NewsStats.pm index bfcb37b..91a1396 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -99,7 +99,19 @@ sub ReadConfig { ### IN : $ConfFile: config filename ### OUT: reference to a hash containing the configuration my ($ConfFile) = @_; - return Config::Auto::parse($ConfFile, format => 'equal'); + # mandatory configuration options + my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase', + 'DBTableRaw','DBTableGrps'); + # read config via Config::Auto + my $ConfR = Config::Auto::parse($ConfFile, format => 'equal'); + my %Conf = %{$ConfR}; + # check for mandatory options + foreach (@Mandatory) { + &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_)) + if (!defined($Conf{$_})); + } + # $Conf{'TLH'} is checked in gatherstats.pl + return $ConfR; }; ################################################################################ From c30822b48b896836735cee5144cbfa3c3703001d Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Mon, 2 Sep 2013 08:51:45 +0200 Subject: [PATCH 20/26] Check for invalid newsgroup names. Add check to SQLGroupList() and act on it in groupstats.pl. Issue #12. Signed-off-by: Thomas Hochstein --- NewsStats.pm | 15 ++++++++++++++- groupstats.pl | 13 +++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/NewsStats.pm b/NewsStats.pm index bfcb37b..fd4843b 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -574,6 +574,7 @@ sub SQLGroupList { my ($Newsgroups) = @_; # substitute '*' wildcard with SQL wildcard character '%' $Newsgroups =~ s/\*/%/g; + return (undef,undef) if !CheckValidNewsgroups($Newsgroups); # just one newsgroup? return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; # list of newsgroups separated by ':' @@ -595,7 +596,6 @@ sub SQLGroupWildcard { ### (group.name or group.name.%) ### OUT: SQL code to become part of a 'WHERE' clause my ($Newsgroup) = @_; - # FIXME: check for validity if ($Newsgroup !~ /%/) { return 'newsgroup = ?'; } else { @@ -698,6 +698,19 @@ sub SQLBuildClause { return $SQLClause; }; +#####--------------------------- Verifications ----------------------------##### + +################################################################################ +sub CheckValidNewsgroups { +################################################################################ +### syntax check of newgroup list +### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) +### OUT: boolean + my ($Newsgroups) = @_; + my $InvalidCharRegExp = ',; '; + return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1; +}; + #####------------------------------- done ---------------------------------##### 1; diff --git a/groupstats.pl b/groupstats.pl index 14d19c6..8b93559 100755 --- a/groupstats.pl +++ b/groupstats.pl @@ -99,8 +99,13 @@ my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth); "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod; # get list of newsgroups and set expression for SQL 'WHERE' clause # with placeholders as well as a list of newsgroup to bind to them -my ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups) - if $OptNewsgroups;; +my ($SQLWhereNewsgroups,@SQLBindNewsgroups); +if ($OptNewsgroups) { + ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups); + # bail out if --newsgroups is invalid + &Bleat(2,"--newsgroups option has an invalid format!") + if !$SQLWhereNewsgroups; +} ### build SQL WHERE clause (and HAVING clause, if needed) my ($SQLWhereClause,$SQLHavingClause); @@ -193,8 +198,8 @@ if ($OptBoundType and $OptBoundType ne 'default') { $DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s', $SQLSelect, $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, - $SQLWhereClause,$SQLGroupClause,$ - SQLOrderClause)); + $SQLWhereClause,$SQLGroupClause, + $SQLOrderClause)); # execute query $DBQuery->execute(@SQLBindNewsgroups) From b342fcf030155e68748c09c3469f43d4559a6dfb Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Mon, 2 Sep 2013 10:13:35 +0200 Subject: [PATCH 21/26] Improve output padding. Take 'length' of numbers in account. Change GetMaxLength() accordingly and use that new information in FormatOutput(). Fixes #53. Signed-off-by: Thomas Hochstein --- NewsStats.pm | 48 ++++++++++++++++++++++++++++++------------------ groupstats.pl | 9 +++++---- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/NewsStats.pm b/NewsStats.pm index bfcb37b..cd059cf 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -382,16 +382,17 @@ sub OutputData { ### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM ### $DBQuery : database query handle with executed query, ### containing $Month, $Key, $Value -### $PadGroup : padding length for key field (optional) for 'pretty' +### $PadField : padding length for key field (optional) for 'pretty' +### $PadValue : padding length for value field (optional) for 'pretty' my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, - $DBQuery, $PadGroup) = @_; + $DBQuery, $PadField, $PadValue) = @_; my %ValidKeys = %{$ValidKeys} if $ValidKeys; my ($FileName, $Handle, $OUT); our $LastIteration; # define output types my %LegalOutput; - @LegalOutput{('dump',,'list','pretty')} = (); + @LegalOutput{('dump','list','pretty')} = (); # bail out if format is unknown &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); @@ -427,7 +428,7 @@ sub OutputData { $Handle = $OUT; }; print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, - $Precision, $PadGroup); + $Precision, $PadField, $PadValue); $LastIteration = $Caption; }; close $OUT if ($FileTempl); @@ -443,9 +444,11 @@ sub FormatOutput { ### $Key : newsgroup, client, ... or $Month, as above ### $Value : number of postings with that attribute ### $Precision: number of digits right of decimal point (0 or 2) -### $PadGroup : padding length for key field (optional) for 'pretty' +### $PadField : padding length for key field (optional) for 'pretty' +### $PadValue : padding length for value field (optional) for 'pretty' ### OUT: $Output: formatted output - my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadGroup) = @_; + my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField, + $PadValue) = @_; my ($Output); # keep last caption in mind our ($LastIteration); @@ -462,8 +465,13 @@ sub FormatOutput { # output as a table $Output = sprintf ("# ----- %s:\n",$Caption) if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); - $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%10.*f\n",$PadGroup) : - "%s %.*f\n",$Key,$Precision,$Value); + # increase $PadValue for numbers with decimal point + $PadValue += $Precision+1 if $Precision; + # add padding if $PadField is set; $PadValue HAS to be set then + $Output .= sprintf ($PadField ? + sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) : + "%s%s %.*f\n",$Key,$Comments ? ':' : '', + $Precision,$Value); }; return $Output; }; @@ -485,26 +493,30 @@ sub SQLHierarchies { ################################################################################ sub GetMaxLength { ################################################################################ -### get length of longest field in future query result -### IN : $DBHandle : database handel +### get length of longest fields in future query result +### IN : $DBHandle : database handle ### $Table : table to query -### $Field : field to check +### $Field : field (key!, i.e. month, newsgroup, ...) to check +### $Value : field (value!, i.e. postings) to check ### $WhereClause : WHERE clause ### $HavingClause: HAVING clause ### @BindVars : bind variables for WHERE clause -### OUT: $Length: length of longest instnace of $Field - my ($DBHandle,$Table,$Field,$WhereClause,$HavingClause,@BindVars) = @_; - my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) ". - "FROM %s %s %s",$Field,$Table, - $WhereClause,$HavingClause ? +### OUT: $FieldLength : length of longest instance of $Field +### $ValueLength : length of longest instance of $Value + my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_; + my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),". + "MAX(%s) ". + "FROM %s %s %s",$Field,,$Value, + $Table,$WhereClause,$HavingClause ? 'GROUP BY newsgroup' . $HavingClause . ' ORDER BY LENGTH(newsgroup) '. 'DESC LIMIT 1': '')); $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". "for '%s' from table '%s': ". "$DBI::errstr",$Field,$Table)); - my ($Length) = $DBQuery->fetchrow_array; - return $Length; + my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array; + my $ValueLength = length($ValueMax) if ($ValueMax); + return ($FieldLength,$ValueLength); }; ################################################################################ diff --git a/groupstats.pl b/groupstats.pl index 14d19c6..ea1148f 100755 --- a/groupstats.pl +++ b/groupstats.pl @@ -154,9 +154,10 @@ if ($OptReportType and $OptReportType ne 'default') { ### get length of longest newsgroup name delivered by query ### for formatting purposes my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; -my $MaxLength = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'}, - $Field,$SQLWhereClause,$SQLHavingClause, - @SQLBindNewsgroups); +my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'}, + $Field,'postings',$SQLWhereClause, + $SQLHavingClause, + @SQLBindNewsgroups); ### build and execute SQL query my ($DBQuery); @@ -241,7 +242,7 @@ if ($OptCaptions && $OptComments) { # output data &OutputData($OptFormat,$OptComments,$GroupBy,$Precision, $OptCheckgroupsFile ? $ValidGroups : '', - $OptFileTemplate,$DBQuery,$MaxLength); + $OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength); ### close handles $DBHandle->disconnect; From 3f817eb4289da08fff2aab19db97a82d977b8e3e Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Mon, 2 Sep 2013 11:15:12 +0200 Subject: [PATCH 22/26] Change handling of warnings. Replace 'perl -W' by 'use warnings;'. The latter is preferred, and '-W' (instead of '-w') was causing problems with warnings in DB::mysql::GetInfo.pm. Signed-off-by: Thomas Hochstein --- feedlog.pl | 3 ++- gatherstats.pl | 3 ++- groupstats.pl | 3 ++- install/install.pl | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/feedlog.pl b/feedlog.pl index 636de83..80dbbe0 100755 --- a/feedlog.pl +++ b/feedlog.pl @@ -1,4 +1,4 @@ -#! /usr/bin/perl -W +#! /usr/bin/perl # # feedlog.pl # @@ -18,6 +18,7 @@ BEGIN { push(@INC, dirname($0)); } use strict; +use warnings; use NewsStats; diff --git a/gatherstats.pl b/gatherstats.pl index 4c539f1..160c115 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -1,4 +1,4 @@ -#! /usr/bin/perl -W +#! /usr/bin/perl # # gatherstats.pl # @@ -18,6 +18,7 @@ BEGIN { push(@INC, dirname($0)); } use strict; +use warnings; use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList); diff --git a/groupstats.pl b/groupstats.pl index 14d19c6..f0e4278 100755 --- a/groupstats.pl +++ b/groupstats.pl @@ -1,4 +1,4 @@ -#! /usr/bin/perl -W +#! /usr/bin/perl # # groupstats.pl # @@ -18,6 +18,7 @@ BEGIN { push(@INC, dirname($0)); } use strict; +use warnings; use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList); diff --git a/install/install.pl b/install/install.pl index 4069bf5..59920fa 100755 --- a/install/install.pl +++ b/install/install.pl @@ -1,4 +1,4 @@ -#! /usr/bin/perl -W +#! /usr/bin/perl # # install.pl # @@ -18,6 +18,7 @@ BEGIN { push(@INC, dirname($0).'/..'); } use strict; +use warnings; use NewsStats qw(:DEFAULT); From 6e6c520f943284696f75ad24d4f87f77830a0ba9 Mon Sep 17 00:00:00 2001 From: Daniel Weber Date: Tue, 28 Sep 2010 22:15:08 +0200 Subject: [PATCH 23/26] README: improve phrasing. Signed-off-by: Thomas Hochstein --- doc/README | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/README b/doc/README index 440cd11..150077f 100644 --- a/doc/README +++ b/doc/README @@ -12,7 +12,7 @@ the Free Software Foundation. What's that? There's a multitude of tools for the statistical examination of - newsgroups: number of postings month or per person, longest + newsgroups: number of postings per month or per person, longest threads, and so on (see [German language] for an incomplete list). Most of them use a per- newsgroup approach while NewsStats is hierarchy oriented. @@ -27,7 +27,7 @@ Workflow That raw data will be regularly - e.g. monthly - processed to a second set of database tables each dedicated to a certain - statistical aspect, e.g. number of postings per group per month. + statistical aspect, e.g. number of postings per group and month. Several kinds of reports can then be generated from those result tables. @@ -35,8 +35,8 @@ Workflow Prerequisites NewsStats is written in Perl (5.8.x and above) and makes use of a - MySQL database, so you'll need Perl, some modules, mysql and, of - course, an INN. + MySQL database, so you will need Perl, some modules, mysql and, of + course, INN. * Perl 5.8.x with standard modules - Cwd @@ -62,12 +62,12 @@ Getting Started table. See the feedlog.pl man page for more information. You can process that data via 'gatherstats.pl'; currently only the - tabulation of postings per group per month is supported. More to + tabulation of postings per group and month is supported. More to come. See the gatherstats.pl man page for more information. Report generation is handled by specialised scripts for each report type. Currently only reports on the number of postings per - group per month are supported; you can use 'groupstats.pl' for + group and month are supported; you can use 'groupstats.pl' for this. See the groupstats.pl man page for more information. Reporting Bugs @@ -95,3 +95,4 @@ Author Thomas Hochstein + From 435a99783c6f0fdf6fc0f584d6d8a9968d9bf6de Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sat, 13 Oct 2012 01:35:34 +0200 Subject: [PATCH 24/26] README: Update copyright notice. Signed-off-by: Thomas Hochstein --- doc/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/README b/doc/README index 150077f..4c88ef2 100644 --- a/doc/README +++ b/doc/README @@ -1,4 +1,4 @@ -NewsStats 0.1 (c) 2010 Thomas Hochstein +NewsStats 0.1 (c) 2010-2012 Thomas Hochstein NewsStats is a software package for gathering statistical data live from a Usenet feed and subsequent examination. From b31f78506404cd5db097135a0b5ac7abe270c71f Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sat, 13 Oct 2012 01:36:17 +0200 Subject: [PATCH 25/26] Improve INSTALL documentation. Fix wrong example for newsfeeds entry (lacking backslashes). Fix typos, update copyright, improve phrasing. Signed-off-by: Thomas Hochstein --- doc/INSTALL | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/INSTALL b/doc/INSTALL index 225fdbf..0c2790c 100644 --- a/doc/INSTALL +++ b/doc/INSTALL @@ -1,4 +1,4 @@ -NewsStats 0.1 (c) 2010 Thomas Hochstein +NewsStats 0.1 (c) 2010-2012 Thomas Hochstein NewsStats is a software package for gathering statistical data live from a Usenet feed and subsequent examination. @@ -20,7 +20,8 @@ INSTALLATION INSTRUCTIONS # tar -xzf newsstats-nn.tar.gz - Scripts in this path should be executable by the news user. + Scripts in this path - at least feedlog.pl - should be executable by the + news user. 2) Configuration @@ -80,8 +81,8 @@ INSTALLATION INSTRUCTIONS * Edit your 'newsfeeds' file and insert something like ## gather statistics for NewsStats - newsstats! - :!*,de.* + newsstats!\ + :!*,de.*\ :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl * You should only feed that hierarchy (those hierarchies ...) to @@ -109,7 +110,7 @@ INSTALLATION INSTRUCTIONS Everything should be going smoothly now. * If INN is spewing error messages to 'errlog' or reporting - continous respaws of feedlog.pl to 'news.notice', stop your feed: + continous respawns of feedlog.pl to 'news.notice', stop your feed: # ctlinnd drop 'newsstats!' From 95d9fe2cfd944f93036d3c584328a91d952c5c7c Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sun, 11 Aug 2013 22:44:08 +0200 Subject: [PATCH 26/26] Some documentation fixes and enhancments. Signed-off-by: Thomas Hochstein --- doc/INSTALL | 4 ++-- feedlog.pl | 2 +- gatherstats.pl | 8 ++++---- groupstats.pl | 11 +++++++---- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/doc/INSTALL b/doc/INSTALL index 0c2790c..3539ab2 100644 --- a/doc/INSTALL +++ b/doc/INSTALL @@ -1,7 +1,7 @@ NewsStats 0.1 (c) 2010-2012 Thomas Hochstein -NewsStats is a software package for gathering statistical data live -from a Usenet feed and subsequent examination. +NewsStats is a software package used to gather statistical information +from a live Usenet feed and for its subsequent examination. This script package is free software; you can redistribute it and/or modify it under the terms of the GNU Public License as published by diff --git a/feedlog.pl b/feedlog.pl index 636de83..0814f2c 100755 --- a/feedlog.pl +++ b/feedlog.pl @@ -200,7 +200,7 @@ Suppress logging to syslog. =head1 INSTALLATION -See L +See L. =head1 EXAMPLES diff --git a/gatherstats.pl b/gatherstats.pl index 4c539f1..64ea87b 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -205,7 +205,7 @@ gatherstats - process statistical data from a raw source =head1 SYNOPSIS -B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] +B [B<-Vhdt>] [B<-m> I | I] [B<-s> I] [B<-c> I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] =head1 REQUIREMENTS @@ -292,7 +292,6 @@ Set processing period to a single month in YYYY-MM format or to a time period between two month in YYYY-MM:YYYY-MM format (two month, separated by a colon). - =item B<-s>, B<--stats> I Set processing type to one of I and I. Defaults to all @@ -306,8 +305,9 @@ one group on each line and ignoring everything after the first whitespace (so you can use a file in checkgroups format or (part of) your INN active file). -The filename is taken from I, amended by each B<-- -month> B is processing, so that +The filename is taken from I, amended by each +B<--month> B is processing in the form of I, +so that gatherstats -m 2010-01:2010-12 -c checkgroups diff --git a/groupstats.pl b/groupstats.pl index 14d19c6..efd34ef 100755 --- a/groupstats.pl +++ b/groupstats.pl @@ -372,6 +372,9 @@ Restrict output to those newgroups present in a file in checkgroups format (one newgroup name per line; everything after the first whitespace on each line is ignored). All other newsgroups will be removed from output. +Contrary to B, I is not a template, but refers to +a single file in checkgroups format. + =item B<-r>, B<--report> I Choose the report type: I, I or I @@ -429,8 +432,8 @@ you'll get the following result: de.comp.datenbanken.misc has not been considered even though it has 38 postings in total, because it has less than 25 postings in every single -month. If you want to list all newsgroups with more than 25 postings U, you'll have to set the boundary type to I, see below. +month. If you want to list all newsgroups with more than 25 postings +I, you'll have to set the boundary type to I, see below. A boundary type of I will show only those newsgroups - at all - that satisfy the boundaries in each and every single month. With the above @@ -442,10 +445,10 @@ you'll get this result: de.comp.datenbanken.ms-access 293 de.comp.datenbanken.mysql has not been considered because it had less than -25 postings in 2012-02. +25 postings in 2012-02 (only). You can use that to get a list of newsgroups that have more (or less) then -x postings during the whole reporting period. +x postings in every month during the whole reporting period. A boundary type of I will show only those newsgroups - at all -that satisfy the boundaries on average. With the above list of newsgroups and