newsstats/NewsStats.pm
Thomas Hochstein d3b6810d3d Fix some typos.
groupsstats.pl: Fix typo in lead-in comment.
groupsstats.pl/NewsStats.pm: Fix typo concerning
GetMaxLength - it isn't 'GetMaxLenght' ...

Signed-off-by: Thomas Hochstein <thh@inter.net>
2010-11-01 20:28:31 +01:00

455 lines
17 KiB
Perl
Raw Blame History

# NewsStats.pm
#
# Library functions for the NewsStats package.
#
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
#
# This module can be redistributed and/or modified under the same terms under
# which Perl itself is published.
package NewsStats;
use strict;
use warnings;
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(
$MySelf
$MyVersion
$PackageVersion
ReadOptions
ReadConfig
OverrideConfig
InitDB
);
@EXPORT_OK = qw(
GetTimePeriod
LastMonth
CheckMonth
SplitPeriod
ListMonth
ListNewsgroups
ReadGroupList
OutputData
FormatOutput
SQLHierarchies
SQLGroupList
GetMaxLength
);
%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)],
Output => [qw(OutputData FormatOutput)],
SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLength)]);
$VERSION = '0.01';
our $PackageVersion = '0.01';
use Data::Dumper;
use File::Basename;
use Getopt::Std;
use Config::Auto;
use DBI;
#####-------------------------------- Vars --------------------------------#####
our $MySelf = fileparse($0, '.pl');
our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)";
#####------------------------------- Basics -------------------------------#####
################################################################################
sub ReadOptions {
################################################################################
### read commandline options and act on standard options -h and -V
### IN : $Params: list of legal commandline paramaters (without -h and -V)
### OUT: a hash containing the commandline options
$Getopt::Std::STANDARD_HELP_VERSION = 1;
my ($Params) = @_;
my %Options;
getopts('Vh'.$Params, \%Options);
# -V: display version
&ShowVersion if ($Options{'V'});
# -h: feed myself to perldoc
&ShowPOD if ($Options{'h'});
return %Options;
};
################################################################################
################################################################################
sub ShowVersion {
################################################################################
### display version and exit
print "NewsStats v$PackageVersion\n$MyVersion\nCopyright (c) 2010 Thomas Hochstein <thh\@inter.net>\n";
print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n";
exit(100);
};
################################################################################
################################################################################
sub ShowPOD {
################################################################################
### feed myself to perldoc and exit
exec('perldoc', $0);
exit(100);
};
################################################################################
################################################################################
sub ReadConfig {
################################################################################
### read config via Config::Auto
### IN : $ConfFile: config filename
### OUT: reference to a hash containing the configuration
my ($ConfFile) = @_;
return Config::Auto::parse($ConfFile, format => 'equal');
};
################################################################################
################################################################################
sub OverrideConfig {
################################################################################
### override configuration values
### IN : $ConfigR : reference to configuration hash
### $OverrideR: reference to a hash containing overrides
my ($ConfigR,$OverrideR) = @_;
my %Override = %$OverrideR;
# Config hash empty?
warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1);
# return if no overrides
return if (keys %Override < 1 or keys %$ConfigR < 1);
foreach my $Key (keys %Override) {
$$ConfigR{$Key} = $Override{$Key};
};
};
################################################################################
################################################################################
sub InitDB {
################################################################################
### initialise database connection
### IN : $ConfigR: reference to configuration hash
### $Die : if TRUE, die if connection fails
### OUT: DBHandle
my ($ConfigR,$Die) = @_;
my %Conf = %$ConfigR;
my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 });
if (!$DBHandle) {
die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die);
warn("$MySelf: W: $DBI::errstr\n");
};
return $DBHandle;
};
################################################################################
#####------------------------------ GetStats ------------------------------#####
################################################################################
sub ListNewsgroups {
################################################################################
### explode a (scalar) list of newsgroup names to a list of newsgroup and
### hierarchy names where every newsgroup and hierarchy appears only once:
### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin
### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header)
### $TLH : top level hierarchy (all other newsgroups are ignored)
### $ValidGroupsR: reference to a hash containing all valid newsgroups
### as keys
### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys
my ($Newsgroups,$TLH,$ValidGroupsR) = @_;
my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR;
my %Newsgroups;
chomp($Newsgroups);
# remove whitespace from contents of Newsgroups:
$Newsgroups =~ s/\s//;
# call &HierarchyCount for each newsgroup in $Newsgroups:
for (split /,/, $Newsgroups) {
# don't count newsgroup/hierarchy in wrong TLH
next if($TLH and !/^$TLH/);
# don't count invalid newsgroups
if(%ValidGroups and !defined($ValidGroups{$_})) {
warn (sprintf("DROPPED: %s\n",$_));
next;
}
# add original newsgroup to %Newsgroups
$Newsgroups{$_} = 1;
# add all hierarchy elements to %Newsgroups, amended by '.ALL',
# i.e. de.alt.ALL and de.ALL
foreach (ParseHierarchies($_)) {
$Newsgroups{$_.'.ALL'} = 1;
}
};
return %Newsgroups;
};
################################################################################
sub ParseHierarchies {
################################################################################
### return a list of all hierarchy levels a newsgroup belongs to
### (for de.alt.test.moderated that would be de/de.alt/de.alt.test)
### IN : $Newsgroup : a newsgroup name
### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
my ($Newsgroup) = @_;
my @Hierarchies;
# strip trailing dots
$Newsgroup =~ s/(.+)\.+$/$1/;
# butcher newsgroup name by "." and add each hierarchy to @Hierarchies
# i.e. de.alt.test: "de.alt" and "de"
while ($Newsgroup =~ /\./) {
$Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/;
push @Hierarchies, $Newsgroup;
};
return @Hierarchies;
};
################################################################################
sub ReadGroupList {
################################################################################
### read a list of valid newsgroups from file (each group on one line,
### ignoring everything after the first whitespace and so accepting files
### in checkgroups format as well as (parts of) an INN active file)
### IN : $Filename : file to read
### OUT: \%ValidGroups: hash containing all valid newsgroups
my ($Filename) = @_;
my %ValidGroups;
open (my $LIST,"<$Filename") or die "$MySelf: E: Cannot read $Filename: $!\n";
while (<$LIST>) {
s/^(\S+).*$/$1/;
chomp;
$ValidGroups{$_} = '1';
};
close $LIST;
return \%ValidGroups;
};
################################################################################
#####----------------------------- TimePeriods ----------------------------#####
################################################################################
sub GetTimePeriod {
################################################################################
### get a time period to act on, in order of preference: by default the
### last month; or a month submitted by -m YYYY-MM; or a time period submitted
### by -p YYYY-MM:YYYY-MM
### IN : $Month,$Period: contents of -m and -p
### OUT: $StartMonth, $EndMonth (identical if period is just one month)
my ($Month,$Period) = @_;
# exit if -m is set and not like YYYY-MM
die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month);
# warn if -m and -p is set
warn "$MySelf: W: Time period assigned by '-p' takes precendece over month assigned by '-m'.\n" if ($Month && $Period);
# default: set -m to last month
$Month = &LastMonth if (!defined($Month) and !defined($Period));
# set $StartMonth, $EndMonth
my ($StartMonth, $EndMonth);
if ($Period) {
# -p: get date range
($StartMonth, $EndMonth) = &SplitPeriod($Period);
die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth);
} else {
# set $StartMonth = $EndMonth = $Month if -p is not set
$StartMonth = $EndMonth = $Month;
};
return ($StartMonth, $EndMonth);
};
################################################################################
sub LastMonth {
################################################################################
### get last month from todays date in YYYY-MM format
### OUT: last month as YYYY-MM
# get today's date
my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
# $Month is already defined from 0 to 11, so no need to decrease it by 1
$Year += 1900;
if ($Month < 1) {
$Month = 12;
$Year--;
};
# return last month
return sprintf('%4d-%02d',$Year,$Month);
};
################################################################################
sub CheckMonth {
################################################################################
### check if input is a valid month in YYYY-MM form
### IN : $Month: month
### OUT: TRUE / FALSE
my ($Month) = @_;
return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/);
return 1;
};
################################################################################
sub SplitPeriod {
################################################################################
### split a time period denoted by YYYY-MM:YYYY-MM into start and end month
### IN : $Period: time period
### OUT: $StartMonth, <20>$EndMonth
my ($Period) = @_;
return (undef,undef) if $Period !~ /^\d{4}-\d{2}:\d{4}-\d{2}$/;
my ($StartMonth, $EndMonth) = split /:/, $Period;
# switch parameters as necessary
if ($EndMonth gt $StartMonth) {
return ($StartMonth, $EndMonth);
} else {
return ($EndMonth, $StartMonth);
};
};
################################################################################
sub ListMonth {
################################################################################
### return a list of months (YYYY-MM) between start and end month
### IN : $StartMonth, $EndMonth
### OUT: @Months: array containing all months from $StartMonth to $EndMonth
my ($StartMonth, $EndMonth) = @_;
return (undef,undef) if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/);
# return if $StartMonth = $EndMonth
return ($StartMonth) if ($StartMonth eq $EndMonth);
# set $Year, $Month from $StartMonth
my ($Year, $Month) = split /-/, $StartMonth;
# define @Months
my (@Months);
until ("$Year-$Month" gt $EndMonth) {
push @Months, "$Year-$Month";
$Month = "$Month"; # force string context
$Month++;
if ($Month > 12) {
$Month = '01';
$Year++;
};
};
return @Months;
};
#####---------------------------- OutputFormats ---------------------------#####
################################################################################
sub OutputData {
################################################################################
### read database query results from DBHandle and print results with formatting
### IN : $Format : format specifier
### $FileName: file name template (-f): filename-YYYY-MM
### $DBQuery : database query handle with executed query,
### containing $Month, $Key, $Value
### $PadGroup: padding length for newsgroups field (optional) for 'pretty'
my ($Format, $FileName, $DBQuery, $PadGroup) = @_;
my ($Handle, $OUT);
our $LastIteration;
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
# set output file handle
if (!$FileName) {
$Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
} elsif (!defined($LastIteration) or $LastIteration ne $Month) {
close $OUT if ($LastIteration);
open ($OUT,sprintf('>%s-%s',$FileName,$Month)) or die sprintf("$MySelf: E: Cannot open output file '%s-%s': $!\n",$FileName,$Month);
$Handle = $OUT;
};
print $Handle &FormatOutput($Format, $Month, $Key, $Value, $PadGroup);
$LastIteration = $Month;
};
close $OUT if ($FileName);
};
################################################################################
sub FormatOutput {
################################################################################
### format information for output according to format specifier
### IN : $Format : format specifier
### $Month : month (as YYYY-MM)
### $Key : newsgroup, client, ...
### $Value : number of postings with that attribute
### $PadGroup: padding length for key field (optional) for 'pretty'
### OUT: $Output: formatted output
my ($Format, $Month, $Key, $Value, $PadGroup) = @_;
# define output types
my %LegalOutput;
@LegalOutput{('dump','dumpgroup','list','pretty')} = ();
# bail out if format is unknown
die "$MySelf: E: Unknown output type '$Format'!\n" if !exists($LegalOutput{$Format});
my ($Output);
# keep last month in mind
our ($LastIteration);
if ($Format eq 'dump') {
# output as dump (ng nnnnn)
$Output = sprintf ("%s %u\n",$Key,$Value);
} elsif ($Format eq 'dumpgroup') {
# output as dump (YYYY-NN: nnnnn)
$Output = sprintf ("%s: %5u\n",$Month,$Value);
} elsif ($Format eq 'list') {
# output as list (YYYY-NN: ng nnnnn)
$Output = sprintf ("%s: %s %u\n",$Month,$Key,$Value);
} elsif ($Format eq 'pretty') {
# output as table
$Output = sprintf ("----- %s:\n",$Month) if (!defined($LastIteration) or $Month ne $LastIteration);
$LastIteration = $Month;
$Output .= sprintf ($PadGroup ? sprintf("%%-%us %%5u\n",$PadGroup) : "%s %u\n",$Key,$Value);
};
return $Output;
};
#####------------------------- QueryModifications -------------------------#####
################################################################################
sub SQLHierarchies {
################################################################################
### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by
### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is
### true, accordingly)
### IN : $ShowHierarchies: boolean value
### OUT: SQL code
my ($ShowHierarchies) = @_;
return $ShowHierarchies ? '' : "AND newsgroup NOT LIKE '%.ALL'";
};
################################################################################
sub GetMaxLength {
################################################################################
### get length of longest field in future query result
### IN : $DBHandle : database handel
### $Table : table to query
### $Field : field to check
### $WhereClause: WHERE clause
### @BindVars : bind variables for WHERE clause
### OUT: $Length: length of longest instnace of $Field
my ($DBHandle,$Table,$Field,$WhereClause,@BindVars) = @_;
my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) FROM %s WHERE %s",$Field,$Table,$WhereClause));
$DBQuery->execute(@BindVars) or warn sprintf("$MySelf: W: Can't get field length for %s from table %s: $DBI::errstr\n",$Field,$Table);
my ($Length) = $DBQuery->fetchrow_array;
return $Length;
};
################################################################################
sub SQLGroupList {
################################################################################
### explode list of newsgroups separated by : (with wildcards) to a SQL WHERE
### clause
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
### OUT: SQL code, list of newsgroups
my ($Newsgroups) = @_;
$Newsgroups =~ s/\*/%/g;
return ('newsgroup LIKE ?', $Newsgroups) if $Newsgroups !~ /:/;
my $SQL = '(';
my @GroupList = split /:/, $Newsgroups;
foreach (@GroupList) {
$SQL .= ' OR ' if $SQL gt '(';
$SQL .= 'newsgroup LIKE ?';
};
$SQL .= ')';
return ($SQL,@GroupList);
};
#####------------------------------- done ---------------------------------#####
1;