84e9923abe
* thh-parsedb: Add some input validation. Add documentation to parsedb.pl. Handle more than one entitiy in From: etc. Let gatherstats read its data from DBTableParse. Add decoding and parsing of From: etc. Create a database table with parsed raw data. Make GetTimePeriod() and others accept days. # Conflicts: # bin/gatherstats.pl
809 lines
30 KiB
Perl
809 lines
30 KiB
Perl
# NewsStats.pm
|
|
#
|
|
# Library functions for the NewsStats package.
|
|
#
|
|
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
|
#
|
|
# This module can be redistributed and/or modified under the same terms under
|
|
# which Perl itself is published.
|
|
|
|
package NewsStats;
|
|
|
|
use strict;
|
|
use warnings;
|
|
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
|
|
|
|
require Exporter;
|
|
@ISA = qw(Exporter);
|
|
@EXPORT = qw(
|
|
$MyVersion
|
|
$PackageVersion
|
|
$FullPath
|
|
$HomePath
|
|
ShowVersion
|
|
ShowPOD
|
|
ReadConfig
|
|
OverrideConfig
|
|
InitDB
|
|
Bleat
|
|
);
|
|
@EXPORT_OK = qw(
|
|
GetTimePeriod
|
|
LastMonth
|
|
SplitPeriod
|
|
ListMonth
|
|
ListNewsgroups
|
|
ParseHierarchies
|
|
ReadGroupList
|
|
OutputData
|
|
FormatOutput
|
|
SQLHierarchies
|
|
SQLSortOrder
|
|
SQLGroupList
|
|
SQLSetBounds
|
|
SQLBuildClause
|
|
GetMaxLength
|
|
);
|
|
%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod
|
|
ListMonth)],
|
|
Output => [qw(OutputData FormatOutput)],
|
|
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
|
|
SQLSetBounds SQLBuildClause GetMaxLength)]);
|
|
$VERSION = '0.02';
|
|
our $PackageVersion = '0.02';
|
|
|
|
use Data::Dumper;
|
|
use File::Basename;
|
|
use Cwd qw(realpath);
|
|
|
|
use Config::Auto;
|
|
use DBI;
|
|
|
|
#####-------------------------------- Vars --------------------------------#####
|
|
|
|
# save $0 in $FullPath
|
|
our $FullPath = $0;
|
|
# strip filename and /bin or /install directory to create the $HomePath
|
|
our $HomePath = dirname(realpath($0));
|
|
$HomePath =~ s/\/(bin|install)//;
|
|
# trim $0
|
|
$0 =~ s%.*/%%;
|
|
# set version string
|
|
our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
|
|
|
|
#####------------------------------- Basics -------------------------------#####
|
|
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub ShowVersion {
|
|
################################################################################
|
|
### display version and exit
|
|
print "NewsStats v$PackageVersion\n$MyVersion\n";
|
|
print "Copyright (c) 2010-2013 Thomas Hochstein <thh\@inter.net>\n";
|
|
print "This program is free software; you may redistribute it ".
|
|
"and/or modify it under the same terms as Perl itself.\n";
|
|
exit(100);
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub ShowPOD {
|
|
################################################################################
|
|
### feed myself to perldoc and exit
|
|
exec('perldoc', $FullPath);
|
|
exit(100);
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub ReadConfig {
|
|
################################################################################
|
|
### read config via Config::Auto
|
|
### IN : $ConfFile: config filename
|
|
### OUT: reference to a hash containing the configuration
|
|
my ($ConfFile) = @_;
|
|
# set default
|
|
$ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile;
|
|
# mandatory configuration options
|
|
my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase',
|
|
'DBTableRaw','DBTableParse','DBTableGrps');
|
|
# read config via Config::Auto
|
|
my $ConfR = Config::Auto::parse($ConfFile, format => 'equal');
|
|
my %Conf = %{$ConfR};
|
|
# check for mandatory options
|
|
foreach (@Mandatory) {
|
|
&Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_))
|
|
if (!defined($Conf{$_}));
|
|
}
|
|
# $Conf{'TLH'} is checked in gatherstats.pl
|
|
return $ConfR;
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub OverrideConfig {
|
|
################################################################################
|
|
### override configuration values
|
|
### IN : $ConfigR : reference to configuration hash
|
|
### $OverrideR: reference to a hash containing overrides
|
|
my ($ConfigR,$OverrideR) = @_;
|
|
my %Override = %$OverrideR;
|
|
# Config hash empty?
|
|
&Bleat(1,"Empty configuration hash passed to OverrideConfig()")
|
|
if ( keys %$ConfigR < 1);
|
|
# return if no overrides
|
|
return if (keys %Override < 1 or keys %$ConfigR < 1);
|
|
foreach my $Key (keys %Override) {
|
|
$$ConfigR{$Key} = $Override{$Key};
|
|
};
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub InitDB {
|
|
################################################################################
|
|
### initialise database connection
|
|
### IN : $ConfigR: reference to configuration hash
|
|
### $Die : if TRUE, die if connection fails
|
|
### OUT: DBHandle
|
|
my ($ConfigR,$Die) = @_;
|
|
my %Conf = %$ConfigR;
|
|
my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',
|
|
$Conf{'DBDriver'},$Conf{'DBDatabase'},
|
|
$Conf{'DBHost'}), $Conf{'DBUser'},
|
|
$Conf{'DBPw'}, { PrintError => 0 });
|
|
if (!$DBHandle) {
|
|
&Bleat(2,$DBI::errstr) if (defined($Die) and $Die);
|
|
&Bleat(1,$DBI::errstr);
|
|
};
|
|
return $DBHandle;
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub Bleat {
|
|
################################################################################
|
|
### print warning or error messages and terminate in case of error
|
|
### IN : $Level : 1 = warning, 2 = error
|
|
### $Message: warning or error message
|
|
my ($Level,$Message) = @_;
|
|
if ($Level == 1) {
|
|
warn "$0 W: $Message\n"
|
|
} elsif ($Level == 2) {
|
|
die "$0 E: $Message\n"
|
|
} else {
|
|
print "$0: $Message\n"
|
|
}
|
|
};
|
|
################################################################################
|
|
|
|
#####------------------------------ GetStats ------------------------------#####
|
|
|
|
################################################################################
|
|
sub ListNewsgroups {
|
|
################################################################################
|
|
### explode a (scalar) list of newsgroup names to a list of newsgroup and
|
|
### hierarchy names where every newsgroup and hierarchy appears only once:
|
|
### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin
|
|
### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header)
|
|
### $TLH : top level hierarchy (all other newsgroups are ignored)
|
|
### $ValidGroupsR: reference to a hash containing all valid newsgroups
|
|
### as keys
|
|
### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys
|
|
my ($Newsgroups,$TLH,$ValidGroupsR) = @_;
|
|
my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR;
|
|
my %Newsgroups;
|
|
chomp($Newsgroups);
|
|
# remove whitespace from contents of Newsgroups:
|
|
$Newsgroups =~ s/\s//;
|
|
# call &HierarchyCount for each newsgroup in $Newsgroups:
|
|
for (split /,/, $Newsgroups) {
|
|
# don't count newsgroup/hierarchy in wrong TLH
|
|
next if($TLH and !/^$TLH/);
|
|
# don't count invalid newsgroups
|
|
if(%ValidGroups and !defined($ValidGroups{$_})) {
|
|
warn (sprintf("DROPPED: %s\n",$_));
|
|
next;
|
|
}
|
|
# add original newsgroup to %Newsgroups
|
|
$Newsgroups{$_} = 1;
|
|
# add all hierarchy elements to %Newsgroups, amended by '.ALL',
|
|
# i.e. de.alt.ALL and de.ALL
|
|
foreach (ParseHierarchies($_)) {
|
|
$Newsgroups{$_.'.ALL'} = 1;
|
|
}
|
|
};
|
|
return %Newsgroups;
|
|
};
|
|
|
|
################################################################################
|
|
sub ParseHierarchies {
|
|
################################################################################
|
|
### return a list of all hierarchy levels a newsgroup belongs to
|
|
### (for de.alt.test.moderated that would be de/de.alt/de.alt.test)
|
|
### IN : $Newsgroup : a newsgroup name
|
|
### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
|
|
my ($Newsgroup) = @_;
|
|
my @Hierarchies;
|
|
# strip trailing dots
|
|
$Newsgroup =~ s/(.+)\.+$/$1/;
|
|
# butcher newsgroup name by "." and add each hierarchy to @Hierarchies
|
|
# i.e. de.alt.test: "de.alt" and "de"
|
|
while ($Newsgroup =~ /\./) {
|
|
$Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/;
|
|
push @Hierarchies, $Newsgroup;
|
|
};
|
|
return @Hierarchies;
|
|
};
|
|
|
|
################################################################################
|
|
sub ReadGroupList {
|
|
################################################################################
|
|
### read a list of valid newsgroups from file (each group on one line,
|
|
### ignoring everything after the first whitespace and so accepting files
|
|
### in checkgroups format as well as (parts of) an INN active file)
|
|
### IN : $Filename : file to read
|
|
### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups
|
|
my ($Filename) = @_;
|
|
my %ValidGroups;
|
|
open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
|
|
while (<$LIST>) {
|
|
s/^\s*(\S+).*$/$1/;
|
|
chomp;
|
|
next if /^$/;
|
|
$ValidGroups{$_} = '1';
|
|
};
|
|
close $LIST;
|
|
return \%ValidGroups;
|
|
};
|
|
|
|
################################################################################
|
|
|
|
#####----------------------------- TimePeriods ----------------------------#####
|
|
|
|
################################################################################
|
|
sub GetTimePeriod {
|
|
################################################################################
|
|
### get a time period to act on from --month / --day option;
|
|
### if empty, default to last month / day
|
|
### IN : $Period: may be empty, 'YYYY-MM(-DD)', 'YYYY-MM(-DD):YYYY-MM(-DD)'
|
|
### or 'all'
|
|
### $Type : may be 'month' or 'day'
|
|
### OUT: $Verbal,$SQL: verbal description and WHERE-clause
|
|
### of the chosen time period
|
|
my ($Period,$Type) = @_;
|
|
# define result variables
|
|
my ($Verbal, $SQL);
|
|
# check $Type
|
|
$Type = 'month' if (!$Type or ($Type ne 'month' and $Type ne 'day'));
|
|
# define a regular expressions for a month or day
|
|
my $REPeriod = '\d{4}-\d{2}';
|
|
$REPeriod .= '-\d{2}' if ($Type eq 'day');
|
|
|
|
# default to last month / day if option is not set
|
|
if(!$Period) {
|
|
$Period = &LastMonthDay($Type);
|
|
}
|
|
|
|
# check for valid input
|
|
if ($Period =~ /^$REPeriod$/) {
|
|
# single month/day [YYYY-MM(-DD)]
|
|
($Period) = &CheckPeriod($Type,$Period);
|
|
$Verbal = $Period;
|
|
$SQL = sprintf("%s = '%s'",$Type,$Period);
|
|
} elsif ($Period =~ /^$REPeriod:$REPeriod$/) {
|
|
# time period [YYYY-MM(-DD):YYYY-MM(-DD)]
|
|
$Verbal = sprintf('%s to %s',&SplitPeriod($Period,$Type));
|
|
$SQL = sprintf("%s BETWEEN '%s' AND '%s'",$Type,
|
|
&SplitPeriod($Period,$Type));
|
|
} elsif ($Period =~ /^all$/i) {
|
|
# special case: ALL
|
|
$Verbal = 'all time';
|
|
$SQL = '';
|
|
} else {
|
|
# invalid input
|
|
return (undef,undef);
|
|
}
|
|
|
|
return ($Verbal,$SQL);
|
|
};
|
|
|
|
################################################################################
|
|
sub LastMonthDay {
|
|
################################################################################
|
|
### get last month/day from todays date in YYYY-MM format
|
|
### IN : $Type : may be 'month' or 'day'
|
|
### OUT: last month/day as YYYY-MM(-DD)
|
|
my ($Type) = @_;
|
|
my ($Day,$Month,$Year);
|
|
if ($Type eq 'day') {
|
|
# get yesterdays's date
|
|
(undef,undef,undef,$Day,$Month,$Year,undef,undef,undef) = localtime(time-86400);
|
|
# $Month is defined from 0 to 11, so add 1
|
|
$Month++;
|
|
} else {
|
|
# get today's date (month and year)
|
|
(undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
|
|
# $Month is already defined from 0 to 11, so no need to decrease it by 1
|
|
if ($Month < 1) {
|
|
$Month = 12;
|
|
$Year--;
|
|
};
|
|
}
|
|
$Year += 1900;
|
|
# return last month / day
|
|
if ($Type eq 'day') {
|
|
return sprintf('%4d-%02d-%02d',$Year,$Month,$Day);
|
|
} else {
|
|
return sprintf('%4d-%02d',$Year,$Month);
|
|
}
|
|
};
|
|
|
|
################################################################################
|
|
sub CheckPeriod {
|
|
################################################################################
|
|
### check if input (in YYYY-MM(-DD) form) is a valid month / day;
|
|
### otherwise, fix it
|
|
### IN : $Type : may be 'month' or 'day'
|
|
### @Period: array of month/day
|
|
### OUT: @Period: a valid month/day
|
|
my ($Type,@Period) = @_;
|
|
foreach my $Period (@Period) {
|
|
my ($OldPeriod) = $Period;
|
|
my ($CalMonth,$CalDay);
|
|
$Period .= '-01' if ($Type eq 'month');
|
|
$CalDay = substr ($Period, -2);
|
|
$CalMonth = substr ($Period, 5, 2);
|
|
if ($CalMonth < 1 or $CalMonth > 12 or $CalDay < 1 or $CalDay > 31) {
|
|
$CalMonth = '12' if $CalMonth > 12;
|
|
$CalMonth = '01' if $CalMonth < 1;
|
|
substr($Period, 5, 2) = $CalMonth;
|
|
$CalDay = '01' if $CalDay < 1;
|
|
$CalDay = '31' if $CalDay > 31;
|
|
# FIXME! - month with less than 31 days ...
|
|
substr($Period, -2) = $CalDay;
|
|
&Bleat(1,sprintf("'%s' is an invalid date, set to '%s'.",
|
|
$OldPeriod,$Period));
|
|
}
|
|
$Period = substr($Period,0,7) if ($Type eq 'month');
|
|
}
|
|
return @Period;
|
|
};
|
|
|
|
################################################################################
|
|
sub SplitPeriod {
|
|
################################################################################
|
|
### split a time period denoted by YYYY-MM(-DD):YYYY-MM(-DD) into start and end
|
|
### IN : $Period: time period
|
|
### $Type : may be 'month' or 'day'
|
|
### OUT: $StartTime, $EndTime
|
|
my ($Period,$Type) = @_;
|
|
my ($StartTime, $EndTime) = split /:/, $Period;
|
|
($StartTime,$EndTime) = CheckPeriod($Type,$StartTime,$EndTime);
|
|
# switch parameters as necessary
|
|
if ($EndTime gt $StartTime) {
|
|
return ($StartTime, $EndTime);
|
|
} else {
|
|
return ($EndTime, $StartTime);
|
|
};
|
|
};
|
|
|
|
################################################################################
|
|
sub ListMonth {
|
|
################################################################################
|
|
### return a list of months (YYYY-MM) between start and end month
|
|
### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM')
|
|
### OUT: @Months: array containing all months from $MonthExpression enumerated
|
|
my ($MonthExpression )= @_;
|
|
# return if single month
|
|
return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/);
|
|
# parse $MonthExpression
|
|
my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression);
|
|
# set $Year, $Month from $StartMonth
|
|
my ($Year, $Month) = split /-/, $StartMonth;
|
|
# define @Months
|
|
my (@Months);
|
|
until ("$Year-$Month" gt $EndMonth) {
|
|
push @Months, "$Year-$Month";
|
|
$Month = "$Month"; # force string context
|
|
$Month++;
|
|
if ($Month > 12) {
|
|
$Month = '01';
|
|
$Year++;
|
|
};
|
|
};
|
|
return @Months;
|
|
};
|
|
|
|
#####---------------------------- OutputFormats ---------------------------#####
|
|
|
|
################################################################################
|
|
sub OutputData {
|
|
################################################################################
|
|
### read database query results from DBHandle and print results with formatting
|
|
### IN : $Format : format specifier
|
|
### $Comments : print or suppress all comments for machine-readable output
|
|
### $GroupBy : primary sorting order (month or key)
|
|
### $Precision: number of digits right of decimal point (0 or 2)
|
|
### $ValidKeys: reference to a hash containing all valid keys
|
|
### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
|
|
### $DBQuery : database query handle with executed query,
|
|
### containing $Month, $Key, $Value
|
|
### $PadField : padding length for key field (optional) for 'pretty'
|
|
### $PadValue : padding length for value field (optional) for 'pretty'
|
|
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
|
|
$DBQuery, $PadField, $PadValue) = @_;
|
|
my %ValidKeys = %{$ValidKeys} if $ValidKeys;
|
|
my ($FileName, $Handle, $OUT);
|
|
our $LastIteration;
|
|
|
|
# define output types
|
|
my %LegalOutput;
|
|
@LegalOutput{('dump','list','pretty')} = ();
|
|
# bail out if format is unknown
|
|
&Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format});
|
|
|
|
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
|
|
# don't display invalid keys
|
|
if(%ValidKeys and !defined($ValidKeys{$Key})) {
|
|
# FIXME
|
|
# &Bleat(1,sprintf("DROPPED: %s",$Key));
|
|
next;
|
|
};
|
|
# care for correct sorting order and abstract from month and keys:
|
|
# $Caption will be $Month or $Key, according to sorting order,
|
|
# and $Key will be $Key or $Month, respectively
|
|
my $Caption;
|
|
if ($GroupBy eq 'key') {
|
|
$Caption = $Key;
|
|
$Key = $Month;
|
|
} else {
|
|
$Caption = $Month;
|
|
}
|
|
# set output file handle
|
|
if (!$FileTempl) {
|
|
$Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
|
|
} elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
|
|
close $OUT if ($LastIteration);
|
|
# safeguards for filename creation:
|
|
# replace potential problem characters with '_'
|
|
$FileName = sprintf('%s-%s',$FileTempl,$Caption);
|
|
$FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
|
|
open ($OUT,">$FileName")
|
|
or &Bleat(2,sprintf("Cannot open output file '%s': $!",
|
|
$FileName));
|
|
$Handle = $OUT;
|
|
};
|
|
print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
|
|
$Precision, $PadField, $PadValue);
|
|
$LastIteration = $Caption;
|
|
};
|
|
close $OUT if ($FileTempl);
|
|
};
|
|
|
|
################################################################################
|
|
sub FormatOutput {
|
|
################################################################################
|
|
### format information for output according to format specifier
|
|
### IN : $Format : format specifier
|
|
### $Comments : print or suppress all comments for machine-readable output
|
|
### $Caption : month (as YYYY-MM) or $Key, according to sorting order
|
|
### $Key : newsgroup, client, ... or $Month, as above
|
|
### $Value : number of postings with that attribute
|
|
### $Precision: number of digits right of decimal point (0 or 2)
|
|
### $PadField : padding length for key field (optional) for 'pretty'
|
|
### $PadValue : padding length for value field (optional) for 'pretty'
|
|
### OUT: $Output: formatted output
|
|
my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField,
|
|
$PadValue) = @_;
|
|
my ($Output);
|
|
# keep last caption in mind
|
|
our ($LastIteration);
|
|
# create one line of output
|
|
if ($Format eq 'dump') {
|
|
# output as dump (key value)
|
|
$Output = sprintf ("# %s:\n",$Caption)
|
|
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
|
$Output .= sprintf ("%s %u\n",$Key,$Value);
|
|
} elsif ($Format eq 'list') {
|
|
# output as list (caption key value)
|
|
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
|
|
} elsif ($Format eq 'pretty') {
|
|
# output as a table
|
|
$Output = sprintf ("# ----- %s:\n",$Caption)
|
|
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
|
# increase $PadValue for numbers with decimal point
|
|
$PadValue += $Precision+1 if $Precision;
|
|
# add padding if $PadField is set; $PadValue HAS to be set then
|
|
$Output .= sprintf ($PadField ?
|
|
sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) :
|
|
"%s%s %.*f\n",$Key,$Comments ? ':' : '',
|
|
$Precision,$Value);
|
|
};
|
|
return $Output;
|
|
};
|
|
|
|
#####------------------------- QueryModifications -------------------------#####
|
|
|
|
################################################################################
|
|
sub SQLHierarchies {
|
|
################################################################################
|
|
### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by
|
|
### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is
|
|
### true, accordingly)
|
|
### IN : $ShowHierarchies: boolean value
|
|
### OUT: SQL code
|
|
my ($ShowHierarchies) = @_;
|
|
return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'";
|
|
};
|
|
|
|
################################################################################
|
|
sub GetMaxLength {
|
|
################################################################################
|
|
### get length of longest fields in future query result
|
|
### IN : $DBHandle : database handle
|
|
### $Table : table to query
|
|
### $Field : field (key!, i.e. month, newsgroup, ...) to check
|
|
### $Value : field (value!, i.e. postings) to check
|
|
### $WhereClause : WHERE clause
|
|
### $HavingClause: HAVING clause
|
|
### @BindVars : bind variables for WHERE clause
|
|
### OUT: $FieldLength : length of longest instance of $Field
|
|
### $ValueLength : length of longest instance of $Value
|
|
my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_;
|
|
my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),".
|
|
"MAX(%s) ".
|
|
"FROM %s %s %s",$Field,,$Value,
|
|
$Table,$WhereClause,$HavingClause ?
|
|
'GROUP BY newsgroup' . $HavingClause .
|
|
' ORDER BY LENGTH(newsgroup) '.
|
|
'DESC LIMIT 1': ''));
|
|
$DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ".
|
|
"for '%s' from table '%s': ".
|
|
"$DBI::errstr",$Field,$Table));
|
|
my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array;
|
|
my $ValueLength = length($ValueMax) if ($ValueMax);
|
|
return ($FieldLength,$ValueLength);
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLSortOrder {
|
|
################################################################################
|
|
### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and
|
|
### $OptOrderBy (secondary sorting), both ascending or descending;
|
|
### descending sorting order is done by adding '-desc'
|
|
### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
|
|
### $OrderBy: secondary sort by month/newsgroups (default)
|
|
### or number of 'postings'
|
|
### OUT: a SQL ORDER BY clause
|
|
my ($GroupBy,$OrderBy) = @_;
|
|
my ($GroupSort,$OrderSort) = ('','');
|
|
# $GroupBy (primary sorting)
|
|
if (!$GroupBy) {
|
|
$GroupBy = 'month';
|
|
} else {
|
|
($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
|
|
if ($GroupBy =~ /group/i) {
|
|
$GroupBy = 'newsgroup';
|
|
} else {
|
|
$GroupBy = 'month';
|
|
}
|
|
}
|
|
my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
|
|
# $OrderBy (secondary sorting)
|
|
if (!$OrderBy) {
|
|
$OrderBy = $Secondary;
|
|
} else {
|
|
($OrderBy, $OrderSort) = SQLParseOrder($OrderBy);
|
|
if ($OrderBy =~ /posting/i) {
|
|
$OrderBy = "postings $OrderSort, $Secondary";
|
|
} else {
|
|
$OrderBy = "$Secondary $OrderSort";
|
|
}
|
|
}
|
|
return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy));
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLParseOrder {
|
|
################################################################################
|
|
### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g.
|
|
### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc'
|
|
### IN : $OrderOption: order option (see above)
|
|
### OUT: parameter to sort by,
|
|
### sort order ('DESC' or nothing, meaning 'ASC')
|
|
my ($OrderOption) = @_;
|
|
my $SortOrder = '';
|
|
if ($OrderOption =~ s/-?desc$//i) {
|
|
$SortOrder = 'DESC';
|
|
} else {
|
|
$OrderOption =~ s/-?asc$//i
|
|
}
|
|
return ($OrderOption,$SortOrder);
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLGroupList {
|
|
################################################################################
|
|
### explode list of newsgroups separated by : (with wildcards)
|
|
### to a SQL 'WHERE' expression
|
|
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
|
|
### OUT: SQL code to become part of a 'WHERE' clause,
|
|
### list of newsgroups for SQL bindings
|
|
my ($Newsgroups) = @_;
|
|
# substitute '*' wildcard with SQL wildcard character '%'
|
|
$Newsgroups =~ s/\*/%/g;
|
|
return (undef,undef) if !CheckValidNewsgroups($Newsgroups);
|
|
# just one newsgroup?
|
|
return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
|
|
my ($SQL,@WildcardGroups,@NoWildcardGroups);
|
|
# list of newsgroups separated by ':'
|
|
my @GroupList = split /:/, $Newsgroups;
|
|
foreach (@GroupList) {
|
|
if ($_ !~ /%/) {
|
|
# add to list of newsgroup names WITHOUT wildcard
|
|
push (@NoWildcardGroups,$_);
|
|
} else {
|
|
# add to list of newsgroup names WITH wildcard
|
|
push (@WildcardGroups,$_);
|
|
# add wildcard to SQL clause
|
|
# 'OR' if SQL clause is not empty
|
|
$SQL .= ' OR ' if $SQL;
|
|
$SQL .= 'newsgroup LIKE ?'
|
|
}
|
|
};
|
|
if (scalar(@NoWildcardGroups)) {
|
|
# add 'OR' if SQL clause is not empty
|
|
$SQL .= ' OR ' if $SQL;
|
|
if (scalar(@NoWildcardGroups) < 2) {
|
|
# special case: just one newsgroup without wildcard
|
|
$SQL .= 'newsgroup = ?';
|
|
} else {
|
|
# create list of newsgroups to include: 'newsgroup IN (...)'
|
|
$SQL .= 'newsgroup IN (';
|
|
my $SQLin;
|
|
foreach (@NoWildcardGroups) {
|
|
$SQLin .= ',' if $SQLin;
|
|
$SQLin .= '?';
|
|
}
|
|
# add list to SQL clause
|
|
$SQL .= $SQLin .= ')';
|
|
}
|
|
}
|
|
# add brackets '()' to SQL clause as needed (more than one wildcard group)
|
|
if (scalar(@WildcardGroups)) {
|
|
$SQL = '(' . $SQL .')';
|
|
}
|
|
# rebuild @GroupList in (now) correct order
|
|
@GroupList = (@WildcardGroups,@NoWildcardGroups);
|
|
return ($SQL,@GroupList);
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLGroupWildcard {
|
|
################################################################################
|
|
### build a valid SQL 'WHERE' expression with or without wildcards
|
|
### IN : $Newsgroup: newsgroup expression, probably with wildcard
|
|
### (group.name or group.name.%)
|
|
### OUT: SQL code to become part of a 'WHERE' clause
|
|
my ($Newsgroup) = @_;
|
|
if ($Newsgroup !~ /%/) {
|
|
return 'newsgroup = ?';
|
|
} else {
|
|
return 'newsgroup LIKE ?';
|
|
}
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLSetBounds {
|
|
################################################################################
|
|
### set upper and/or lower boundary (number of postings)
|
|
### IN : $Type: 'level', 'average', 'sum' or 'default'
|
|
### $LowBound,$UppBound: lower/upper boundary, respectively
|
|
### OUT: SQL code to become part of a WHERE or HAVING clause
|
|
my ($Type,$LowBound,$UppBound) = @_;
|
|
($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound);
|
|
if($LowBound and $UppBound and $LowBound > $UppBound) {
|
|
&Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ".
|
|
"$UppBound, exchanging boundaries.");
|
|
($LowBound,$UppBound) = ($UppBound,$LowBound);
|
|
}
|
|
# default to 'default'
|
|
my $WhereHavingFunction = 'postings';
|
|
# set $LowBound to SQL statement:
|
|
# 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >='
|
|
if ($Type eq 'level') {
|
|
$WhereHavingFunction = 'MIN(postings)'
|
|
} elsif ($Type eq 'average') {
|
|
$WhereHavingFunction = 'AVG(postings)'
|
|
} elsif ($Type eq 'sum') {
|
|
$WhereHavingFunction = 'SUM(postings)'
|
|
}
|
|
$LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound);
|
|
# set $LowBound to SQL statement:
|
|
# 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <='
|
|
if ($Type eq 'level') {
|
|
$WhereHavingFunction = 'MAX(postings)'
|
|
} elsif ($Type eq 'average') {
|
|
$WhereHavingFunction = 'AVG(postings)'
|
|
} elsif ($Type eq 'sum') {
|
|
$WhereHavingFunction = 'SUM(postings)'
|
|
}
|
|
$UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound);
|
|
return ($LowBound,$UppBound);
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLCheckNumber {
|
|
################################################################################
|
|
### check if input is a valid positive integer; otherwise, make it one
|
|
### IN : @Numbers: array of parameters
|
|
### OUT: @Numbers: a valid positive integer
|
|
my (@Numbers) = @_;
|
|
foreach my $Number (@Numbers) {
|
|
if ($Number and $Number < 0) {
|
|
&Bleat(1,"Boundary $Number is < 0, set to ".-$Number);
|
|
$Number = -$Number;
|
|
}
|
|
$Number = '' if ($Number and $Number !~ /^\d+$/);
|
|
}
|
|
return @Numbers;
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLBuildClause {
|
|
################################################################################
|
|
### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause
|
|
### from multiple expressions which *may* be empty
|
|
### IN : $Type: 'where', 'having', 'group' or 'order'
|
|
### @Expressions: array of expressions
|
|
### OUT: $SQLClause: a SQL clause
|
|
my ($Type,@Expressions) = @_;
|
|
my ($SQLClause,$Separator,$Statement);
|
|
# set separator ('AND' or ',')
|
|
if ($Type eq 'where' or $Type eq 'having') {
|
|
$Separator = 'AND';
|
|
} else {
|
|
$Separator = ',';
|
|
}
|
|
# set statement
|
|
if ($Type eq 'where') {
|
|
$Statement = 'WHERE';
|
|
} elsif ($Type eq 'order') {
|
|
$Statement = 'ORDER BY';
|
|
} elsif ($Type eq 'having') {
|
|
$Statement = 'HAVING';
|
|
} else {
|
|
$Statement = 'GROUP BY';
|
|
}
|
|
# build query from expressions with separators
|
|
foreach my $Expression (@Expressions) {
|
|
if ($Expression) {
|
|
$SQLClause .= " $Separator " if ($SQLClause);
|
|
$SQLClause .= $Expression;
|
|
}
|
|
}
|
|
# add statement in front if not already present
|
|
$SQLClause = " $Statement " . $SQLClause
|
|
if ($SQLClause and $SQLClause !~ /$Statement/);
|
|
return $SQLClause;
|
|
};
|
|
|
|
#####--------------------------- Verifications ----------------------------#####
|
|
|
|
################################################################################
|
|
sub CheckValidNewsgroups {
|
|
################################################################################
|
|
### syntax check of newgroup list
|
|
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
|
|
### OUT: boolean
|
|
my ($Newsgroups) = @_;
|
|
my $InvalidCharRegExp = ',; ';
|
|
return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1;
|
|
};
|
|
|
|
|
|
#####------------------------------- done ---------------------------------#####
|
|
1;
|