6d72dad2c0
Incoming data is written to DBTableRaw without much interpretation. To allow for more and better analysis that raw data should be parsed daily and copied to another database table with separate fields for most header lines. All other scripts could use that pre-parsed data. * Add database schema to install.pl * Add DBTableParse to newsstats.conf.sample and as mandatory to NewsStats.pm * Add parsedb.pl TODO: - Documentation is only rudimentary. - From:, Sender:, Reply-To: and Subject: are not yet parsed. - gatherstats.pl does not yet use DbTableParse. Signed-off-by: Thomas Hochstein <thh@inter.net>
809 lines
30 KiB
Perl
809 lines
30 KiB
Perl
# NewsStats.pm
|
|
#
|
|
# Library functions for the NewsStats package.
|
|
#
|
|
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
|
#
|
|
# This module can be redistributed and/or modified under the same terms under
|
|
# which Perl itself is published.
|
|
|
|
package NewsStats;
|
|
|
|
use strict;
|
|
use warnings;
|
|
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
|
|
|
|
require Exporter;
|
|
@ISA = qw(Exporter);
|
|
@EXPORT = qw(
|
|
$MyVersion
|
|
$PackageVersion
|
|
$FullPath
|
|
$HomePath
|
|
ShowVersion
|
|
ShowPOD
|
|
ReadConfig
|
|
OverrideConfig
|
|
InitDB
|
|
Bleat
|
|
);
|
|
@EXPORT_OK = qw(
|
|
GetTimePeriod
|
|
LastMonth
|
|
SplitPeriod
|
|
ListMonth
|
|
ListNewsgroups
|
|
ParseHierarchies
|
|
ReadGroupList
|
|
OutputData
|
|
FormatOutput
|
|
SQLHierarchies
|
|
SQLSortOrder
|
|
SQLGroupList
|
|
SQLSetBounds
|
|
SQLBuildClause
|
|
GetMaxLength
|
|
);
|
|
%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod
|
|
ListMonth)],
|
|
Output => [qw(OutputData FormatOutput)],
|
|
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
|
|
SQLSetBounds SQLBuildClause GetMaxLength)]);
|
|
$VERSION = '0.01';
|
|
our $PackageVersion = '0.01';
|
|
|
|
use Data::Dumper;
|
|
use File::Basename;
|
|
use Cwd qw(realpath);
|
|
|
|
use Config::Auto;
|
|
use DBI;
|
|
|
|
#####-------------------------------- Vars --------------------------------#####
|
|
|
|
# save $0 in $FullPath
|
|
our $FullPath = $0;
|
|
# strip filename and /bin or /install directory to create the $HomePath
|
|
our $HomePath = dirname(realpath($0));
|
|
$HomePath =~ s/\/(bin|install)//;
|
|
# trim $0
|
|
$0 =~ s%.*/%%;
|
|
# set version string
|
|
our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
|
|
|
|
#####------------------------------- Basics -------------------------------#####
|
|
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub ShowVersion {
|
|
################################################################################
|
|
### display version and exit
|
|
print "NewsStats v$PackageVersion\n$MyVersion\n";
|
|
print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
|
|
print "This program is free software; you may redistribute it ".
|
|
"and/or modify it under the same terms as Perl itself.\n";
|
|
exit(100);
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub ShowPOD {
|
|
################################################################################
|
|
### feed myself to perldoc and exit
|
|
exec('perldoc', $FullPath);
|
|
exit(100);
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub ReadConfig {
|
|
################################################################################
|
|
### read config via Config::Auto
|
|
### IN : $ConfFile: config filename
|
|
### OUT: reference to a hash containing the configuration
|
|
my ($ConfFile) = @_;
|
|
# set default
|
|
$ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile;
|
|
# mandatory configuration options
|
|
my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase',
|
|
'DBTableRaw','DBTableParse','DBTableGrps');
|
|
# read config via Config::Auto
|
|
my $ConfR = Config::Auto::parse($ConfFile, format => 'equal');
|
|
my %Conf = %{$ConfR};
|
|
# check for mandatory options
|
|
foreach (@Mandatory) {
|
|
&Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_))
|
|
if (!defined($Conf{$_}));
|
|
}
|
|
# $Conf{'TLH'} is checked in gatherstats.pl
|
|
return $ConfR;
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub OverrideConfig {
|
|
################################################################################
|
|
### override configuration values
|
|
### IN : $ConfigR : reference to configuration hash
|
|
### $OverrideR: reference to a hash containing overrides
|
|
my ($ConfigR,$OverrideR) = @_;
|
|
my %Override = %$OverrideR;
|
|
# Config hash empty?
|
|
&Bleat(1,"Empty configuration hash passed to OverrideConfig()")
|
|
if ( keys %$ConfigR < 1);
|
|
# return if no overrides
|
|
return if (keys %Override < 1 or keys %$ConfigR < 1);
|
|
foreach my $Key (keys %Override) {
|
|
$$ConfigR{$Key} = $Override{$Key};
|
|
};
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub InitDB {
|
|
################################################################################
|
|
### initialise database connection
|
|
### IN : $ConfigR: reference to configuration hash
|
|
### $Die : if TRUE, die if connection fails
|
|
### OUT: DBHandle
|
|
my ($ConfigR,$Die) = @_;
|
|
my %Conf = %$ConfigR;
|
|
my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',
|
|
$Conf{'DBDriver'},$Conf{'DBDatabase'},
|
|
$Conf{'DBHost'}), $Conf{'DBUser'},
|
|
$Conf{'DBPw'}, { PrintError => 0 });
|
|
if (!$DBHandle) {
|
|
&Bleat(2,$DBI::errstr) if (defined($Die) and $Die);
|
|
&Bleat(1,$DBI::errstr);
|
|
};
|
|
return $DBHandle;
|
|
};
|
|
################################################################################
|
|
|
|
################################################################################
|
|
sub Bleat {
|
|
################################################################################
|
|
### print warning or error messages and terminate in case of error
|
|
### IN : $Level : 1 = warning, 2 = error
|
|
### $Message: warning or error message
|
|
my ($Level,$Message) = @_;
|
|
if ($Level == 1) {
|
|
warn "$0 W: $Message\n"
|
|
} elsif ($Level == 2) {
|
|
die "$0 E: $Message\n"
|
|
} else {
|
|
print "$0: $Message\n"
|
|
}
|
|
};
|
|
################################################################################
|
|
|
|
#####------------------------------ GetStats ------------------------------#####
|
|
|
|
################################################################################
|
|
sub ListNewsgroups {
|
|
################################################################################
|
|
### explode a (scalar) list of newsgroup names to a list of newsgroup and
|
|
### hierarchy names where every newsgroup and hierarchy appears only once:
|
|
### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin
|
|
### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header)
|
|
### $TLH : top level hierarchy (all other newsgroups are ignored)
|
|
### $ValidGroupsR: reference to a hash containing all valid newsgroups
|
|
### as keys
|
|
### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys
|
|
my ($Newsgroups,$TLH,$ValidGroupsR) = @_;
|
|
my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR;
|
|
my %Newsgroups;
|
|
chomp($Newsgroups);
|
|
# remove whitespace from contents of Newsgroups:
|
|
$Newsgroups =~ s/\s//;
|
|
# call &HierarchyCount for each newsgroup in $Newsgroups:
|
|
for (split /,/, $Newsgroups) {
|
|
# don't count newsgroup/hierarchy in wrong TLH
|
|
next if($TLH and !/^$TLH/);
|
|
# don't count invalid newsgroups
|
|
if(%ValidGroups and !defined($ValidGroups{$_})) {
|
|
warn (sprintf("DROPPED: %s\n",$_));
|
|
next;
|
|
}
|
|
# add original newsgroup to %Newsgroups
|
|
$Newsgroups{$_} = 1;
|
|
# add all hierarchy elements to %Newsgroups, amended by '.ALL',
|
|
# i.e. de.alt.ALL and de.ALL
|
|
foreach (ParseHierarchies($_)) {
|
|
$Newsgroups{$_.'.ALL'} = 1;
|
|
}
|
|
};
|
|
return %Newsgroups;
|
|
};
|
|
|
|
################################################################################
|
|
sub ParseHierarchies {
|
|
################################################################################
|
|
### return a list of all hierarchy levels a newsgroup belongs to
|
|
### (for de.alt.test.moderated that would be de/de.alt/de.alt.test)
|
|
### IN : $Newsgroup : a newsgroup name
|
|
### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
|
|
my ($Newsgroup) = @_;
|
|
my @Hierarchies;
|
|
# strip trailing dots
|
|
$Newsgroup =~ s/(.+)\.+$/$1/;
|
|
# butcher newsgroup name by "." and add each hierarchy to @Hierarchies
|
|
# i.e. de.alt.test: "de.alt" and "de"
|
|
while ($Newsgroup =~ /\./) {
|
|
$Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/;
|
|
push @Hierarchies, $Newsgroup;
|
|
};
|
|
return @Hierarchies;
|
|
};
|
|
|
|
################################################################################
|
|
sub ReadGroupList {
|
|
################################################################################
|
|
### read a list of valid newsgroups from file (each group on one line,
|
|
### ignoring everything after the first whitespace and so accepting files
|
|
### in checkgroups format as well as (parts of) an INN active file)
|
|
### IN : $Filename : file to read
|
|
### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups
|
|
my ($Filename) = @_;
|
|
my %ValidGroups;
|
|
open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
|
|
while (<$LIST>) {
|
|
s/^\s*(\S+).*$/$1/;
|
|
chomp;
|
|
next if /^$/;
|
|
$ValidGroups{$_} = '1';
|
|
};
|
|
close $LIST;
|
|
return \%ValidGroups;
|
|
};
|
|
|
|
################################################################################
|
|
|
|
#####----------------------------- TimePeriods ----------------------------#####
|
|
|
|
################################################################################
|
|
sub GetTimePeriod {
|
|
################################################################################
|
|
### get a time period to act on from --month / --day option;
|
|
### if empty, default to last month / day
|
|
### IN : $Period: may be empty, 'YYYY-MM(-DD)', 'YYYY-MM(-DD):YYYY-MM(-DD)'
|
|
### or 'all'
|
|
### $Type : may be 'month' or 'day'
|
|
### OUT: $Verbal,$SQL: verbal description and WHERE-clause
|
|
### of the chosen time period
|
|
my ($Period,$Type) = @_;
|
|
# define result variables
|
|
my ($Verbal, $SQL);
|
|
# check $Type
|
|
$Type = 'month' if (!$Type or ($Type ne 'month' and $Type ne 'day'));
|
|
# define a regular expressions for a month or day
|
|
my $REPeriod = '\d{4}-\d{2}';
|
|
$REPeriod .= '-\d{2}' if ($Type eq 'day');
|
|
|
|
# default to last month / day if option is not set
|
|
if(!$Period) {
|
|
$Period = &LastMonthDay($Type);
|
|
}
|
|
|
|
# check for valid input
|
|
if ($Period =~ /^$REPeriod$/) {
|
|
# single month/day [YYYY-MM(-DD)]
|
|
($Period) = &CheckPeriod($Type,$Period);
|
|
$Verbal = $Period;
|
|
$SQL = sprintf("%s = '%s'",$Type,$Period);
|
|
} elsif ($Period =~ /^$REPeriod:$REPeriod$/) {
|
|
# time period [YYYY-MM(-DD):YYYY-MM(-DD)]
|
|
$Verbal = sprintf('%s to %s',&SplitPeriod($Period,$Type));
|
|
$SQL = sprintf("%s BETWEEN '%s' AND '%s'",$Type,
|
|
&SplitPeriod($Period,$Type));
|
|
} elsif ($Period =~ /^all$/i) {
|
|
# special case: ALL
|
|
$Verbal = 'all time';
|
|
$SQL = '';
|
|
} else {
|
|
# invalid input
|
|
return (undef,undef);
|
|
}
|
|
|
|
return ($Verbal,$SQL);
|
|
};
|
|
|
|
################################################################################
|
|
sub LastMonthDay {
|
|
################################################################################
|
|
### get last month/day from todays date in YYYY-MM format
|
|
### IN : $Type : may be 'month' or 'day'
|
|
### OUT: last month/day as YYYY-MM(-DD)
|
|
my ($Type) = @_;
|
|
my ($Day,$Month,$Year);
|
|
if ($Type eq 'day') {
|
|
# get yesterdays's date
|
|
(undef,undef,undef,$Day,$Month,$Year,undef,undef,undef) = localtime(time-86400);
|
|
# $Month is defined from 0 to 11, so add 1
|
|
$Month++;
|
|
} else {
|
|
# get today's date (month and year)
|
|
(undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
|
|
# $Month is already defined from 0 to 11, so no need to decrease it by 1
|
|
if ($Month < 1) {
|
|
$Month = 12;
|
|
$Year--;
|
|
};
|
|
}
|
|
$Year += 1900;
|
|
# return last month / day
|
|
if ($Type eq 'day') {
|
|
return sprintf('%4d-%02d-%02d',$Year,$Month,$Day);
|
|
} else {
|
|
return sprintf('%4d-%02d',$Year,$Month);
|
|
}
|
|
};
|
|
|
|
################################################################################
|
|
sub CheckPeriod {
|
|
################################################################################
|
|
### check if input (in YYYY-MM(-DD) form) is a valid month / day;
|
|
### otherwise, fix it
|
|
### IN : $Type : may be 'month' or 'day'
|
|
### @Period: array of month/day
|
|
### OUT: @Period: a valid month/day
|
|
my ($Type,@Period) = @_;
|
|
foreach my $Period (@Period) {
|
|
my ($OldPeriod) = $Period;
|
|
my ($CalMonth,$CalDay);
|
|
$Period .= '-01' if ($Type eq 'month');
|
|
$CalDay = substr ($Period, -2);
|
|
$CalMonth = substr ($Period, 5, 2);
|
|
if ($CalMonth < 1 or $CalMonth > 12 or $CalDay < 1 or $CalDay > 31) {
|
|
$CalMonth = '12' if $CalMonth > 12;
|
|
$CalMonth = '01' if $CalMonth < 1;
|
|
substr($Period, 5, 2) = $CalMonth;
|
|
$CalDay = '01' if $CalDay < 1;
|
|
$CalDay = '31' if $CalDay > 31;
|
|
# FIXME! - month with less than 31 days ...
|
|
substr($Period, -2) = $CalDay;
|
|
&Bleat(1,sprintf("'%s' is an invalid date, set to '%s'.",
|
|
$OldPeriod,$Period));
|
|
}
|
|
$Period = substr($Period,0,7) if ($Type eq 'month');
|
|
}
|
|
return @Period;
|
|
};
|
|
|
|
################################################################################
|
|
sub SplitPeriod {
|
|
################################################################################
|
|
### split a time period denoted by YYYY-MM(-DD):YYYY-MM(-DD) into start and end
|
|
### IN : $Period: time period
|
|
### $Type : may be 'month' or 'day'
|
|
### OUT: $StartTime, $EndTime
|
|
my ($Period,$Type) = @_;
|
|
my ($StartTime, $EndTime) = split /:/, $Period;
|
|
($StartTime,$EndTime) = CheckPeriod($Type,$StartTime,$EndTime);
|
|
# switch parameters as necessary
|
|
if ($EndTime gt $StartTime) {
|
|
return ($StartTime, $EndTime);
|
|
} else {
|
|
return ($EndTime, $StartTime);
|
|
};
|
|
};
|
|
|
|
################################################################################
|
|
sub ListMonth {
|
|
################################################################################
|
|
### return a list of months (YYYY-MM) between start and end month
|
|
### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM')
|
|
### OUT: @Months: array containing all months from $MonthExpression enumerated
|
|
my ($MonthExpression )= @_;
|
|
# return if single month
|
|
return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/);
|
|
# parse $MonthExpression
|
|
my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression);
|
|
# set $Year, $Month from $StartMonth
|
|
my ($Year, $Month) = split /-/, $StartMonth;
|
|
# define @Months
|
|
my (@Months);
|
|
until ("$Year-$Month" gt $EndMonth) {
|
|
push @Months, "$Year-$Month";
|
|
$Month = "$Month"; # force string context
|
|
$Month++;
|
|
if ($Month > 12) {
|
|
$Month = '01';
|
|
$Year++;
|
|
};
|
|
};
|
|
return @Months;
|
|
};
|
|
|
|
#####---------------------------- OutputFormats ---------------------------#####
|
|
|
|
################################################################################
|
|
sub OutputData {
|
|
################################################################################
|
|
### read database query results from DBHandle and print results with formatting
|
|
### IN : $Format : format specifier
|
|
### $Comments : print or suppress all comments for machine-readable output
|
|
### $GroupBy : primary sorting order (month or key)
|
|
### $Precision: number of digits right of decimal point (0 or 2)
|
|
### $ValidKeys: reference to a hash containing all valid keys
|
|
### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
|
|
### $DBQuery : database query handle with executed query,
|
|
### containing $Month, $Key, $Value
|
|
### $PadField : padding length for key field (optional) for 'pretty'
|
|
### $PadValue : padding length for value field (optional) for 'pretty'
|
|
my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
|
|
$DBQuery, $PadField, $PadValue) = @_;
|
|
my %ValidKeys = %{$ValidKeys} if $ValidKeys;
|
|
my ($FileName, $Handle, $OUT);
|
|
our $LastIteration;
|
|
|
|
# define output types
|
|
my %LegalOutput;
|
|
@LegalOutput{('dump','list','pretty')} = ();
|
|
# bail out if format is unknown
|
|
&Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format});
|
|
|
|
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
|
|
# don't display invalid keys
|
|
if(%ValidKeys and !defined($ValidKeys{$Key})) {
|
|
# FIXME
|
|
# &Bleat(1,sprintf("DROPPED: %s",$Key));
|
|
next;
|
|
};
|
|
# care for correct sorting order and abstract from month and keys:
|
|
# $Caption will be $Month or $Key, according to sorting order,
|
|
# and $Key will be $Key or $Month, respectively
|
|
my $Caption;
|
|
if ($GroupBy eq 'key') {
|
|
$Caption = $Key;
|
|
$Key = $Month;
|
|
} else {
|
|
$Caption = $Month;
|
|
}
|
|
# set output file handle
|
|
if (!$FileTempl) {
|
|
$Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
|
|
} elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
|
|
close $OUT if ($LastIteration);
|
|
# safeguards for filename creation:
|
|
# replace potential problem characters with '_'
|
|
$FileName = sprintf('%s-%s',$FileTempl,$Caption);
|
|
$FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
|
|
open ($OUT,">$FileName")
|
|
or &Bleat(2,sprintf("Cannot open output file '%s': $!",
|
|
$FileName));
|
|
$Handle = $OUT;
|
|
};
|
|
print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
|
|
$Precision, $PadField, $PadValue);
|
|
$LastIteration = $Caption;
|
|
};
|
|
close $OUT if ($FileTempl);
|
|
};
|
|
|
|
################################################################################
|
|
sub FormatOutput {
|
|
################################################################################
|
|
### format information for output according to format specifier
|
|
### IN : $Format : format specifier
|
|
### $Comments : print or suppress all comments for machine-readable output
|
|
### $Caption : month (as YYYY-MM) or $Key, according to sorting order
|
|
### $Key : newsgroup, client, ... or $Month, as above
|
|
### $Value : number of postings with that attribute
|
|
### $Precision: number of digits right of decimal point (0 or 2)
|
|
### $PadField : padding length for key field (optional) for 'pretty'
|
|
### $PadValue : padding length for value field (optional) for 'pretty'
|
|
### OUT: $Output: formatted output
|
|
my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField,
|
|
$PadValue) = @_;
|
|
my ($Output);
|
|
# keep last caption in mind
|
|
our ($LastIteration);
|
|
# create one line of output
|
|
if ($Format eq 'dump') {
|
|
# output as dump (key value)
|
|
$Output = sprintf ("# %s:\n",$Caption)
|
|
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
|
$Output .= sprintf ("%s %u\n",$Key,$Value);
|
|
} elsif ($Format eq 'list') {
|
|
# output as list (caption key value)
|
|
$Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
|
|
} elsif ($Format eq 'pretty') {
|
|
# output as a table
|
|
$Output = sprintf ("# ----- %s:\n",$Caption)
|
|
if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
|
|
# increase $PadValue for numbers with decimal point
|
|
$PadValue += $Precision+1 if $Precision;
|
|
# add padding if $PadField is set; $PadValue HAS to be set then
|
|
$Output .= sprintf ($PadField ?
|
|
sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) :
|
|
"%s%s %.*f\n",$Key,$Comments ? ':' : '',
|
|
$Precision,$Value);
|
|
};
|
|
return $Output;
|
|
};
|
|
|
|
#####------------------------- QueryModifications -------------------------#####
|
|
|
|
################################################################################
|
|
sub SQLHierarchies {
|
|
################################################################################
|
|
### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by
|
|
### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is
|
|
### true, accordingly)
|
|
### IN : $ShowHierarchies: boolean value
|
|
### OUT: SQL code
|
|
my ($ShowHierarchies) = @_;
|
|
return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'";
|
|
};
|
|
|
|
################################################################################
|
|
sub GetMaxLength {
|
|
################################################################################
|
|
### get length of longest fields in future query result
|
|
### IN : $DBHandle : database handle
|
|
### $Table : table to query
|
|
### $Field : field (key!, i.e. month, newsgroup, ...) to check
|
|
### $Value : field (value!, i.e. postings) to check
|
|
### $WhereClause : WHERE clause
|
|
### $HavingClause: HAVING clause
|
|
### @BindVars : bind variables for WHERE clause
|
|
### OUT: $FieldLength : length of longest instance of $Field
|
|
### $ValueLength : length of longest instance of $Value
|
|
my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_;
|
|
my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),".
|
|
"MAX(%s) ".
|
|
"FROM %s %s %s",$Field,,$Value,
|
|
$Table,$WhereClause,$HavingClause ?
|
|
'GROUP BY newsgroup' . $HavingClause .
|
|
' ORDER BY LENGTH(newsgroup) '.
|
|
'DESC LIMIT 1': ''));
|
|
$DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ".
|
|
"for '%s' from table '%s': ".
|
|
"$DBI::errstr",$Field,$Table));
|
|
my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array;
|
|
my $ValueLength = length($ValueMax) if ($ValueMax);
|
|
return ($FieldLength,$ValueLength);
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLSortOrder {
|
|
################################################################################
|
|
### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and
|
|
### $OptOrderBy (secondary sorting), both ascending or descending;
|
|
### descending sorting order is done by adding '-desc'
|
|
### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
|
|
### $OrderBy: secondary sort by month/newsgroups (default)
|
|
### or number of 'postings'
|
|
### OUT: a SQL ORDER BY clause
|
|
my ($GroupBy,$OrderBy) = @_;
|
|
my ($GroupSort,$OrderSort) = ('','');
|
|
# $GroupBy (primary sorting)
|
|
if (!$GroupBy) {
|
|
$GroupBy = 'month';
|
|
} else {
|
|
($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
|
|
if ($GroupBy =~ /group/i) {
|
|
$GroupBy = 'newsgroup';
|
|
} else {
|
|
$GroupBy = 'month';
|
|
}
|
|
}
|
|
my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
|
|
# $OrderBy (secondary sorting)
|
|
if (!$OrderBy) {
|
|
$OrderBy = $Secondary;
|
|
} else {
|
|
($OrderBy, $OrderSort) = SQLParseOrder($OrderBy);
|
|
if ($OrderBy =~ /posting/i) {
|
|
$OrderBy = "postings $OrderSort, $Secondary";
|
|
} else {
|
|
$OrderBy = "$Secondary $OrderSort";
|
|
}
|
|
}
|
|
return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy));
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLParseOrder {
|
|
################################################################################
|
|
### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g.
|
|
### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc'
|
|
### IN : $OrderOption: order option (see above)
|
|
### OUT: parameter to sort by,
|
|
### sort order ('DESC' or nothing, meaning 'ASC')
|
|
my ($OrderOption) = @_;
|
|
my $SortOrder = '';
|
|
if ($OrderOption =~ s/-?desc$//i) {
|
|
$SortOrder = 'DESC';
|
|
} else {
|
|
$OrderOption =~ s/-?asc$//i
|
|
}
|
|
return ($OrderOption,$SortOrder);
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLGroupList {
|
|
################################################################################
|
|
### explode list of newsgroups separated by : (with wildcards)
|
|
### to a SQL 'WHERE' expression
|
|
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
|
|
### OUT: SQL code to become part of a 'WHERE' clause,
|
|
### list of newsgroups for SQL bindings
|
|
my ($Newsgroups) = @_;
|
|
# substitute '*' wildcard with SQL wildcard character '%'
|
|
$Newsgroups =~ s/\*/%/g;
|
|
return (undef,undef) if !CheckValidNewsgroups($Newsgroups);
|
|
# just one newsgroup?
|
|
return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
|
|
my ($SQL,@WildcardGroups,@NoWildcardGroups);
|
|
# list of newsgroups separated by ':'
|
|
my @GroupList = split /:/, $Newsgroups;
|
|
foreach (@GroupList) {
|
|
if ($_ !~ /%/) {
|
|
# add to list of newsgroup names WITHOUT wildcard
|
|
push (@NoWildcardGroups,$_);
|
|
} else {
|
|
# add to list of newsgroup names WITH wildcard
|
|
push (@WildcardGroups,$_);
|
|
# add wildcard to SQL clause
|
|
# 'OR' if SQL clause is not empty
|
|
$SQL .= ' OR ' if $SQL;
|
|
$SQL .= 'newsgroup LIKE ?'
|
|
}
|
|
};
|
|
if (scalar(@NoWildcardGroups)) {
|
|
# add 'OR' if SQL clause is not empty
|
|
$SQL .= ' OR ' if $SQL;
|
|
if (scalar(@NoWildcardGroups) < 2) {
|
|
# special case: just one newsgroup without wildcard
|
|
$SQL .= 'newsgroup = ?';
|
|
} else {
|
|
# create list of newsgroups to include: 'newsgroup IN (...)'
|
|
$SQL .= 'newsgroup IN (';
|
|
my $SQLin;
|
|
foreach (@NoWildcardGroups) {
|
|
$SQLin .= ',' if $SQLin;
|
|
$SQLin .= '?';
|
|
}
|
|
# add list to SQL clause
|
|
$SQL .= $SQLin .= ')';
|
|
}
|
|
}
|
|
# add brackets '()' to SQL clause as needed (more than one wildcard group)
|
|
if (scalar(@WildcardGroups)) {
|
|
$SQL = '(' . $SQL .')';
|
|
}
|
|
# rebuild @GroupList in (now) correct order
|
|
@GroupList = (@WildcardGroups,@NoWildcardGroups);
|
|
return ($SQL,@GroupList);
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLGroupWildcard {
|
|
################################################################################
|
|
### build a valid SQL 'WHERE' expression with or without wildcards
|
|
### IN : $Newsgroup: newsgroup expression, probably with wildcard
|
|
### (group.name or group.name.%)
|
|
### OUT: SQL code to become part of a 'WHERE' clause
|
|
my ($Newsgroup) = @_;
|
|
if ($Newsgroup !~ /%/) {
|
|
return 'newsgroup = ?';
|
|
} else {
|
|
return 'newsgroup LIKE ?';
|
|
}
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLSetBounds {
|
|
################################################################################
|
|
### set upper and/or lower boundary (number of postings)
|
|
### IN : $Type: 'level', 'average', 'sum' or 'default'
|
|
### $LowBound,$UppBound: lower/upper boundary, respectively
|
|
### OUT: SQL code to become part of a WHERE or HAVING clause
|
|
my ($Type,$LowBound,$UppBound) = @_;
|
|
($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound);
|
|
if($LowBound and $UppBound and $LowBound > $UppBound) {
|
|
&Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ".
|
|
"$UppBound, exchanging boundaries.");
|
|
($LowBound,$UppBound) = ($UppBound,$LowBound);
|
|
}
|
|
# default to 'default'
|
|
my $WhereHavingFunction = 'postings';
|
|
# set $LowBound to SQL statement:
|
|
# 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >='
|
|
if ($Type eq 'level') {
|
|
$WhereHavingFunction = 'MIN(postings)'
|
|
} elsif ($Type eq 'average') {
|
|
$WhereHavingFunction = 'AVG(postings)'
|
|
} elsif ($Type eq 'sum') {
|
|
$WhereHavingFunction = 'SUM(postings)'
|
|
}
|
|
$LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound);
|
|
# set $LowBound to SQL statement:
|
|
# 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <='
|
|
if ($Type eq 'level') {
|
|
$WhereHavingFunction = 'MAX(postings)'
|
|
} elsif ($Type eq 'average') {
|
|
$WhereHavingFunction = 'AVG(postings)'
|
|
} elsif ($Type eq 'sum') {
|
|
$WhereHavingFunction = 'SUM(postings)'
|
|
}
|
|
$UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound);
|
|
return ($LowBound,$UppBound);
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLCheckNumber {
|
|
################################################################################
|
|
### check if input is a valid positive integer; otherwise, make it one
|
|
### IN : @Numbers: array of parameters
|
|
### OUT: @Numbers: a valid positive integer
|
|
my (@Numbers) = @_;
|
|
foreach my $Number (@Numbers) {
|
|
if ($Number and $Number < 0) {
|
|
&Bleat(1,"Boundary $Number is < 0, set to ".-$Number);
|
|
$Number = -$Number;
|
|
}
|
|
$Number = '' if ($Number and $Number !~ /^\d+$/);
|
|
}
|
|
return @Numbers;
|
|
};
|
|
|
|
################################################################################
|
|
sub SQLBuildClause {
|
|
################################################################################
|
|
### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause
|
|
### from multiple expressions which *may* be empty
|
|
### IN : $Type: 'where', 'having', 'group' or 'order'
|
|
### @Expressions: array of expressions
|
|
### OUT: $SQLClause: a SQL clause
|
|
my ($Type,@Expressions) = @_;
|
|
my ($SQLClause,$Separator,$Statement);
|
|
# set separator ('AND' or ',')
|
|
if ($Type eq 'where' or $Type eq 'having') {
|
|
$Separator = 'AND';
|
|
} else {
|
|
$Separator = ',';
|
|
}
|
|
# set statement
|
|
if ($Type eq 'where') {
|
|
$Statement = 'WHERE';
|
|
} elsif ($Type eq 'order') {
|
|
$Statement = 'ORDER BY';
|
|
} elsif ($Type eq 'having') {
|
|
$Statement = 'HAVING';
|
|
} else {
|
|
$Statement = 'GROUP BY';
|
|
}
|
|
# build query from expressions with separators
|
|
foreach my $Expression (@Expressions) {
|
|
if ($Expression) {
|
|
$SQLClause .= " $Separator " if ($SQLClause);
|
|
$SQLClause .= $Expression;
|
|
}
|
|
}
|
|
# add statement in front if not already present
|
|
$SQLClause = " $Statement " . $SQLClause
|
|
if ($SQLClause and $SQLClause !~ /$Statement/);
|
|
return $SQLClause;
|
|
};
|
|
|
|
#####--------------------------- Verifications ----------------------------#####
|
|
|
|
################################################################################
|
|
sub CheckValidNewsgroups {
|
|
################################################################################
|
|
### syntax check of newgroup list
|
|
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
|
|
### OUT: boolean
|
|
my ($Newsgroups) = @_;
|
|
my $InvalidCharRegExp = ',; ';
|
|
return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1;
|
|
};
|
|
|
|
|
|
#####------------------------------- done ---------------------------------#####
|
|
1;
|