Initial checkin of working branch.

Checkin of
- NewsStats.pm
- install/install.pl
- feedlog.pl
- gatherstats.pl
- groupstats.pl

Signed-off-by: Thomas Hochstein <thh@inter.net>
This commit is contained in:
Thomas Hochstein 2010-09-17 13:50:58 +02:00
parent 4db873f74d
commit 741336c210
5 changed files with 893 additions and 0 deletions

395
NewsStats.pm Normal file
View file

@ -0,0 +1,395 @@
# NewsStats.pm
#
# Library functions for the NewsStats package.
#
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
#
# This module can be redistributed and/or modified under the same terms under
# which Perl itself is published.
package NewsStats;
use strict;
use warnings;
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(
$MySelf
$MyVersion
ReadOptions
ReadConfig
OverrideConfig
InitDB
);
@EXPORT_OK = qw(
GetTimePeriod
LastMonth
CheckMonth
SplitPeriod
ListMonth
ListNewsgroups
OutputData
FormatOutput
SQLHierarchies
SQLGroupList
GetMaxLenght
);
%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)],
Output => [qw(OutputData FormatOutput)],
SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLenght)]);
$VERSION = '0.1';
use Data::Dumper;
use File::Basename;
use Getopt::Std;
use Config::Auto;
use DBI;
#####-------------------------------- Vars --------------------------------#####
our $MySelf = fileparse($0, '.pl');
our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)";
#####------------------------------- Basics -------------------------------#####
################################################################################
sub ReadOptions {
################################################################################
### read commandline options and act on standard options
### IN : $Params: containing list of commandline paramaters (without -h and -V)
### OUT: a hash containing the commandline options
$Getopt::Std::STANDARD_HELP_VERSION = 1;
my ($Params) = @_;
my %Options;
getopts('Vh'.$Params, \%Options);
# -V: display version
&ShowVersion if ($Options{'V'});
# -h: feed myself to perldoc
&ShowPOD if ($Options{'h'});
return %Options;
};
################################################################################
################################################################################
sub ShowVersion {
################################################################################
### display version and exit
print "$MyVersion\nCopyright (c) 2010 Thomas Hochstein <thh\@inter.net>\n";
print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n";
exit(100);
};
################################################################################
################################################################################
sub ShowPOD {
################################################################################
### feed myself to perldoc and exit
exec('perldoc', $0);
exit(100);
};
################################################################################
################################################################################
sub ReadConfig {
################################################################################
### read config via Config::Auto
### IN : $ConfFile: config filename
### OUT: reference to a hash containing the configuration
my ($ConfFile) = @_;
return Config::Auto::parse($ConfFile, format => 'equal');
};
################################################################################
################################################################################
sub OverrideConfig {
################################################################################
### override configuration values
### IN : $ConfigR : reference to configuration hash
### $OverrideR: reference to a hash containing overrides
my ($ConfigR,$OverrideR) = @_;
my %Override = %$OverrideR;
warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1);
return if (keys %Override < 1 or keys %$ConfigR < 1);
foreach my $Key (keys %Override) {
$$ConfigR{$Key} = $Override{$Key};
};
};
################################################################################
################################################################################
sub InitDB {
################################################################################
### initialise database connection
### IN : $ConfigR: reference to configuration hash
### $Die : if TRUE, die if connection failed
### OUT: DBHandle
my ($ConfigR,$Die) = @_;
my %Conf = %$ConfigR;
my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 });
if (!$DBHandle) {
die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die);
warn("$MySelf: W: $DBI::errstr\n");
};
return $DBHandle;
};
################################################################################
#####------------------------------ GetStats ------------------------------#####
################################################################################
sub ListNewsgroups {
################################################################################
### count each newsgroup and each hierarchy level, but only once
### IN : $Newsgroups: a list of newsgroups (content of Newsgroups:)
### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys
my ($Newsgroups) = @_;
my %Newsgroups;
chomp($Newsgroups);
# remove whitespace from contents of Newsgroups:
$Newsgroups =~ s/\s//;
# call &HierarchyCount for each newsgroup in $Newsgroups:
for (split /,/, $Newsgroups) {
# add original newsgroup to %Newsgroups
$Newsgroups{$_} = 1;
# add all hierarchy elements to %Newsgroups, amended by '.ALL',
# i.e. de.alt.ALL and de.ALL
foreach (ParseHierarchies($_)) {
$Newsgroups{$_.'.ALL'} = 1;
}
};
return %Newsgroups;
};
################################################################################
sub ParseHierarchies {
################################################################################
### get all hierarchies a newsgroup belongs to
### IN : $Newsgroup : a newsgroup name
### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
my ($Newsgroup) = @_;
my @Hierarchies;
# strip trailing dots
$Newsgroup =~ s/(.+)\.+$/$1/;
# butcher newsgroup name by "." and add each hierarchy to @Hierarchies
# i.e. de.alt.test: "de.alt" and "de"
while ($Newsgroup =~ /\./) {
$Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/;
push @Hierarchies, $Newsgroup;
};
return @Hierarchies;
};
################################################################################
#####----------------------------- TimePeriods ----------------------------#####
################################################################################
sub GetTimePeriod {
################################################################################
### get time period using -m / -p
### IN : $Month,$Period: contents of -m and -p
### OUT: $StartMonth, $EndMonth
my ($Month,$Period) = @_;
# exit if -m is set and not like YYYY-MM
die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month);
# default: set -m to last month
$Month = &LastMonth if (!defined($Month) and !defined($Period));
# set $StartMonth, $EndMonth
my ($StartMonth, $EndMonth);
if ($Period) {
# -p: get date range
($StartMonth, $EndMonth) = &SplitPeriod($Period);
die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth);
} else {
# set $StartMonth = $EndMonth = $Month if -p is not set
$StartMonth = $EndMonth = $Month;
};
return ($StartMonth, $EndMonth);
};
################################################################################
sub LastMonth {
################################################################################
### get last month from today in YYYY-MM format
### OUT: last month as YYYY-MM
# get today's date
my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
# $Month is already defined from 0 to 11, so no need to decrease it by 1
$Year += 1900;
if ($Month < 1) {
$Month = 12;
$Year--;
};
# return last month
return sprintf('%4d-%02d',$Year,$Month);
};
################################################################################
sub CheckMonth {
################################################################################
### check for valid month
### IN : $Month: month
### OUT: TRUE / FALSE
my ($Month) = @_;
return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/);
return 1;
};
################################################################################
sub SplitPeriod {
################################################################################
### split a time period YYYY-MM:YYYY-MM into start and end month
### IN : $Period: time period
### OUT: $StartMonth, Â$EndMonth
my ($Period) = @_;
return (undef,undef) if $Period !~ /^\d{4}-\d{2}:\d{4}-\d{2}$/;
my ($StartMonth, $EndMonth) = split /:/, $Period;
# switch parameters as necessary
if ($EndMonth gt $StartMonth) {
return ($StartMonth, $EndMonth);
} else {
return ($EndMonth, $StartMonth);
};
};
################################################################################
sub ListMonth {
################################################################################
### return a list of month (YYYY-MM) between start and end month
### IN : $StartMonth, $EndMonth
### OUT: @Months: array containing all months from $StartMonth to $EndMonth
my ($StartMonth, $EndMonth) = @_;
return (undef,undef) if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/);
# return if $StartMonth = $EndMonth
return ($StartMonth) if ($StartMonth eq $EndMonth);
# set $Year, $Month from $StartMonth
my ($Year, $Month) = split /-/, $StartMonth;
# define @Months
my (@Months);
until ("$Year-$Month" gt $EndMonth) {
push @Months, "$Year-$Month";
$Month = "$Month"; # force string context
$Month++;
if ($Month > 12) {
$Month = '01';
$Year++;
};
};
return @Months;
};
#####---------------------------- OutputFormats ---------------------------#####
################################################################################
sub OutputData {
################################################################################
### output information with formatting from DBHandle
### IN : $Format : format specifier
### $DBQuery: database query handle with executed query,
### containing $Month, $Key, $Value
### $PadGroup: padding length for newsgroups field (optional) for 'pretty'
### OUT: $Output: formatted output
my ($Format, $DBQuery,$PadGroup) = @_;
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
print &FormatOutput($Format, $Month, $Key, $Value, $PadGroup);
};
};
################################################################################
sub FormatOutput {
################################################################################
### format information for output
### IN : $Format : format specifier
### $PadGroup: padding length for newsgroups field (optional) for 'pretty'
### $Month : month (as YYYY-MM)
### $Key : newsgroup, client, ...
### $Value : number of postings with that attribute
### OUT: $Output: formatted output
my ($Format, $Month, $Key, $Value, $PadGroup) = @_;
# define output types
my %LegalOutput;
@LegalOutput{('dump','dumpgroup','list','pretty')} = ();
# bail out if format is unknown
die "$MySelf: E: Unknown output type '$Format'!\n" if !exists($LegalOutput{$Format});
my ($Output);
our ($LastIteration);
if ($Format eq 'dump') {
# output as dump (ng nnnnn)
$Output = sprintf ("%s %u\n",$Key,$Value);
} elsif ($Format eq 'dumpgroup') {
# output as dump (YYYY-NN: nnnnn)
$Output = sprintf ("%s: %5u\n",$Month,$Value);
} elsif ($Format eq 'list') {
# output as list (YYYY-NN: ng nnnnn)
$Output = sprintf ("%s: %s %u\n",$Month,$Key,$Value);
} elsif ($Format eq 'pretty') {
# output as table
$Output = sprintf ("----- %s:\n",$Month) if (!defined($LastIteration) or $Month ne $LastIteration);
$LastIteration = $Month;
$Output .= sprintf ($PadGroup ? sprintf("%%-%us %%5u\n",$PadGroup) : "%s %u\n",$Key,$Value);
};
return $Output;
};
#####------------------------- QueryModifications -------------------------#####
################################################################################
sub SQLHierarchies {
################################################################################
### amend WHERE clause to include hierarchies
### IN : $ShowHierarchies: boolean value
### OUT: SQL code
my ($ShowHierarchies) = @_;
return $ShowHierarchies ? '' : "AND newsgroup NOT LIKE '%.ALL'";
};
################################################################################
sub GetMaxLenght {
################################################################################
### get length of longest field in query
### IN : $DBHandle : database handel
### $Table : table to query
### $Field : field to check
### $WhereClause: WHERE clause
### @BindVars : bind variables for WHERE clause
### OUT: $Length: length of longest instnace of $Field
my ($DBHandle,$Table,$Field,$WhereClause,@BindVars) = @_;
my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) FROM %s WHERE %s",$Field,$Table,$WhereClause));
$DBQuery->execute(@BindVars) or warn sprintf("$MySelf: W: Can't get field length for %s from table %s: $DBI::errstr\n",$Field,$Table);
my ($Length) = $DBQuery->fetchrow_array;
return $Length;
};
################################################################################
sub SQLGroupList {
################################################################################
### create part of WHERE clause for list of newsgroups separated by :
### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
### OUT: SQL code, list of newsgroups
my ($Newsgroups) = @_;
$Newsgroups =~ s/\*/%/g;
return ('newsgroup LIKE ?', $Newsgroups) if $Newsgroups !~ /:/;
my $SQL = '(';
my @GroupList = split /:/, $Newsgroups;
foreach (@GroupList) {
$SQL .= ' OR ' if $SQL gt '(';
$SQL .= 'newsgroup LIKE ?';
};
$SQL .= ')';
return ($SQL,@GroupList);
};
#####------------------------------- done ---------------------------------#####
1;

87
feedlog.pl Executable file
View file

@ -0,0 +1,87 @@
#! /usr/bin/perl -W
#
# feedlog.pl
#
# This script will log headers and other data to a database
# for further analysis by parsing a feed from INN.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
BEGIN {
our $VERSION = "0.01";
use File::Basename;
push(@INC, dirname($0));
}
use strict;
use NewsStats;
use Sys::Syslog qw(:standard :macros);
use Date::Format;
use DBI;
################################# Main program #################################
### read commandline options
my %Options = &ReadOptions('qd');
### read configuration
my %Conf = %{ReadConfig('newsstats.conf')};
### init syslog
openlog($MySelf, 'nofatal,pid', LOG_NEWS);
syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'};
### init database
my $DBHandle = InitDB(\%Conf,0);
if (!$DBHandle) {
syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);
while (1) {}; # go into endless loop to suppress further errors and respawning
};
my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
### main loop
while (<>) {
chomp;
# catch empty lines trailing or leading
if ($_ eq '') {
next;
}
# first line contains: mid, timestamp, token, size, peer, Path, Newsgroups
my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;
# remaining lines contain headers
my $Headers = "";
while (<>) {
chomp;
# empty line terminates this article
if ($_ eq '') {
last;
}
# collect headers
$Headers .= $_."\n" ;
}
# parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp
my $Day = time2str("%Y-%m-%d", $Timestamp);
my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);
# write to database
if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) {
syslog(LOG_ERR, 'Database error: %s', $DBI::errstr);
};
$DBQuery->finish;
warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers) if !$Options{'d'};
}
### close handles
$DBHandle->disconnect;
syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'};
closelog();

102
gatherstats.pl Executable file
View file

@ -0,0 +1,102 @@
#! /usr/bin/perl -W
#
# gatherstats.pl
#
# This script will gather statistical information from a database
# containing headers and other information from a INN feed.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
BEGIN {
our $VERSION = "0.01";
use File::Basename;
push(@INC, dirname($0));
}
use strict;
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups);
use DBI;
################################# Definitions ##################################
# define types of information that can be gathered
# all / groups (/ clients / hosts)
my %LegalTypes;
@LegalTypes{('all','groups')} = ();
################################# Main program #################################
### read commandline options
my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:');
### read configuration
my %Conf = %{ReadConfig('newsstats.conf')};
### override configuration via commandline options
my %ConfOverride;
$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'};
$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'};
$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'};
$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'};
&OverrideConfig(\%Conf,\%ConfOverride);
### get type of information to gather, default to 'all'
$Options{'t'} = 'all' if !$Options{'t'};
die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}});
### get time period
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
### init database
my $DBHandle = InitDB(\%Conf,1);
### get data for each month
warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};
foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {
print "---------- $Month ----------\n" if $Options{'d'};
if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') {
### ----------------------------------------------
### get groups data (number of postings per group)
# get groups data from raw table for given month
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
$DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
# count postings per group
my %Postings;
while (($_) = $DBQuery->fetchrow_array) {
# get list oft newsgroups and hierarchies from Newsgroups:
my %Newsgroups = ListNewsgroups($_);
# count each newsgroup and hierarchy once
foreach (sort keys %Newsgroups) {
# don't count newsgroup/hierarchy in wrong TLH
next if(defined($Conf{'TLH'}) and !/^$Conf{'TLH'}/);
$Postings{$_}++;
};
};
print "----- GroupStats -----\n" if $Options{'d'};
foreach my $Newsgroup (sort keys %Postings) {
print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};
if (!$Options{'o'}) {
# write to database
$DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
$DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
$DBQuery->finish;
};
};
};
};
### close handles
$DBHandle->disconnect;

157
groupstats.pl Executable file
View file

@ -0,0 +1,157 @@
#! /usr/bin/perl -W
#
# groupstats.pl
#
# This script will get statistical data on newgroup usage
# form a database.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
BEGIN {
our $VERSION = "0.01";
use File::Basename;
push(@INC, dirname($0));
}
use strict;
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
use DBI;
################################# Definitions ##################################
# ...
################################# Main program #################################
### read commandline options
my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');
### read configuration
my %Conf = %{ReadConfig('newsstats.conf')};
### override configuration via commandline options
my %ConfOverride;
$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
&OverrideConfig(\%Conf,\%ConfOverride);
### default output type to 'dump'
$Options{'o'} = 'dump' if !$Options{'o'};
# fail if more than one newsgroup is combined with 'dumpgroup' type
die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
# accept 'dumpgroup' only with -n
if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
$Options{'o'} = 'dump';
warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
};
# you can't mix '-t' and '-b'
if ($Options{'b'}) {
if ($Options{'t'}) {
warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b). Threshold '-t $Options{'t'}' was ignored.\n");
undef($Options{'t'});
};
warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b).\n") if $Options{'q'};
warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b).\n") if $Options{'d'};
};
### get query type, default to 'postings'
#die "$MySelf: E: Unknown query type -q $Options{'q'}!\n" if ($Options{'q'} and !exists($LegalTypes{$Options{'q'}}));
#die "$MySelf: E: You must submit a threshold ('-t') for query type '-q $Options{'q'}'!\n" if ($Options{'q'} and !$Options{'t'});
### get time period
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
# reset to one month for 'dump' type
if ($Options{'o'} eq 'dump' and $Options{'p'}) {
$StartMonth = $EndMonth;
warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");
};
### init database
my $DBHandle = InitDB(\%Conf,1);
### get data
# get list of newsgroups (-n)
my ($QueryPart,@GroupList);
my $Newsgroups = $Options{'n'};
if ($Newsgroups) {
($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);
} else {
$QueryPart = 1;
};
# manage thresholds
if (defined($Options{'t'})) {
if ($Options{'i'}) {
$QueryPart .= ' AND postings < ?';
} else {
$QueryPart .= ' AND postings > ?';
};
push @GroupList,$Options{'t'};
}
# construct WHERE clause
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));
# get lenght of longest newsgroup delivered by query for formatting purposes
my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);
my ($OrderClause,$DBQuery);
# -b (best of) defined?
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
$OrderClause = 'newsgroup';
$OrderClause = 'postings' if $Options{'q'};
$OrderClause .= ' DESC' if $Options{'d'};
# do query: get number of postings per group from groups table for given months and newsgroups
$DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif ($Options{'b'}) {
# set sorting order (-i)
if ($Options{'i'}) {
$OrderClause = 'postings';
} else {
$OrderClause = 'postings DESC';
};
# push LIMIT to GroupList to match number of binding vars
push @GroupList,$Options{'b'};
# do query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else { # -l
# set sorting order (-i)
if ($Options{'i'}) {
$OrderClause = '<';
} else {
$OrderClause = '>';
};
# push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars
push @GroupList,$Options{'l'};
push @GroupList,$StartMonth,$EndMonth;
# do query: get number of postings per group from groups table for given months and
$DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));
};
# execute query
$DBQuery->execute($StartMonth,$EndMonth,@GroupList) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
# output result
printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});
printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
&OutputData($Options{'o'},$DBQuery,$MaxLength);
} elsif ($Options{'b'}) {
while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
};
} else { # -l
while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
};
};
### close handles
$DBHandle->disconnect;

152
install/install.pl Executable file
View file

@ -0,0 +1,152 @@
#! /usr/bin/perl -W
#
# install.pl
#
# This script will create database tables as necessary.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
BEGIN {
our $VERSION = "0.01";
use File::Basename;
# we're in .../install, so our module is in ..
push(@INC, dirname($0).'/..');
}
use strict;
use NewsStats qw(:DEFAULT);
use Cwd;
use DBI;
################################# Main program #################################
### read commandline options
my %Options = &ReadOptions('');
### change working directory to .. (as we're in .../install)
chdir '..';
### read configuration
print("Reading configuration.\n");
my %Conf = %{ReadConfig('newsstats.conf')};
##### --------------------------------------------------------------------------
##### Database table definitions
##### --------------------------------------------------------------------------
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS);
--
-- Table structure for table DBTableRaw
--
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableRaw'}` (
`id` bigint(20) unsigned NOT NULL auto_increment,
`day` date NOT NULL,
`mid` varchar(250) character set ascii NOT NULL,
`date` datetime NOT NULL,
`timestamp` bigint(20) NOT NULL,
`token` varchar(80) character set ascii NOT NULL,
`size` bigint(20) NOT NULL,
`peer` varchar(250) NOT NULL,
`path` varchar(1000) NOT NULL,
`newsgroups` varchar(1000) NOT NULL,
`headers` longtext NOT NULL,
`disregard` tinyint(1) default '0',
PRIMARY KEY (`id`),
KEY `day` (`day`),
KEY `mid` (`mid`),
KEY `peer` (`peer`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Raw data';
RAW
--
-- Table structure for table DBTableGrps
--
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableGrps'}` (
`id` bigint(20) unsigned NOT NULL auto_increment,
`month` varchar(7) character set ascii NOT NULL,
`newsgroup` varchar(100) NOT NULL,
`postings` int(11) NOT NULL,
`revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `month_newsgroup` (`month`,`newsgroup`),
KEY `newsgroup` (`newsgroup`),
KEY `postings` (`postings`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per newsgroup';
GRPS
##### --------------------------- End of definitions ---------------------------
### create database tables
print "-----\nStarting database table generation.\n";
# DB init
my $DBHandle = InitDB(\%Conf,1);
# read tables
my %TablesInDB = %{$DBHandle->table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')};
# check for tables and create them, if they don't exist yet
foreach my $Table (keys %DBCreate) {
if (defined($TablesInDB{$Conf{$Table}})) {
printf("Database table %s.%s already exists, skipping ....\n",$Conf{'DBDatabase'},$Conf{$Table});
next;
};
my $DBQuery = $DBHandle->prepare($DBCreate{$Table});
$DBQuery->execute() or die sprintf("$MySelf: E: Can't create table %s in database %s: %s%\n",$Table,$Conf{'DBDatabase'},$DBI::errstr);
printf("Database table %s.%s created succesfully.\n",$Conf{'DBDatabase'},$Conf{$Table});
};
# close handle
$DBHandle->disconnect;
print "Database table generation done.\n";
### output information on other necessary steps
my $Path = cwd();
print <<TODO;
-----
Things left to do:
1) Setup an INN feed to feedlog.pl
a) Edit your 'newsfeeds' file and insert something like
## gather statistics for NewsStats
newsstats!\
:!*,de.*\
:Tc,WmtfbsPNH,Ac:$Path/feedlog.pl
Please
* check that you got the path to feedlog.pl right
* check that feedlog.pl can be executed by the news user
* adapt the pattern (here: 'de.*') to your needs
b) Check your 'newsfeeds' syntax:
# ctlinnd checkfile
and reload 'newsfeeds':
# ctlinnd reload newsfeeds 'Adding newsstats! feed'
c) Watch your 'news.notice' and 'errlog' files:
# tail -f /var/log/news/news.notice
...
# tail -f /var/log/news/errlog
2) Watch your $Conf{'DBTableRaw'} table fill.
3) Read the documentation. ;)
Enjoy!
-thh <thh\@inter.net>
TODO