Compare commits

..

No commits in common. "pu" and "master" have entirely different histories.
pu ... master

10 changed files with 162 additions and 721 deletions

2
.gitignore vendored
View file

@ -1,3 +1,3 @@
tmp/ tmp/
tmp/* tmp/*
etc/newsstats.conf newsstats.conf

View file

@ -4,7 +4,7 @@
# #
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
# #
# This module can be redistributed and/or modified under the same terms under # This module can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
package NewsStats; package NewsStats;
@ -49,24 +49,20 @@ require Exporter;
Output => [qw(OutputData FormatOutput)], Output => [qw(OutputData FormatOutput)],
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
SQLSetBounds SQLBuildClause GetMaxLength)]); SQLSetBounds SQLBuildClause GetMaxLength)]);
$VERSION = '0.02'; $VERSION = '0.01';
our $PackageVersion = '0.02'; our $PackageVersion = '0.01';
use Data::Dumper; use Data::Dumper;
use File::Basename; use File::Basename;
use Cwd qw(realpath);
use Config::Auto; use Config::Auto;
use DBI; use DBI;
#####-------------------------------- Vars --------------------------------##### #####-------------------------------- Vars --------------------------------#####
# save $0 in $FullPath # trim the path
our $FullPath = $0; our $FullPath = $0;
# strip filename and /bin or /install directory to create the $HomePath our $HomePath = dirname($0);
our $HomePath = dirname(realpath($0));
$HomePath =~ s/\/(bin|install)//;
# trim $0
$0 =~ s%.*/%%; $0 =~ s%.*/%%;
# set version string # set version string
our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
@ -80,7 +76,7 @@ sub ShowVersion {
################################################################################ ################################################################################
### display version and exit ### display version and exit
print "NewsStats v$PackageVersion\n$MyVersion\n"; print "NewsStats v$PackageVersion\n$MyVersion\n";
print "Copyright (c) 2010-2013 Thomas Hochstein <thh\@inter.net>\n"; print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
print "This program is free software; you may redistribute it ". print "This program is free software; you may redistribute it ".
"and/or modify it under the same terms as Perl itself.\n"; "and/or modify it under the same terms as Perl itself.\n";
exit(100); exit(100);
@ -103,11 +99,9 @@ sub ReadConfig {
### IN : $ConfFile: config filename ### IN : $ConfFile: config filename
### OUT: reference to a hash containing the configuration ### OUT: reference to a hash containing the configuration
my ($ConfFile) = @_; my ($ConfFile) = @_;
# set default
$ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile;
# mandatory configuration options # mandatory configuration options
my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase', my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase',
'DBTableRaw','DBTableParse','DBTableGrps'); 'DBTableRaw','DBTableGrps');
# read config via Config::Auto # read config via Config::Auto
my $ConfR = Config::Auto::parse($ConfFile, format => 'equal'); my $ConfR = Config::Auto::parse($ConfFile, format => 'equal');
my %Conf = %{$ConfR}; my %Conf = %{$ConfR};
@ -244,7 +238,7 @@ sub ReadGroupList {
### ignoring everything after the first whitespace and so accepting files ### ignoring everything after the first whitespace and so accepting files
### in checkgroups format as well as (parts of) an INN active file) ### in checkgroups format as well as (parts of) an INN active file)
### IN : $Filename : file to read ### IN : $Filename : file to read
### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups ### OUT: \%ValidGroups: hash containing all valid newsgroups
my ($Filename) = @_; my ($Filename) = @_;
my %ValidGroups; my %ValidGroups;
open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
@ -265,39 +259,33 @@ sub ReadGroupList {
################################################################################ ################################################################################
sub GetTimePeriod { sub GetTimePeriod {
################################################################################ ################################################################################
### get a time period to act on from --month / --day option; ### get a time period to act on from --month option;
### if empty, default to last month / day ### if empty, default to last month
### IN : $Period: may be empty, 'YYYY-MM(-DD)', 'YYYY-MM(-DD):YYYY-MM(-DD)' ### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all'
### or 'all'
### $Type : may be 'month' or 'day'
### OUT: $Verbal,$SQL: verbal description and WHERE-clause ### OUT: $Verbal,$SQL: verbal description and WHERE-clause
### of the chosen time period ### of the chosen time period
my ($Period,$Type) = @_; my ($Month) = @_;
# define result variables # define result variables
my ($Verbal, $SQL); my ($Verbal, $SQL);
# check $Type # define a regular expression for a month
$Type = 'month' if (!$Type or ($Type ne 'month' and $Type ne 'day')); my $REMonth = '\d{4}-\d{2}';
# define a regular expressions for a month or day
my $REPeriod = '\d{4}-\d{2}'; # default to last month if option is not set
$REPeriod .= '-\d{2}' if ($Type eq 'day'); if(!$Month) {
$Month = &LastMonth;
# default to last month / day if option is not set
if(!$Period) {
$Period = &LastMonthDay($Type);
} }
# check for valid input # check for valid input
if ($Period =~ /^$REPeriod$/) { if ($Month =~ /^$REMonth$/) {
# single month/day [YYYY-MM(-DD)] # single month (YYYY-MM)
($Period) = &CheckPeriod($Type,$Period); ($Month) = &CheckMonth($Month);
$Verbal = $Period; $Verbal = $Month;
$SQL = sprintf("%s = '%s'",$Type,$Period); $SQL = sprintf("month = '%s'",$Month);
} elsif ($Period =~ /^$REPeriod:$REPeriod$/) { } elsif ($Month =~ /^$REMonth:$REMonth$/) {
# time period [YYYY-MM(-DD):YYYY-MM(-DD)] # time period (YYYY-MM:YYYY-MM)
$Verbal = sprintf('%s to %s',&SplitPeriod($Period,$Type)); $Verbal = sprintf('%s to %s',&SplitPeriod($Month));
$SQL = sprintf("%s BETWEEN '%s' AND '%s'",$Type, $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month));
&SplitPeriod($Period,$Type)); } elsif ($Month =~ /^all$/i) {
} elsif ($Period =~ /^all$/i) {
# special case: ALL # special case: ALL
$Verbal = 'all time'; $Verbal = 'all time';
$SQL = ''; $SQL = '';
@ -305,87 +293,63 @@ sub GetTimePeriod {
# invalid input # invalid input
return (undef,undef); return (undef,undef);
} }
return ($Verbal,$SQL); return ($Verbal,$SQL);
}; };
################################################################################ ################################################################################
sub LastMonthDay { sub LastMonth {
################################################################################ ################################################################################
### get last month/day from todays date in YYYY-MM format ### get last month from todays date in YYYY-MM format
### IN : $Type : may be 'month' or 'day' ### OUT: last month as YYYY-MM
### OUT: last month/day as YYYY-MM(-DD) # get today's date
my ($Type) = @_; my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
my ($Day,$Month,$Year); # $Month is already defined from 0 to 11, so no need to decrease it by 1
if ($Type eq 'day') {
# get yesterdays's date
(undef,undef,undef,$Day,$Month,$Year,undef,undef,undef) = localtime(time-86400);
# $Month is defined from 0 to 11, so add 1
$Month++;
} else {
# get today's date (month and year)
(undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
# $Month is already defined from 0 to 11, so no need to decrease it by 1
if ($Month < 1) {
$Month = 12;
$Year--;
};
}
$Year += 1900; $Year += 1900;
# return last month / day if ($Month < 1) {
if ($Type eq 'day') { $Month = 12;
return sprintf('%4d-%02d-%02d',$Year,$Month,$Day); $Year--;
} else { };
return sprintf('%4d-%02d',$Year,$Month); # return last month
} return sprintf('%4d-%02d',$Year,$Month);
}; };
################################################################################ ################################################################################
sub CheckPeriod { sub CheckMonth {
################################################################################ ################################################################################
### check if input (in YYYY-MM(-DD) form) is a valid month / day; ### check if input (in YYYY-MM form) is valid with MM between 01 and 12;
### otherwise, fix it ### otherwise, fix it
### IN : $Type : may be 'month' or 'day' ### IN : @Month: array of month
### @Period: array of month/day ### OUT: @Month: a valid month
### OUT: @Period: a valid month/day my (@Month) = @_;
my ($Type,@Period) = @_; foreach my $Month (@Month) {
foreach my $Period (@Period) { my ($OldMonth) = $Month;
my ($OldPeriod) = $Period; my ($CalMonth) = substr ($Month, -2);
my ($CalMonth,$CalDay); if ($CalMonth < 1 or $CalMonth > 12) {
$Period .= '-01' if ($Type eq 'month');
$CalDay = substr ($Period, -2);
$CalMonth = substr ($Period, 5, 2);
if ($CalMonth < 1 or $CalMonth > 12 or $CalDay < 1 or $CalDay > 31) {
$CalMonth = '12' if $CalMonth > 12; $CalMonth = '12' if $CalMonth > 12;
$CalMonth = '01' if $CalMonth < 1; $CalMonth = '01' if $CalMonth < 1;
substr($Period, 5, 2) = $CalMonth; substr($Month, -2) = $CalMonth;
$CalDay = '01' if $CalDay < 1; &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ".
$CalDay = '31' if $CalDay > 31; "and '12'), set to '%s'.",$OldMonth,$Month));
# FIXME! - month with less than 31 days ...
substr($Period, -2) = $CalDay;
&Bleat(1,sprintf("'%s' is an invalid date, set to '%s'.",
$OldPeriod,$Period));
} }
$Period = substr($Period,0,7) if ($Type eq 'month');
} }
return @Period; return @Month;
}; };
################################################################################ ################################################################################
sub SplitPeriod { sub SplitPeriod {
################################################################################ ################################################################################
### split a time period denoted by YYYY-MM(-DD):YYYY-MM(-DD) into start and end ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month
### IN : $Period: time period ### IN : $Period: time period
### $Type : may be 'month' or 'day' ### OUT: $StartMonth, $EndMonth
### OUT: $StartTime, $EndTime my ($Period) = @_;
my ($Period,$Type) = @_; my ($StartMonth, $EndMonth) = split /:/, $Period;
my ($StartTime, $EndTime) = split /:/, $Period; ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth);
($StartTime,$EndTime) = CheckPeriod($Type,$StartTime,$EndTime);
# switch parameters as necessary # switch parameters as necessary
if ($EndTime gt $StartTime) { if ($EndMonth gt $StartMonth) {
return ($StartTime, $EndTime); return ($StartMonth, $EndMonth);
} else { } else {
return ($EndTime, $StartTime); return ($EndMonth, $StartMonth);
}; };
}; };
@ -437,7 +401,7 @@ sub OutputData {
my %ValidKeys = %{$ValidKeys} if $ValidKeys; my %ValidKeys = %{$ValidKeys} if $ValidKeys;
my ($FileName, $Handle, $OUT); my ($FileName, $Handle, $OUT);
our $LastIteration; our $LastIteration;
# define output types # define output types
my %LegalOutput; my %LegalOutput;
@LegalOutput{('dump','list','pretty')} = (); @LegalOutput{('dump','list','pretty')} = ();
@ -469,7 +433,7 @@ sub OutputData {
# safeguards for filename creation: # safeguards for filename creation:
# replace potential problem characters with '_' # replace potential problem characters with '_'
$FileName = sprintf('%s-%s',$FileTempl,$Caption); $FileName = sprintf('%s-%s',$FileTempl,$Caption);
$FileName =~ s/[^a-zA-Z0-9_-]+/_/g; $FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
open ($OUT,">$FileName") open ($OUT,">$FileName")
or &Bleat(2,sprintf("Cannot open output file '%s': $!", or &Bleat(2,sprintf("Cannot open output file '%s': $!",
$FileName)); $FileName));
@ -806,3 +770,5 @@ sub CheckValidNewsgroups {
#####------------------------------- done ---------------------------------##### #####------------------------------- done ---------------------------------#####
1; 1;

View file

@ -1,430 +0,0 @@
#! /usr/bin/perl
#
# parsedb.pl
#
# This script will parse a database with raw header information
# from a INN feed to a structured database.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2013 Thomas Hochstein <thh@inter.net>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
BEGIN {
our $VERSION = "0.01";
use File::Basename;
# we're in .../bin, so our module is in ../lib
push(@INC, dirname($0).'/../lib');
}
use strict;
use warnings;
use NewsStats qw(:DEFAULT :TimePeriods :SQLHelper);
use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
use Encode qw/decode/;
use Mail::Address;
################################# Definitions ##################################
# define header names with separate database fields
my %DBFields = ('date' => 'date',
'references' => 'refs',
'followup-to' => 'fupto',
'from' => 'from_',
'sender' => 'sender',
'reply-to' => 'replyto',
'subject' => 'subject',
'organization' => 'organization',
'lines' => 'linecount',
'approved' => 'approved',
'supersedes' => 'supersedes',
'expires' => 'expires',
'user-agent' => 'useragent',
'x-newsreader' => 'xnewsreader',
'x-mailer' => 'xmailer',
'x-no-archive' => 'xnoarchive',
'content-type' => 'contenttype',
'content-transfer-encoding' => 'contentencoding',
'cancel-lock' => 'cancellock',
'injection-info' => 'injectioninfo',
'x-trace' => 'xtrace',
'nntp-posting-host' => 'postinghost');
# define field list for database
my @DBFields = qw/day mid refs date path newsgroups fupto from_ from_parsed
from_name from_address sender sender_parsed sender_name
sender_address replyto replyto_parsed replyto_name
replyto_address subject subject_parsed organization linecount
approved supersedes expires useragent xnewsreader xmailer
xnoarchive contenttype contentencoding cancellock injectioninfo
xtrace postinghost headers disregard/;
################################# Main program #################################
### read commandline options
my ($OptDay,$OptDebug,$OptParseDB,$OptRawDB,$OptTest,$OptConfFile);
GetOptions ('d|day=s' => \$OptDay,
'debug!' => \$OptDebug,
'parsedb=s' => \$OptParseDB,
'rawdb=s' => \$OptRawDB,
't|test!' => \$OptTest,
'conffile=s' => \$OptConfFile,
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
### read configuration
my %Conf = %{ReadConfig($OptConfFile)};
### override configuration via commandline options
my %ConfOverride;
$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
$ConfOverride{'DBTableParse'} = $OptParseDB if $OptParseDB;
&OverrideConfig(\%Conf,\%ConfOverride);
### get time period
### and set $Period for output and expression for SQL 'WHERE' clause
my ($Period,$SQLWherePeriod) = &GetTimePeriod($OptDay,'day');
# bail out if --month is invalid or "all"
&Bleat(2,"--day option has an invalid format - please use 'YYYY-MM-DD' or ".
"'YYYY-MM-DD:YYYY-MM-DD'!") if (!$Period or $Period eq 'all time');
### init database
my $DBHandle = InitDB(\%Conf,1);
### get & write data
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
# create $SQLWhereClause
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,'NOT disregard');
# delete old data for current period
if (!$OptTest) {
print "----------- Deleting old data ... -----------\n" if $OptDebug;
my $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s %s",
$Conf{'DBDatabase'},$Conf{'DBTableParse'},
$SQLWhereClause))
or &Bleat(2,sprintf("Can't delete old parsed data for %s from %s.%s: ".
"$DBI::errstr\n",$Period,
$Conf{'DBDatabase'},$Conf{'DBTableParse'}));
};
# read from DBTableRaw
print "-------------- Reading data ... -------------\n" if $OptDebug;
my $DBQuery = $DBHandle->prepare(sprintf("SELECT id, day, mid, peer, path, ".
"newsgroups, headers, disregard ".
"FROM %s.%s %s", $Conf{'DBDatabase'},
$Conf{'DBTableRaw'}, $SQLWhereClause));
$DBQuery->execute()
or &Bleat(2,sprintf("Can't get data for %s from %s.%s: ".
"$DBI::errstr\n",$Period,
$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
# set output and database connection to UTF-8
# as we're going to write decoded header contents containing UTF-8 chars
binmode(STDOUT, ":utf8");
$DBHandle->do("SET NAMES 'utf8'");
# create a list of supported encondings
my %LegalEncodings;
foreach (Encode->encodings()) {
$LegalEncodings{$_} = 1;
}
# parse data in a loop and write it out
print "-------------- Parsing data ... -------------\n" if $OptDebug;
while (my $HeadersR = $DBQuery->fetchrow_hashref) {
my %Headers = %{$HeadersR};
# parse $Headers{'headers'} ('headers' from DBTableRaw)
# remove empty lines (that should not even exist in a header!)
$Headers{'headers'} =~ s/\n\s*\n/\n/g;
# merge continuation lines
# from Perl Cookbook, 1st German ed. 1999, pg. 91
$Headers{'headers'} =~ s/\n\s+/ /g;
# split headers in single lines
my $OtherHeaders;
for (split(/\n/,$Headers{'headers'})) {
# split header lines in header name and header content
my ($key,$value);
if ($_ =~ /:/) {
($key,$value) = split(/:/,$_,2);
$key =~ s/\s*//;
$value =~ s/^\s*(.+)\s*$/$1/;
} else {
&Bleat(1,sprintf("Illegal header line in %s.%s id %s: %s",
$Conf{'DBDatabase'}, $Conf{'DBTableRaw'},
$Headers{'id'},$_));
next;
}
# check for empty (mandatory) fields from DBTableRaw
# and set them from $Headers{'headers', if necessary
if (lc($key) =~ /^(message-id|path|newsgroups)$/) {
my $HeaderName = lc($key);
$HeaderName = 'mid' if ($HeaderName eq 'message-id');
if (!defined($Headers{$HeaderName}) or $Headers{$HeaderName} eq '') {
$Headers{$HeaderName} = $value;
&Bleat(1,sprintf("Taking missing %s from 'headers' in %s.%s id %s.",
$HeaderName, $Conf{'DBDatabase'}, $Conf{'DBTableRaw'},
$Headers{'id'}));
}
}
# save each header, separate database fields in %Headers,
# the rest in $OtherHeaders (but not Message-ID, Path, Peer
# and Newsgroups as those do already exist)
if (defined($DBFields{lc($key)})) {
$Headers{$DBFields{lc($key)}} = $value;
} else {
$OtherHeaders .= sprintf("%s: %s\n",$key,$value)
if lc($key) !~ /^(message-id|path|peer|newsgroups)$/;
}
}
# replace old (now parsed) $Headers{'headers'} with remanining $OtherHeaders
chomp($OtherHeaders);
$Headers{'headers'} = $OtherHeaders;
foreach ('from_','sender', 'replyto', 'subject') {
if ($Headers{$_}) {
my $HeaderName = $_;
$HeaderName =~ s/_$//;
# decode From: / Sender: / Reply-To: / Subject:
if ($Headers{$_} =~ /\?(B|Q)\?/) {
# check for legal encoding and decode
(my $Encoding) = $Headers{$_} =~ /\?([^?]+)\?(B|Q)\?/;
$Headers{$HeaderName.'_parsed'} = decode('MIME-Header',$Headers{$_})
if (exists($LegalEncodings{$Encoding}));
}
# forcibly modify headers with un-encoded 8bit data assuming utf-8
# TODO: try to guess correct enconding
elsif ($Headers{$_} =~ /[^\x00-\x7F]/) {
$Headers{$_} = decode('utf-8',$Headers{$_});
}
# extract name(s) and mail(s) from From: / Sender: / Reply-To:
# in parsed form, if available
if ($_ ne 'subject') {
my @Address;
# start parser on header or parsed header
# @Address will have an array of Mail::Address objects, one for
# each name/mail (you can have more than one person in From:!)
if (defined($Headers{$HeaderName.'_parsed'})) {
@Address = Mail::Address->parse($Headers{$HeaderName.'_parsed'});
} else {
@Address = Mail::Address->parse($Headers{$_});
}
# split each Mail::Address object to @Names and @Adresses
my (@Names,@Adresses);
foreach (@Address) {
# take address part in @Addresses
push (@Adresses, $_->address());
# take name part form "phrase", if there is one:
# From: My Name <addr@ess> (Comment)
# otherwise, take it from "comment":
# From: addr@ess (Comment)
# and push it in @Names
my ($Name);
$Name = $_->comment() unless $Name = $_->phrase;
$Name =~ s/^\((.+)\)$/$1/;
push (@Names, $Name);
}
# put all @Adresses and all @Names in %Headers as comma separated lists
$Headers{$HeaderName.'_address'} = join(', ',@Adresses);
$Headers{$HeaderName.'_name'} = join(', ',@Names);
}
}
}
# order output for database entry: fill @SQLBindVars
print "-------------- Next entry:\n" if $OptDebug;
my @SQLBindVars;
foreach (@DBFields) {
if (defined($Headers{$_}) and $Headers{$_} ne '') {
push (@SQLBindVars,$Headers{$_});
printf ("FOUND: %s -> %s\n",$_,$Headers{$_}) if $OptDebug;
} else {
push (@SQLBindVars,undef);
}
}
# write data to DBTableParse
if (!$OptTest) {
print "-------------- Writing data ... -------------\n" if $OptDebug;
my $DBWrite =
$DBHandle->prepare(sprintf("INSERT INTO %s.%s (%s) VALUES (%s)",
$Conf{'DBDatabase'},
$Conf{'DBTableParse'},
# get field names from @DBFields
join(', ',@DBFields),
# create a list of '?' for each DBField
join(', ',
split(/ /,'? ' x scalar(@DBFields)))
));
$DBWrite->execute(@SQLBindVars)
or &Bleat(2,sprintf("Can't write parsed data for %s to %s.%s for %s: ".
"$DBI::errstr\n",$Period,
$Conf{'DBDatabase'},$Conf{'DBTableParse'}, $Headers{'mid'}));
$DBWrite->finish;
}
};
$DBQuery->finish;
### close handles
$DBHandle->disconnect;
print "------------------- DONE! -------------------\n" if $OptDebug;
__END__
################################ Documentation #################################
=head1 NAME
parsedb - parse raw data and save it to a database
=head1 SYNOPSIS
B<parsedb> [B<-Vht>] [B<--day> I<YYYY-MM-DD> | I<YYYY-MM-DD:YYYY-MM-DD>] [B<--rawdb> I<database table>] [B<--parsedb> I<database table>] [B<--conffile> I<filename>] [B<--debug>]
=head1 REQUIREMENTS
See L<doc/README>.
=head1 DESCRIPTION
This script will parse raw, unstructured headers from a database table which is
fed from F<feedlog.pl> for a given time period and write its results to
nother database table with separate fields (columns) for most (or even all)
relevant headers.
I<Subject:>, I<From:>, I<Sender:> and I<Reply-To:> will be parsed from MIME
encoded words to UTF-8 as needed while the unparsed copy is kept. From that
parsed copy, I<From:>, I<Sender:> and I<Reply-To:> will also be split into
separate name(s) and address(es) fields while the un-splitted copy is kept,
too.
B<parsedb> should be run nightly from cron for yesterdays data so all
other scripts get current information. The time period to act on defaults to
yesterday, accordingly; you can assign another time period or a single day via
the B<--day> option (see below).
=head2 Configuration
B<parsedb> will read its configuration from F<newsstats.conf>
should be present in etc/ via Config::Auto or from a configuration file
submitted by the B<--conffile> option.
See L<doc/INSTALL> for an overview of possible configuration options.
You can override configuration options via the B<--rawdb> and
B<--parsedb> options, respectively.
=head1 OPTIONS
=over 3
=item B<-V>, B<--version>
Print out version and copyright information and exit.
=item B<-h>, B<--help>
Print this man page and exit.
=item B<--debug>
Output (rather much) debugging information to STDOUT while processing.
=item B<-t>, B<--test>
Do not write results to database. You should use B<--debug> in
conjunction with B<--test> ... everything else seems a bit pointless.
=item B<-d>, B<--day> I<YYYY-MM-DD[:YYYY-MM-DD]>
Set processing period to a single day in YYYY-MM-DD format or to a time
period between two days in YYYY-MM-DD:YYYY-MM-DD format (two days, separated
by a colon).
Defaults to yesterday.
=item B<--rawdb> I<table> (raw data table)
Override I<DBTableRaw> from F<newsstats.conf>.
=item B<--parsedb> I<table> (parsed data table)
Override I<DBTableParse> from F<newsstats.conf>.
=item B<--conffile> I<filename>
Load configuration from I<filename> instead of F<newsstats.conf>.
=back
=head1 INSTALLATION
See L<doc/INSTALL>.
=head1 EXAMPLES
An example crontab entry:
0 1 * * * /path/to/bin/parsedb.pl
Do a dry run for yesterday's data, showing results of processing:
parsedb --debug --test | less
=head1 FILES
=over 4
=item F<bin/parsedb.pl>
The script itself.
=item F<lib/NewsStats.pm>
Library functions for the NewsStats package.
=item F<etc/newsstats.conf>
Runtime configuration file.
=back
=head1 BUGS
Please report any bugs or feature requests to the author or use the
bug tracker at L<http://bugs.th-h.de/>!
=head1 SEE ALSO
=over 2
=item -
L<doc/README>
=item -
L<doc/INSTALL>
=back
This script is part of the B<NewsStats> package.
=head1 AUTHOR
Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2013 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.
=cut

View file

@ -13,8 +13,6 @@ INSTALLATION INSTRUCTIONS
1) Install the scripts 1) Install the scripts
* Get INN, mysql, Perl, and the necessary modules installed (see README).
* Download the current version of NewsStats from * Download the current version of NewsStats from
<http://th-h.de/download/scripts.php>. <http://th-h.de/download/scripts.php>.
@ -30,21 +28,21 @@ INSTALLATION INSTRUCTIONS
* Copy the sample configuration file newsstats.conf.sample to * Copy the sample configuration file newsstats.conf.sample to
newsstats.conf and modify it for your purposes: newsstats.conf and modify it for your purposes:
# cp etc/newsstats.conf.sample etc/newsstats.conf # cp newsstats.conf.sample newsstats.conf
# vim etc/newsstats.conf # vim newsstats.conf
a) Mandatory configuration options a) Mandatory configuration options
* DBDriver = mysql * DBDriver = mysql
Database driver used; currently only mysql is supported. Database driver used; currently only mysql is supported.
* DBHost = localhost * DBHost = localhost
The host your mysql server is running on. The host your mysql server is running on.
* DBUser = * DBUser =
The username to connect to the database server. The username to connect to the database server.
* DBPw = * DBPw =
Matching password for your username. Matching password for your username.
* DBDatabase = newsstats * DBDatabase = newsstats
@ -63,17 +61,17 @@ INSTALLATION INSTRUCTIONS
* TLH = de * TLH = de
Limit examination to that top-level hierarchy. Limit examination to that top-level hierarchy.
3) Database (mysql) setup 3) Database (mysql) setup
* Setup your database server with a username, password and * Setup your database server with a username, password and
database matching the NewsStats configuration (see 2 a). database matching the NewsStats configuration (see 2 a).
* Start the installation script: * Start the installation script:
# install/install.pl # install/install.pl
It will setup the necessary database tables and display some It will setup the necessary database tables and display some
information on the next steps. information on the next steps.
4) Feed (INN) setup 4) Feed (INN) setup

View file

@ -47,7 +47,6 @@ Prerequisites
- Config::Auto - Config::Auto
- Date::Format - Date::Format
- DBI - DBI
- Mail::Address
* mysql 5.0.x * mysql 5.0.x

View file

@ -4,19 +4,18 @@
# #
# This script will log headers and other data to a database # This script will log headers and other data to a database
# for further analysis by parsing a feed from INN. # for further analysis by parsing a feed from INN.
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
BEGIN { BEGIN {
our $VERSION = "0.02"; our $VERSION = "0.01";
use File::Basename; use File::Basename;
# we're in .../bin, so our module is in ../lib push(@INC, dirname($0));
push(@INC, dirname($0).'/../lib');
} }
use strict; use strict;
use warnings; use warnings;
@ -69,15 +68,14 @@ sub PrepareDB {
################################# Main program ################################# ################################# Main program #################################
### read commandline options ### read commandline options
my ($OptDebug,$OptQuiet,$OptConfFile); my ($OptDebug,$OptQuiet);
GetOptions ('d|debug!' => \$OptDebug, GetOptions ('d|debug!' => \$OptDebug,
'q|test!' => \$OptQuiet, 'q|test!' => \$OptQuiet,
'conffile=s' => \$OptConfFile,
'h|help' => \&ShowPOD, 'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1; 'V|version' => \&ShowVersion) or exit 1;
### read configuration ### read configuration
my %Conf = %{ReadConfig($OptConfFile)}; my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
### init syslog ### init syslog
openlog($0, 'nofatal,pid', LOG_NEWS); openlog($0, 'nofatal,pid', LOG_NEWS);
@ -131,7 +129,7 @@ while (<>) {
}; };
}; };
$DBQuery->finish; $DBQuery->finish;
warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n". warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n".
"Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n", "Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",
$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path,
@ -153,7 +151,7 @@ feedlog - log data from an INN feed to a database
=head1 SYNOPSIS =head1 SYNOPSIS
B<feedlog> [B<-Vhdq>] [B<--conffile> I<filename>] B<feedlog> [B<-Vhdq>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
@ -174,8 +172,7 @@ terminating would only result in a rapid respawn.
=head2 Configuration =head2 Configuration
B<feedlog> will read its configuration from F<newsstats.conf> which B<feedlog> will read its configuration from F<newsstats.conf> which
should be present in etc/ via Config::Auto or from a configuration file should be present in the same directory via Config::Auto.
submitted by the B<--conffile> option.
See L<doc/INSTALL> for an overview of possible configuration options. See L<doc/INSTALL> for an overview of possible configuration options.
@ -200,10 +197,6 @@ find that information most probably in your B<INN> F<errlog> file.
Suppress logging to syslog. Suppress logging to syslog.
=item B<--conffile> I<filename>
Load configuration from I<filename> instead of F<newsstats.conf>.
=back =back
=head1 INSTALLATION =head1 INSTALLATION
@ -225,15 +218,15 @@ See L<doc/INSTALL> for further information.
=over 4 =over 4
=item F<bin/feedlog.pl> =item F<feedlog.pl>
The script itself. The script itself.
=item F<lib/NewsStats.pm> =item F<NewsStats.pm>
Library functions for the NewsStats package. Library functions for the NewsStats package.
=item F<etc/newsstats.conf> =item F<newsstats.conf>
Runtime configuration file. Runtime configuration file.
@ -266,7 +259,7 @@ Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.

59
bin/gatherstats.pl → gatherstats.pl Normal file → Executable file
View file

@ -3,20 +3,19 @@
# gatherstats.pl # gatherstats.pl
# #
# This script will gather statistical information from a database # This script will gather statistical information from a database
# containing headers and other information from an INN feed. # containing headers and other information from a INN feed.
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
BEGIN { BEGIN {
our $VERSION = "0.02"; our $VERSION = "0.01";
use File::Basename; use File::Basename;
# we're in .../bin, so our module is in ../lib push(@INC, dirname($0));
push(@INC, dirname($0).'/../lib');
} }
use strict; use strict;
use warnings; use warnings;
@ -38,7 +37,7 @@ my %LegalStats;
### read commandline options ### read commandline options
my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH, my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
$OptHostsDB,$OptMonth,$OptParseDB,$OptStatsType,$OptTest,$OptConfFile); $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest);
GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile, GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
'clientsdb=s' => \$OptClientsDB, 'clientsdb=s' => \$OptClientsDB,
'd|debug!' => \$OptDebug, 'd|debug!' => \$OptDebug,
@ -46,19 +45,18 @@ GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
'hierarchy=s' => \$OptTLH, 'hierarchy=s' => \$OptTLH,
'hostsdb=s' => \$OptHostsDB, 'hostsdb=s' => \$OptHostsDB,
'm|month=s' => \$OptMonth, 'm|month=s' => \$OptMonth,
'parsedb=s' => \$OptParseDB, 'rawdb=s' => \$OptRawDB,
's|stats=s' => \$OptStatsType, 's|stats=s' => \$OptStatsType,
't|test!' => \$OptTest, 't|test!' => \$OptTest,
'conffile=s' => \$OptConfFile,
'h|help' => \&ShowPOD, 'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1; 'V|version' => \&ShowVersion) or exit 1;
### read configuration ### read configuration
my %Conf = %{ReadConfig($OptConfFile)}; my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
### override configuration via commandline options ### override configuration via commandline options
my %ConfOverride; my %ConfOverride;
$ConfOverride{'DBTableParse'} = $OptParseDB if $OptParseDB; $ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; $ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB; $ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB; $ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
@ -73,8 +71,6 @@ $OptStatsType = 'all' if !$OptStatsType;
### get time period from --month ### get time period from --month
# get verbal description of time period, drop SQL code # get verbal description of time period, drop SQL code
my ($Period) = &GetTimePeriod($OptMonth); my ($Period) = &GetTimePeriod($OptMonth);
# bail out if --month is invalid or set to 'ALL';
# we don't support the latter
&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ". &Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
"'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time'); "'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
@ -124,15 +120,15 @@ foreach my $Month (&ListMonth($Period)) {
### ---------------------------------------------- ### ----------------------------------------------
### get groups data (number of postings per group) ### get groups data (number of postings per group)
# get groups data from parsed table for given month # get groups data from raw table for given month
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ". my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
"WHERE day LIKE ? AND NOT disregard", "WHERE day LIKE ? AND NOT disregard",
$Conf{'DBDatabase'}, $Conf{'DBDatabase'},
$Conf{'DBTableParse'})); $Conf{'DBTableRaw'}));
$DBQuery->execute($Month.'-%') $DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ". or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
"$DBI::errstr\n",$Month, "$DBI::errstr\n",$Month,
$Conf{'DBDatabase'},$Conf{'DBTableParse'})); $Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
# count postings per group # count postings per group
my %Postings; my %Postings;
@ -164,7 +160,7 @@ foreach my $Month (&ListMonth($Period)) {
} }
}; };
}; };
# delete old data for that month # delete old data for that month
if (!$OptTest) { if (!$OptTest) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?", $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
@ -206,11 +202,11 @@ __END__
=head1 NAME =head1 NAME
gatherstats - process statistical data from a parsed source gatherstats - process statistical data from a raw source
=head1 SYNOPSIS =head1 SYNOPSIS
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--parsedb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--conffile> I<filename>] B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
@ -219,7 +215,7 @@ See L<doc/README>.
=head1 DESCRIPTION =head1 DESCRIPTION
This script will extract and process statistical information from a This script will extract and process statistical information from a
database table which is filled from F<parsedb.pl> for a given time period database table which is fed from F<feedlog.pl> for a given time period
and write its results to (an)other database table(s). Entries marked and write its results to (an)other database table(s). Entries marked
with I<'disregard'> in the database will be ignored; currently, you with I<'disregard'> in the database will be ignored; currently, you
have to set this flag yourself, using your database management tools. have to set this flag yourself, using your database management tools.
@ -261,13 +257,12 @@ override that default through the B<--groupsdb> option.
=head2 Configuration =head2 Configuration
B<gatherstats> will read its configuration from F<newsstats.conf> B<gatherstats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration file which should be present in the same directory via Config::Auto.
submitted by the B<--conffile> option.
See L<doc/INSTALL> for an overview of possible configuration options. See L<doc/INSTALL> for an overview of possible configuration options.
You can override configuration options via the B<--hierarchy>, You can override configuration options via the B<--hierarchy>,
B<--parsedb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options, B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
respectively. respectively.
=head1 OPTIONS =head1 OPTIONS
@ -296,7 +291,7 @@ conjunction with B<--test> ... everything else seems a bit pointless.
Set processing period to a single month in YYYY-MM format or to a time Set processing period to a single month in YYYY-MM format or to a time
period between two month in YYYY-MM:YYYY-MM format (two month, separated period between two month in YYYY-MM:YYYY-MM format (two month, separated
by a colon). by a colon).
=item B<-s>, B<--stats> I<type> =item B<-s>, B<--stats> I<type>
@ -328,9 +323,9 @@ will be added with a count of 0 (and logged to STDERR).
Override I<TLH> from F<newsstats.conf>. Override I<TLH> from F<newsstats.conf>.
=item B<--parsedb> I<table> (parsed data table) =item B<--rawdb> I<table> (raw data table)
Override I<DBTableParse> from F<newsstats.conf>. Override I<DBTableRaw> from F<newsstats.conf>.
=item B<--groupsdb> I<table> (postings per group table) =item B<--groupsdb> I<table> (postings per group table)
@ -344,10 +339,6 @@ Override I<DBTableClnts> from F<newsstats.conf>.
Override I<DBTableHosts> from F<newsstats.conf>. Override I<DBTableHosts> from F<newsstats.conf>.
=item B<--conffile> I<filename>
Load configuration from I<filename> instead of F<newsstats.conf>.
=back =back
=head1 INSTALLATION =head1 INSTALLATION
@ -377,15 +368,15 @@ checking against checkgroups-*:
=over 4 =over 4
=item F<bin/gatherstats.pl> =item F<gatherstats.pl>
The script itself. The script itself.
=item F<lib/NewsStats.pm> =item F<NewsStats.pm>
Library functions for the NewsStats package. Library functions for the NewsStats package.
=item F<etc/newsstats.conf> =item F<newsstats.conf>
Runtime configuration file. Runtime configuration file.
@ -418,7 +409,7 @@ Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.

View file

@ -4,19 +4,18 @@
# #
# This script will get statistical data on newgroup usage # This script will get statistical data on newgroup usage
# from a database. # from a database.
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
BEGIN { BEGIN {
our $VERSION = "0.02"; our $VERSION = "0.01";
use File::Basename; use File::Basename;
# we're in .../bin, so our module is in ../lib push(@INC, dirname($0));
push(@INC, dirname($0).'/../lib');
} }
use strict; use strict;
use warnings; use warnings;
@ -32,7 +31,7 @@ Getopt::Long::config ('bundling');
### read commandline options ### read commandline options
my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments, my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments,
$OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth, $OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth,
$OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound,$OptConfFile); $OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound);
GetOptions ('b|boundary=s' => \$OptBoundType, GetOptions ('b|boundary=s' => \$OptBoundType,
'c|captions!' => \$OptCaptions, 'c|captions!' => \$OptCaptions,
'checkgroups=s' => \$OptCheckgroupsFile, 'checkgroups=s' => \$OptCheckgroupsFile,
@ -48,7 +47,6 @@ GetOptions ('b|boundary=s' => \$OptBoundType,
'r|report=s' => \$OptReportType, 'r|report=s' => \$OptReportType,
's|sums!' => \$OptSums, 's|sums!' => \$OptSums,
'u|upper=i' => \$UppBound, 'u|upper=i' => \$UppBound,
'conffile=s' => \$OptConfFile,
'h|help' => \&ShowPOD, 'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1; 'V|version' => \&ShowVersion) or exit 1;
# parse parameters # parse parameters
@ -78,19 +76,12 @@ if ($OptReportType) {
$OptReportType = 'default'; $OptReportType = 'default';
} }
} }
# honor $OptCheckgroupsFile, # read list of newsgroups from --checkgroups
# warn for $OptSums if set concurrently # into a hash reference
my $ValidGroups; my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile;
if ($OptCheckgroupsFile) {
# read list of newsgroups from --checkgroups
# into a hash reference
$ValidGroups = &ReadGroupList($OptCheckgroupsFile);
&Bleat(1,"--sums option can't possibly work with --checkgroups option set")
if $OptSums;
}
### read configuration ### read configuration
my %Conf = %{ReadConfig($OptConfFile)}; my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
### override configuration via commandline options ### override configuration via commandline options
my %ConfOverride; my %ConfOverride;
@ -133,17 +124,12 @@ if ($OptBoundType and $OptBoundType ne 'default') {
} }
### get sort order and build SQL 'ORDER BY' clause ### get sort order and build SQL 'ORDER BY' clause
# force to 'month' for $OptReportType 'average' or 'sum'
$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
# default to 'newsgroup' for $OptBoundType 'level' or 'average' # default to 'newsgroup' for $OptBoundType 'level' or 'average'
$OptGroupBy = 'newsgroup' if (!$OptGroupBy and $OptGroupBy = 'newsgroup' if (!$OptGroupBy and
$OptBoundType and $OptBoundType ne 'default'); $OptBoundType and $OptBoundType ne 'default');
# default to 'newsgroup' if $OptGroupBy is not set and # force to 'month' for $OptReportType 'average' or 'sum'
# just one newsgroup is requested, but more than one month $OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
$OptGroupBy = 'newsgroup' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/
and $OptNewsgroups and $OptNewsgroups !~ /[:*%]/);
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause # parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy); my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy);
# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy) # $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy)
# set it to 'month' or 'key' for OutputData() # set it to 'month' or 'key' for OutputData()
@ -258,7 +244,7 @@ if ($OptCaptions && $OptComments) {
($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '', ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending'); ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
} }
# output data # output data
&OutputData($OptFormat,$OptComments,$GroupBy,$Precision, &OutputData($OptFormat,$OptComments,$GroupBy,$Precision,
$OptCheckgroupsFile ? $ValidGroups : '', $OptCheckgroupsFile ? $ValidGroups : '',
@ -277,7 +263,7 @@ groupstats - create reports on newsgroup usage
=head1 SYNOPSIS =head1 SYNOPSIS
B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>] [B<--conffile> I<filename>] B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
@ -342,8 +328,7 @@ Captions and comments are automatically disabled in this case.
=head2 Configuration =head2 Configuration
B<groupstats> will read its configuration from F<newsstats.conf> B<groupstats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration file which should be present in the same directory via Config::Auto.
submitted by the B<--conffile> option.
See doc/INSTALL for an overview of possible configuration options. See doc/INSTALL for an overview of possible configuration options.
@ -361,7 +346,7 @@ Print out version and copyright information and exit.
Print this man page and exit. Print this man page and exit.
=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all> =item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
Set processing period to a single month in YYYY-MM format or to a time Set processing period to a single month in YYYY-MM format or to a time
period between two month in YYYY-MM:YYYY-MM format (two month, separated period between two month in YYYY-MM:YYYY-MM format (two month, separated
@ -388,9 +373,6 @@ example:
See the B<gatherstats> man page for details. See the B<gatherstats> man page for details.
This option does not work together with the B<--checkgroups> option as
all "virtual" groups will not be present in the checkgroups file.
=item B<--checkgroups> I<filename> =item B<--checkgroups> I<filename>
Restrict output to those newgroups present in a file in checkgroups format Restrict output to those newgroups present in a file in checkgroups format
@ -400,9 +382,6 @@ line is ignored). All other newsgroups will be removed from output.
Contrary to B<gatherstats>, I<filename> is not a template, but refers to Contrary to B<gatherstats>, I<filename> is not a template, but refers to
a single file in checkgroups format. a single file in checkgroups format.
The B<--sums> option will not work together with this option as "virtual"
groups will not be present in the checkgroups file.
=item B<-r>, B<--report> I<default|average|sums> =item B<-r>, B<--report> I<default|average|sums>
Choose the report type: I<default>, I<average> or I<sums> Choose the report type: I<default>, I<average> or I<sums>
@ -613,10 +592,6 @@ B<--nocomments> is enforced, see above.
Override I<DBTableGrps> from F<newsstats.conf>. Override I<DBTableGrps> from F<newsstats.conf>.
=item B<--conffile> I<filename>
Load configuration from I<filename> instead of F<newsstats.conf>.
=back =back
=head1 INSTALLATION =head1 INSTALLATION
@ -660,15 +635,15 @@ machine-readable form (without formatting):
=over 4 =over 4
=item F<bin/groupstats.pl> =item F<groupstats.pl>
The script itself. The script itself.
=item F<lib/NewsStats.pm> =item F<NewsStats.pm>
Library functions for the NewsStats package. Library functions for the NewsStats package.
=item F<etc/newsstats.conf> =item F<newsstats.conf>
Runtime configuration file. Runtime configuration file.
@ -705,7 +680,7 @@ Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.

View file

@ -3,25 +3,27 @@
# install.pl # install.pl
# #
# This script will create database tables as necessary. # This script will create database tables as necessary.
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
BEGIN { BEGIN {
our $VERSION = "0.02"; our $VERSION = "0.01";
use File::Basename; use File::Basename;
# we're in .../install, so our module is in ../lib # we're in .../install, so our module is in ..
push(@INC, dirname($0).'/../lib'); push(@INC, dirname($0).'/..');
} }
use strict; use strict;
use warnings; use warnings;
use NewsStats qw(:DEFAULT); use NewsStats qw(:DEFAULT);
use Cwd;
use DBI; use DBI;
use Getopt::Long qw(GetOptions); use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling'); Getopt::Long::config ('bundling');
@ -29,15 +31,18 @@ Getopt::Long::config ('bundling');
################################# Main program ################################# ################################# Main program #################################
### read commandline options ### read commandline options
my ($OptUpdate,$OptConfFile); my ($OptUpdate);
GetOptions ('u|update=s' => \$OptUpdate, GetOptions ('u|update=s' => \$OptUpdate,
'conffile=s' => \$OptConfFile,
'h|help' => \&ShowPOD, 'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1; 'V|version' => \&ShowVersion) or exit 1;
### change working directory to .. (as we're in .../install)
chdir dirname($FullPath).'/..';
my $Path = cwd();
### read configuration ### read configuration
print("Reading configuration.\n"); print("Reading configuration.\n");
my %Conf = %{ReadConfig($OptConfFile)}; my %Conf = %{ReadConfig($Path.'/newsstats.conf')};
##### -------------------------------------------------------------------------- ##### --------------------------------------------------------------------------
##### Database table definitions ##### Database table definitions
@ -47,10 +52,10 @@ my $DBCreate = <<SQLDB;
CREATE DATABASE IF NOT EXISTS `$Conf{'DBDatabase'}` DEFAULT CHARSET=utf8; CREATE DATABASE IF NOT EXISTS `$Conf{'DBDatabase'}` DEFAULT CHARSET=utf8;
SQLDB SQLDB
my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableParse' => <<PARSE, 'DBTableGrps' => <<GRPS); my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS);
-- --
-- Table structure for table DBTableRaw -- Table structure for table DBTableRaw
-- --
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableRaw'}` ( CREATE TABLE IF NOT EXISTS `$Conf{'DBTableRaw'}` (
`id` bigint(20) unsigned NOT NULL auto_increment, `id` bigint(20) unsigned NOT NULL auto_increment,
@ -71,59 +76,9 @@ CREATE TABLE IF NOT EXISTS `$Conf{'DBTableRaw'}` (
KEY `peer` (`peer`) KEY `peer` (`peer`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Raw data'; ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Raw data';
RAW RAW
-- --
-- Table structure for table DBTableParse
--
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableParse'}` (
`id` bigint(20) unsigned NOT NULL auto_increment,
`day` date NOT NULL,
`mid` varchar(250) character set ascii NOT NULL,
`refs` varchar(1000) character set ascii,
`date` varchar(100) NOT NULL,
`path` varchar(1000) NOT NULL,
`newsgroups` varchar(1000) NOT NULL,
`fupto` varchar(200),
`from_` varchar(500),
`from_parsed` varchar(200),
`from_name` varchar(200),
`from_address` varchar(200),
`sender` varchar(500),
`sender_parsed` varchar(200),
`sender_name` varchar(200),
`sender_address` varchar(200),
`replyto` varchar(500),
`replyto_parsed` varchar(200),
`replyto_name` varchar(200),
`replyto_address` varchar(200),
`subject` varchar(1000) NOT NULL,
`subject_parsed` varchar(1000),
`organization` varchar(1000),
`linecount` int(4) unsigned,
`approved` varchar(250),
`supersedes` varchar(250),
`expires` varchar(100),
`useragent` varchar(500),
`xnewsreader` varchar(500),
`xmailer` varchar(500),
`xnoarchive` varchar(100),
`contenttype` varchar(500),
`contentencoding` varchar(500),
`cancellock` varchar(500),
`injectioninfo` varchar(500),
`xtrace` varchar(500),
`postinghost` varchar(1000),
`headers` longtext,
`disregard` tinyint(1) default '0',
PRIMARY KEY (`id`),
KEY `day` (`day`),
KEY `mid` (`mid`),
KEY `newsgroups` (`newsgroups`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Parsed data';
PARSE
--
-- Table structure for table DBTableGrps -- Table structure for table DBTableGrps
-- --
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableGrps'}` ( CREATE TABLE IF NOT EXISTS `$Conf{'DBTableGrps'}` (
`id` bigint(20) unsigned NOT NULL auto_increment, `id` bigint(20) unsigned NOT NULL auto_increment,
@ -153,7 +108,7 @@ Things left to do:
## gather statistics for NewsStats ## gather statistics for NewsStats
newsstats!\\ newsstats!\\
:!*,de.*\\ :!*,de.*\\
:Tc,WmtfbsPNH,Ac:$HomePath/bin/feedlog.pl :Tc,WmtfbsPNH,Ac:$Path/feedlog.pl
Please Please
@ -212,7 +167,7 @@ if (!$OptUpdate) {
my $DBQuery = $DBHandle->prepare($DBCreate); my $DBQuery = $DBHandle->prepare($DBCreate);
$DBQuery->execute() or &Bleat(2, sprintf("Can't create database %s: %s%\n", $DBQuery->execute() or &Bleat(2, sprintf("Can't create database %s: %s%\n",
$Conf{'DBDatabase'}, $DBI::errstr)); $Conf{'DBDatabase'}, $DBI::errstr));
printf("Database table %s created succesfully.\n",$Conf{'DBDatabase'}); printf("Database table %s created succesfully.\n",$Conf{'DBDatabase'});
$DBHandle->disconnect; $DBHandle->disconnect;
}; };
@ -230,7 +185,7 @@ if (!$OptUpdate) {
&CreateTable($Table); &CreateTable($Table);
}; };
print "Database table generation done.\n"; print "Database table generation done.\n";
# Display install instructions # Display install instructions
print $Install; print $Install;
} else { } else {
@ -300,7 +255,7 @@ install - installation script
=head1 SYNOPSIS =head1 SYNOPSIS
B<install> [B<-Vh> [--update I<version>] [B<--conffile> I<filename>] B<install> [B<-Vh> [--update I<version>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
@ -312,9 +267,8 @@ This script will create database tables as necessary and configured.
=head2 Configuration =head2 Configuration
B<install> will read its configuration from F<newsstats.conf> which should B<install> will read its configuration from F<newsstats.conf> via
be present in etc/ via Config::Auto or from a configuration file submitted Config::Auto.
by the B<--conffile> option.
See L<doc/INSTALL> for an overview of possible configuration options. See L<doc/INSTALL> for an overview of possible configuration options.
@ -334,25 +288,21 @@ Print this man page and exit.
Don't do a fresh install, but update from I<version>. Don't do a fresh install, but update from I<version>.
=item B<--conffile> I<filename>
Load configuration from I<filename> instead of F<newsstats.conf>.
=back =back
=head1 FILES =head1 FILES
=over 4 =over 4
=item F<install/install.pl> =item F<install.pl>
The script itself. The script itself.
=item F<lib/NewsStats.pm> =item F<NewsStats.pm>
Library functions for the NewsStats package. Library functions for the NewsStats package.
=item F<etc/newsstats.conf> =item F<newsstats.conf>
Runtime configuration file. Runtime configuration file.
@ -385,7 +335,7 @@ Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.

View file

@ -4,14 +4,13 @@
# #
DBDriver = mysql DBDriver = mysql
DBHost = localhost DBHost = localhost
DBUser = DBUser =
DBPw = DBPw =
DBDatabase = newsstats DBDatabase = newsstats
# #
# tables # tables
# #
DBTableRaw = raw_de DBTableRaw = raw_de
DBTableParse = parsed_de
DBTableGrps = groups_de DBTableGrps = groups_de
#DBTableClnts = #DBTableClnts =
#DBTableHosts = #DBTableHosts =