#! /usr/bin/perl
# This script will log headers and other data to a database
# for further analysis by parsing a feed from INN.
# It is part of the NewsStats package.
# Copyright (c) 2010-2013 Thomas Hochstein <>
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.
our $VERSION = "0.02";
use File::Basename;
# we're in .../bin, so our module is in ../lib
push(@INC, dirname($0).'/../lib');
use strict;
use warnings;
use NewsStats;
use Sys::Syslog qw(:standard :macros);
use Date::Format;
use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
################################# Subroutines ##################################
sub PrepareDB {
### Initialise the database connection, prepare the INSERT statement
### used for every article and log connection failures/successes.
### IN : \%Conf : reference to configuration hash
### OUT: $DBHandle: database handle
###      $DBQuery : prepared statement
# These are package variables (our), so a handle stored by an earlier call
# is still visible here when the sub is called again to reconnect.
# $OptQuiet is likewise the package variable; it reflects the -q option only
# if the main program stores that option in the package variable rather than
# in a file-scoped lexical of the same name.
our ($DBHandle, $DBQuery, $OptQuiet);
my ($ConfigR) = @_;
# copy of the configuration hash
my %Conf = %$ConfigR;
# drop current database connection - hard, if necessary
if ($DBHandle) {
undef $DBHandle;
# connect to database; try again every 5 seconds
# NOTE(review): the loop repeats until InitDB returns a handle, but no delay
# between attempts is visible in this excerpt - confirm the 5 second pause.
while (!$DBHandle) {
# InitDB is not defined in this file (presumably provided by NewsStats);
# the meaning of the second argument 0 is not visible here - TODO confirm.
$DBHandle = InitDB($ConfigR,0);
if (!$DBHandle) {
# connection failures are always logged, independent of $OptQuiet
syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);
} else {;
syslog(LOG_NOTICE, "Database connection (re-)established successfully.") if !$OptQuiet;
# prepare the INSERT once per connection; the ten placeholders correspond to
# the ten values the main loop passes to execute()
$DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,
VALUES (?,?,?,?,?,?,?,?,?,?)",
return ($DBHandle,$DBQuery);
################################# Main program #################################
### read commandline options
# $OptQuiet is a package variable (our) on purpose: PrepareDB() reads the
# package variable through its own "our" declaration, so a lexical declared
# here would never be seen there and -q could not silence its notices.
my ($OptDebug,$OptConfFile);
our $OptQuiet;
# --quiet is the documented long form of -q; --test is kept as an alias so
# that existing invocations keep working.
GetOptions ('d|debug!'      => \$OptDebug,
            'q|quiet|test!' => \$OptQuiet,
            'conffile=s'    => \$OptConfFile,
            'h|help'        => \&ShowPOD,
            'V|version'     => \&ShowVersion) or exit 1;
### read configuration
# $OptConfFile is undefined unless --conffile was given
my %Conf = %{ReadConfig($OptConfFile)};
### init syslog
# 'nofatal': syslog problems only warn instead of dying; 'pid': log the PID
openlog($0, 'nofatal,pid', LOG_NEWS);
syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet;
### init database
my ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
### main loop
# Reads the feed line by line via <> (STDIN, or files named on the command
# line). Per the comments below, each article consists of one line of
# overview data followed by header lines and a terminating empty line.
while (<>) {
# catch empty lines trailing or leading
# NOTE(review): a line read by <> keeps its newline unless it is chomped
# first; no chomp is visible in this excerpt - confirm.
if ($_ eq '') {
# first line contains: mid, timestamp, token, size, peer, Path, Newsgroups
# (split without arguments splits $_ on whitespace)
my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;
# remaining lines contain headers
my $Headers = "";
# inner loop consumes the header lines of the current article
while (<>) {
# empty line terminates this article
if ($_ eq '') {
# collect headers
# a newline is appended to every line, which suggests the lines are expected
# to arrive here already chomped - TODO confirm
$Headers .= $_."\n" ;
# parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp
# (time2str comes from Date::Format; $Timestamp is presumably epoch seconds)
my $Day = time2str("%Y-%m-%d", $Timestamp);
my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);
# write to database
# ten bind values, matching the ten placeholders of the prepared INSERT
if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
$Path, $Newsgroups, $Headers)) {
syslog(LOG_ERR, 'Database error %s while processing %s: %s',
$DBI::err, $Mid, $DBI::errstr);
# if "MySQL server has gone away", try to recover
if ($DBI::err == 2006) {
# try to reconnect to database
# PrepareDB returns a fresh handle and a freshly prepared statement
($DBHandle,$DBQuery) = PrepareDB(\%Conf);
# try to repeat the write attempt as before
if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
$Path, $Newsgroups, $Headers)) {
# second attempt failed as well: the article is not stored
syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
# otherwise log missing posting
} else {
# any other database error: no retry, the article is not stored
syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
# with -d/--debug, dump the complete record to STDERR
warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n".
"Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",
$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path,
$Newsgroups, $Headers) if $OptDebug;
### close handles
# only the shutdown notice is visible here; no explicit disconnect or
# closelog appears in this excerpt
syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet;
################################ Documentation #################################
=head1 NAME
feedlog - log data from an INN feed to a database
B<feedlog> [B<-Vhdq>] [B<--conffile> I<filename>]
See L<doc/README>.
This script will log overview data and complete headers to a database
table for further examination by parsing a feed from INN. It will
parse that information and write it to a MySQL database table in real
time.
All reporting is done to I<syslog> via the I<news> facility. If B<feedlog>
fails to initiate a database connection at startup, it will log to
I<syslog> with I<CRIT> priority and go into an endless loop, as
terminating would only result in a rapid respawn.
=head2 Configuration
B<feedlog> will read its configuration via Config::Auto from
F<newsstats.conf>, which should be present in F<etc/>, or from a
configuration file specified with the B<--conffile> option.
See L<doc/INSTALL> for an overview of possible configuration options.
=head1 OPTIONS
=over 3
=item B<-V>, B<--version>
Print out version and copyright information and exit.
=item B<-h>, B<--help>
Print this man page and exit.
=item B<-d>, B<--debug>
Output debugging information to STDERR while parsing STDIN. You will
most probably find that information in your B<INN> F<errlog> file.
=item B<-q>, B<--quiet>
Suppress informational (notice-level) logging to syslog. Errors are
still logged.
=item B<--conffile> I<filename>
Load configuration from I<filename> instead of F<newsstats.conf>.
See L<doc/INSTALL>.
Set up a feed like this in your B<INN> F<newsfeeds> file:
## gather statistics for NewsStats
See L<doc/INSTALL> for further information.
=head1 FILES
=over 4
=item F<bin/>
The script itself.
=item F<lib/>
Library functions for the NewsStats package.
=item F<etc/newsstats.conf>
Runtime configuration file.
=head1 BUGS
Please report any bugs or feature requests to the author or use the
bug tracker at L<>!
=head1 SEE ALSO
=over 2
=item -
=item -
This script is part of the B<NewsStats> package.
=head1 AUTHOR
Thomas Hochstein <>
Copyright (c) 2010-2013 Thomas Hochstein <>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.