2013-09-02 11:15:12 +02:00
|
|
|
#! /usr/bin/perl
|
2010-09-18 18:45:20 +02:00
|
|
|
#
|
|
|
|
# feedlog.pl
|
|
|
|
#
|
|
|
|
# This script will log headers and other data to a database
|
|
|
|
# for further analysis by parsing a feed from INN.
|
2013-09-03 09:41:36 +02:00
|
|
|
#
|
2010-09-18 18:45:20 +02:00
|
|
|
# It is part of the NewsStats package.
|
|
|
|
#
|
2013-09-02 13:14:33 +02:00
|
|
|
# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
2010-09-18 18:45:20 +02:00
|
|
|
#
|
2013-09-03 09:41:36 +02:00
|
|
|
# It can be redistributed and/or modified under the same terms under
|
2010-09-18 18:45:20 +02:00
|
|
|
# which Perl itself is published.
|
|
|
|
|
|
|
|
BEGIN {
  # version of this script
  our $VERSION = "0.01";
  use File::Basename;
  # This script lives in .../bin and its module in the sibling ../lib
  # directory, so append that directory to the module search path.
  my $LibDir = dirname($0) . '/../lib';
  push @INC, $LibDir;
}
|
|
|
|
use strict;
|
2013-09-02 11:15:12 +02:00
|
|
|
use warnings;
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
use NewsStats;
|
|
|
|
|
|
|
|
use Sys::Syslog qw(:standard :macros);
|
|
|
|
|
|
|
|
use Date::Format;
|
|
|
|
use DBI;
|
2012-05-27 14:30:14 +02:00
|
|
|
use Getopt::Long qw(GetOptions);
|
|
|
|
Getopt::Long::config ('bundling');
|
2010-09-18 18:45:20 +02:00
|
|
|
|
2012-10-13 01:27:54 +02:00
|
|
|
################################# Subroutines ##################################
|
|
|
|
|
|
|
|
sub PrepareDB {
  ### (re-)initialise database connection, prepare the INSERT statement
  ### and catch errors; does not return until both steps have succeeded
  ### (retries every 5 seconds)
  ### IN : \%Conf   : reference to configuration hash
  ###                 (keys 'DBDatabase' and 'DBTableRaw' are read here)
  ### OUT: $DBHandle: database handle
  ###      $DBQuery : prepared statement
  # Package variables: $DBHandle persists between calls so that a stale
  # connection can be dropped before reconnecting.
  our ($DBHandle, $DBQuery, $OptQuiet);
  my ($ConfigR) = @_;

  # drop current database connection - hard, if necessary
  if ($DBHandle) {
    $DBHandle->disconnect;
    undef $DBHandle;
  }
  undef $DBQuery;

  my $Statement = sprintf("INSERT INTO %s.%s (day,date,mid,
                           timestamp,token,size,peer,path,
                           newsgroups,headers)
                           VALUES (?,?,?,?,?,?,?,?,?,?)",
                          $ConfigR->{'DBDatabase'},
                          $ConfigR->{'DBTableRaw'});

  # connect to database and prepare statement; try again every 5 seconds
  while (!$DBQuery) {
    # NOTE(review): the second argument (0) presumably tells InitDB not to
    # die on failure - confirm in NewsStats.pm
    $DBHandle = InitDB($ConfigR,0);
    if (!$DBHandle) {
      syslog(LOG_CRIT, 'Database connection failed: %s',
             defined($DBI::errstr) ? $DBI::errstr : 'unknown error');
      sleep(5);
      next;
    }
    syslog(LOG_NOTICE, "Database connection (re-)established successfully.") if !$OptQuiet;

    # A failed prepare must not hand an undefined statement handle back to
    # the caller (which would die on ->execute), so treat it like a failed
    # connection and start over.
    $DBQuery = $DBHandle->prepare($Statement);
    if (!$DBQuery) {
      syslog(LOG_CRIT, 'Preparing database statement failed: %s',
             defined($DBHandle->errstr) ? $DBHandle->errstr : 'unknown error');
      $DBHandle->disconnect;
      undef $DBHandle;
      sleep(5);
    }
  }
  return ($DBHandle,$DBQuery);
}
|
|
|
|
|
|
|
|
|
2010-09-18 18:45:20 +02:00
|
|
|
################################# Main program #################################
|
|
|
|
|
|
|
|
### read commandline options
# $OptQuiet has to be a package variable (not a lexical): PrepareDB()
# accesses it via "our" and would otherwise never see the -q setting.
our $OptQuiet;
my ($OptDebug,$OptConfFile);
GetOptions ('d|debug!'      => \$OptDebug,
            # --quiet is the documented name; --test is kept as an alias
            # for backward compatibility
            'q|quiet|test!' => \$OptQuiet,
            'conffile=s'    => \$OptConfFile,
            'h|help'        => \&ShowPOD,
            'V|version'     => \&ShowVersion) or exit 1;

### read configuration
my %Conf = %{ReadConfig($OptConfFile)};

### init syslog
openlog($0, 'nofatal,pid', LOG_NEWS);
# always pass variable data as arguments, never as (part of) the format
# NOTE(review): $MyVersion is not declared in this file - presumably
# exported by NewsStats; confirm
syslog(LOG_NOTICE, '%s starting up.', $MyVersion) if !$OptQuiet;

### init database
my ($DBHandle,$DBQuery) = PrepareDB(\%Conf);

### main loop
# The input consists of articles separated by empty lines: one line of
# overview data followed by the header lines.
while (<>) {
  chomp;
  # catch empty lines trailing or leading
  next if $_ eq '';
  # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups
  my $FeedLine = $_;
  my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;
  # remaining lines contain headers
  my $Headers = "";
  while (<>) {
    chomp;
    # empty line terminates this article
    last if $_ eq '';
    # collect headers
    $Headers .= $_."\n";
  }

  # Without a numeric timestamp neither day nor date can be computed;
  # log and skip such an entry (its headers have already been consumed).
  if (!defined($Timestamp) or $Timestamp !~ /^\d+$/) {
    syslog(LOG_ERR, 'Malformed feed line was dropped: %s', $FeedLine);
    next;
  }

  # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp
  my $Day  = time2str("%Y-%m-%d", $Timestamp);
  my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);

  # write to database
  my @Row = ($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
             $Path, $Newsgroups, $Headers);
  if (!$DBQuery->execute(@Row)) {
    syslog(LOG_ERR, 'Database error %s while processing %s: %s',
           $DBI::err, $Mid, $DBI::errstr);
    # if "MySQL server has gone away", try to recover
    if (defined($DBI::err) and $DBI::err == 2006) {
      # try to reconnect to database
      ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
      # try to repeat the write attempt as before
      if (!$DBQuery->execute(@Row)) {
        syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
      }
    # otherwise log missing posting
    } else {
      syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
    }
  }
  $DBQuery->finish;

  # dump the parsed record to STDERR in debug mode
  warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n".
               "Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",
               $Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path,
               $Newsgroups, $Headers) if $OptDebug;
}

### close handles
$DBHandle->disconnect;
syslog(LOG_NOTICE, '%s closing down.', $0) if !$OptQuiet;
closelog();
|
|
|
|
|
|
|
|
__END__
|
|
|
|
|
|
|
|
################################ Documentation #################################
|
|
|
|
|
|
|
|
=head1 NAME
|
|
|
|
|
|
|
|
feedlog - log data from an INN feed to a database
|
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
|
2013-09-03 09:58:17 +02:00
|
|
|
B<feedlog> [B<-Vhdq>] [--conffile I<filename>]
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
=head1 REQUIREMENTS
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
See L<doc/README>.
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
|
|
|
|
This script will log overview data and complete headers to a database
|
|
|
|
table for further examination by parsing a feed from INN. It will
|
|
|
|
parse that information and write it to a mysql database table in real
|
|
|
|
time.
|
|
|
|
|
|
|
|
All reporting is done to I<syslog> via I<news> facility. If B<feedlog>
|
|
|
|
fails to initiate a database connection at startup, it will log to
|
|
|
|
I<syslog> with I<CRIT> priority and go into an endless loop, as
|
|
|
|
terminating would only result in a rapid respawn.
|
|
|
|
|
|
|
|
=head2 Configuration
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
B<feedlog> will read its configuration via Config::Auto from
F<newsstats.conf>, which should be present in the same directory.
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
See L<doc/INSTALL> for an overview of possible configuration options.
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
=head1 OPTIONS
|
|
|
|
|
|
|
|
=over 3
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
=item B<-V>, B<--version>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
Print out version and copyright information and exit.
|
2010-09-18 18:45:20 +02:00
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
=item B<-h>, B<--help>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
Print this man page and exit.
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
=item B<-d>, B<--debug>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
Output debugging information to STDERR while parsing STDIN. You'll
|
|
|
|
find that information most probably in your B<INN> F<errlog> file.
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
=item B<-q>, B<--quiet>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
Suppress logging to syslog.
|
|
|
|
|
2013-09-03 09:58:17 +02:00
|
|
|
=item B<--conffile> I<filename>
|
|
|
|
|
|
|
|
Load configuration from I<filename> instead of F<newsstats.conf>.
|
|
|
|
|
2010-09-18 18:45:20 +02:00
|
|
|
=back
|
|
|
|
|
|
|
|
=head1 INSTALLATION
|
|
|
|
|
2013-08-11 22:44:08 +02:00
|
|
|
See L<doc/INSTALL>.
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
=head1 EXAMPLES
|
|
|
|
|
|
|
|
Set up a feed like this in your B<INN> F<newsfeeds> file:
|
|
|
|
|
|
|
|
## gather statistics for NewsStats
|
|
|
|
newsstats!
|
|
|
|
:!*,de.*
|
|
|
|
:Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
See L<doc/INSTALL> for further information.
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
=head1 FILES
|
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
2013-09-03 09:21:55 +02:00
|
|
|
=item F<bin/feedlog.pl>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
The script itself.
|
|
|
|
|
2013-09-03 09:21:55 +02:00
|
|
|
=item F<lib/NewsStats.pm>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
Library functions for the NewsStats package.
|
|
|
|
|
2013-09-03 09:21:55 +02:00
|
|
|
=item F<etc/newsstats.conf>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
Runtime configuration file.
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
=head1 BUGS
|
|
|
|
|
|
|
|
Please report any bugs or feature requests to the author or use the
|
|
|
|
bug tracker at L<http://bugs.th-h.de/>!
|
|
|
|
|
|
|
|
=head1 SEE ALSO
|
|
|
|
|
|
|
|
=over 2
|
|
|
|
|
|
|
|
=item -
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
L<doc/README>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
=item -
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
L<doc/INSTALL>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
This script is part of the B<NewsStats> package.
|
|
|
|
|
|
|
|
=head1 AUTHOR
|
|
|
|
|
|
|
|
Thomas Hochstein <thh@inter.net>
|
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE
|
|
|
|
|
2012-05-27 14:30:14 +02:00
|
|
|
Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
|
2010-09-18 18:45:20 +02:00
|
|
|
|
|
|
|
This program is free software; you may redistribute it and/or modify it
|
|
|
|
under the same terms as Perl itself.
|
|
|
|
|
|
|
|
=cut
|