Introduce end-of-line normalization and add .gitattributes.

Signed-off-by: Thomas Hochstein <thh@inter.net>
This commit is contained in:
Thomas Hochstein 2010-09-18 18:45:20 +02:00
parent 3430c89868
commit 2832c235b2
5 changed files with 1272 additions and 1271 deletions

1
.gitattributes vendored Normal file
View file

@ -0,0 +1 @@
* text=auto

View file

@ -1,227 +1,227 @@
#! /usr/bin/perl -W #! /usr/bin/perl -W
# #
# feedlog.pl # feedlog.pl
# #
# This script will log headers and other data to a database # This script will log headers and other data to a database
# for further analysis by parsing a feed from INN. # for further analysis by parsing a feed from INN.
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
BEGIN { BEGIN {
our $VERSION = "0.01"; our $VERSION = "0.01";
use File::Basename; use File::Basename;
push(@INC, dirname($0)); push(@INC, dirname($0));
} }
use strict; use strict;
use NewsStats; use NewsStats;
use Sys::Syslog qw(:standard :macros); use Sys::Syslog qw(:standard :macros);
use Date::Format; use Date::Format;
use DBI; use DBI;
################################# Main program ################################# ################################# Main program #################################
### read commandline options ### read commandline options
my %Options = &ReadOptions('qd'); my %Options = &ReadOptions('qd');
### read configuration ### read configuration
my %Conf = %{ReadConfig('newsstats.conf')}; my %Conf = %{ReadConfig('newsstats.conf')};
### init syslog ### init syslog
openlog($MySelf, 'nofatal,pid', LOG_NEWS); openlog($MySelf, 'nofatal,pid', LOG_NEWS);
syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'}; syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'};
### init database ### init database
my $DBHandle = InitDB(\%Conf,0); my $DBHandle = InitDB(\%Conf,0);
if (!$DBHandle) { if (!$DBHandle) {
syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);
while (1) {}; # go into endless loop to suppress further errors and respawning while (1) {}; # go into endless loop to suppress further errors and respawning
}; };
my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
### main loop ### main loop
while (<>) { while (<>) {
chomp; chomp;
# catch empty lines trailing or leading # catch empty lines trailing or leading
if ($_ eq '') { if ($_ eq '') {
next; next;
} }
# first line contains: mid, timestamp, token, size, peer, Path, Newsgroups # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups
my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split; my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;
# remaining lines contain headers # remaining lines contain headers
my $Headers = ""; my $Headers = "";
while (<>) { while (<>) {
chomp; chomp;
# empty line terminates this article # empty line terminates this article
if ($_ eq '') { if ($_ eq '') {
last; last;
} }
# collect headers # collect headers
$Headers .= $_."\n" ; $Headers .= $_."\n" ;
} }
# parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp
my $Day = time2str("%Y-%m-%d", $Timestamp); my $Day = time2str("%Y-%m-%d", $Timestamp);
my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp); my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);
# write to database # write to database
if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) { if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) {
syslog(LOG_ERR, 'Database error: %s', $DBI::errstr); syslog(LOG_ERR, 'Database error: %s', $DBI::errstr);
}; };
$DBQuery->finish; $DBQuery->finish;
warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers) if !$Options{'d'}; warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers) if !$Options{'d'};
} }
### close handles ### close handles
$DBHandle->disconnect; $DBHandle->disconnect;
syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'}; syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'};
closelog(); closelog();
__END__ __END__
################################ Documentation ################################# ################################ Documentation #################################
=head1 NAME =head1 NAME
feedlog - log data from an INN feed to a database feedlog - log data from an INN feed to a database
=head1 SYNOPSIS =head1 SYNOPSIS
B<feedlog> [B<-Vhdq>] B<feedlog> [B<-Vhdq>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
See doc/README: Perl 5.8.x itself and the following modules from CPAN: See doc/README: Perl 5.8.x itself and the following modules from CPAN:
=over 2 =over 2
=item - =item -
Config::Auto Config::Auto
=item - =item -
Date::Format Date::Format
=item - =item -
DBI DBI
=back =back
=head1 DESCRIPTION =head1 DESCRIPTION
This script will log overview data and complete headers to a database This script will log overview data and complete headers to a database
table for further examination by parsing a feed from INN. It will table for further examination by parsing a feed from INN. It will
parse that information and write it to a mysql database table in real parse that information and write it to a mysql database table in real
time. time.
All reporting is done to I<syslog> via I<news> facility. If B<feedlog> All reporting is done to I<syslog> via I<news> facility. If B<feedlog>
fails to initiate a database connection at startup, it will log to fails to initiate a database connection at startup, it will log to
I<syslog> with I<CRIT> priority and go into an endless loop, as I<syslog> with I<CRIT> priority and go into an endless loop, as
terminating would only result in a rapid respawn. terminating would only result in a rapid respawn.
=head2 Configuration =head2 Configuration
F<feedlog.pl> will read its configuration from F<newsstats.conf> which F<feedlog.pl> will read its configuration from F<newsstats.conf> which
should be present in the same directory via Config::Auto. should be present in the same directory via Config::Auto.
See doc/INSTALL for an overview of possible configuration options. See doc/INSTALL for an overview of possible configuration options.
=head1 OPTIONS =head1 OPTIONS
=over 3 =over 3
=item B<-V> (version) =item B<-V> (version)
Print out version and copyright information on B<feedlog> and exit. Print out version and copyright information on B<feedlog> and exit.
=item B<-h> (help) =item B<-h> (help)
Print this man page and exit. Print this man page and exit.
=item B<-d> (debug) =item B<-d> (debug)
Output debugging information to STDERR while parsing STDIN. You'll Output debugging information to STDERR while parsing STDIN. You'll
find that information most probably in your B<INN> F<errlog> file. find that information most probably in your B<INN> F<errlog> file.
=item B<-q> (quiet) =item B<-q> (quiet)
Suppress logging to syslog. Suppress logging to syslog.
=back =back
=head1 INSTALLATION =head1 INSTALLATION
See doc/INSTALL. See doc/INSTALL.
=head1 EXAMPLES =head1 EXAMPLES
Set up a feed like this in your B<INN> F<newsfeeds> file: Set up a feed like this in your B<INN> F<newsfeeds> file:
## gather statistics for NewsStats ## gather statistics for NewsStats
newsstats! newsstats!
:!*,de.* :!*,de.*
:Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
See doc/INSTALL for further information. See doc/INSTALL for further information.
=head1 FILES =head1 FILES
=over 4 =over 4
=item F<feedlog.pl> =item F<feedlog.pl>
The script itself. The script itself.
=item F<NewsStats.pm> =item F<NewsStats.pm>
Library functions for the NewsStats package. Library functions for the NewsStats package.
=item F<newsstats.conf> =item F<newsstats.conf>
Runtime configuration file for B<feedlog>. Runtime configuration file for B<feedlog>.
=back =back
=head1 BUGS =head1 BUGS
Please report any bugs or feature requests to the author or use the Please report any bugs or feature requests to the author or use the
bug tracker at L<http://bugs.th-h.de/>! bug tracker at L<http://bugs.th-h.de/>!
=head1 SEE ALSO =head1 SEE ALSO
=over 2 =over 2
=item - =item -
doc/README doc/README
=item - =item -
doc/INSTALL doc/INSTALL
=back =back
This script is part of the B<NewsStats> package. This script is part of the B<NewsStats> package.
=head1 AUTHOR =head1 AUTHOR
Thomas Hochstein <thh@inter.net> Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010 Thomas Hochstein <thh@inter.net> Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.
=cut =cut

View file

@ -1,312 +1,312 @@
#! /usr/bin/perl -W #! /usr/bin/perl -W
# #
# gatherstats.pl # gatherstats.pl
# #
# This script will gather statistical information from a database # This script will gather statistical information from a database
# containing headers and other information from an INN feed. # containing headers and other information from an INN feed.
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
BEGIN { BEGIN {
our $VERSION = "0.01"; our $VERSION = "0.01";
use File::Basename; use File::Basename;
push(@INC, dirname($0)); push(@INC, dirname($0));
} }
use strict; use strict;
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups); use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups);
use DBI; use DBI;
################################# Definitions ################################## ################################# Definitions ##################################
# define types of information that can be gathered # define types of information that can be gathered
# all / groups (/ clients / hosts) # all / groups (/ clients / hosts)
my %LegalTypes; my %LegalTypes;
@LegalTypes{('all','groups')} = (); @LegalTypes{('all','groups')} = ();
################################# Main program ################################# ################################# Main program #################################
### read commandline options ### read commandline options
my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:'); my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:');
### read configuration ### read configuration
my %Conf = %{ReadConfig('newsstats.conf')}; my %Conf = %{ReadConfig('newsstats.conf')};
### override configuration via commandline options ### override configuration via commandline options
my %ConfOverride; my %ConfOverride;
$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'}; $ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'};
$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'}; $ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'};
$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'}; $ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'};
$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'}; $ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'};
&OverrideConfig(\%Conf,\%ConfOverride); &OverrideConfig(\%Conf,\%ConfOverride);
### get type of information to gather, defaulting to 'all' ### get type of information to gather, defaulting to 'all'
$Options{'t'} = 'all' if !$Options{'t'}; $Options{'t'} = 'all' if !$Options{'t'};
die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}}); die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}});
### get time period (-m or -p) ### get time period (-m or -p)
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
### init database ### init database
my $DBHandle = InitDB(\%Conf,1); my $DBHandle = InitDB(\%Conf,1);
### get data for each month ### get data for each month
warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'}; warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};
foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {
print "---------- $Month ----------\n" if $Options{'d'}; print "---------- $Month ----------\n" if $Options{'d'};
if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') { if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') {
### ---------------------------------------------- ### ----------------------------------------------
### get groups data (number of postings per group) ### get groups data (number of postings per group)
# get groups data from raw table for given month # get groups data from raw table for given month
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
$DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'}); $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
# count postings per group # count postings per group
my %Postings; my %Postings;
while (($_) = $DBQuery->fetchrow_array) { while (($_) = $DBQuery->fetchrow_array) {
# get list of newsgroups and hierarchies from Newsgroups: # get list of newsgroups and hierarchies from Newsgroups:
my %Newsgroups = ListNewsgroups($_); my %Newsgroups = ListNewsgroups($_);
# count each newsgroup and hierarchy once # count each newsgroup and hierarchy once
foreach (sort keys %Newsgroups) { foreach (sort keys %Newsgroups) {
# don't count newsgroup/hierarchy in wrong TLH # don't count newsgroup/hierarchy in wrong TLH
next if(defined($Conf{'TLH'}) and !/^$Conf{'TLH'}/); next if(defined($Conf{'TLH'}) and !/^$Conf{'TLH'}/);
$Postings{$_}++; $Postings{$_}++;
}; };
}; };
print "----- GroupStats -----\n" if $Options{'d'}; print "----- GroupStats -----\n" if $Options{'d'};
foreach my $Newsgroup (sort keys %Postings) { foreach my $Newsgroup (sort keys %Postings) {
print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'}; print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};
if (!$Options{'o'}) { if (!$Options{'o'}) {
# write to database # write to database
$DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
$DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
$DBQuery->finish; $DBQuery->finish;
}; };
}; };
} else { } else {
# other types of information go here - later on # other types of information go here - later on
}; };
}; };
### close handles ### close handles
$DBHandle->disconnect; $DBHandle->disconnect;
__END__ __END__
################################ Documentation ################################# ################################ Documentation #################################
=head1 NAME =head1 NAME
gatherstats - process statistical data from a raw source gatherstats - process statistical data from a raw source
=head1 SYNOPSIS =head1 SYNOPSIS
B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>] B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
See doc/README: Perl 5.8.x itself and the following modules from CPAN: See doc/README: Perl 5.8.x itself and the following modules from CPAN:
=over 2 =over 2
=item - =item -
Config::Auto Config::Auto
=item - =item -
DBI DBI
=back =back
=head1 DESCRIPTION =head1 DESCRIPTION
This script will extract and process statistical information from a This script will extract and process statistical information from a
database table which is fed from F<feedlog.pl> for a given time period database table which is fed from F<feedlog.pl> for a given time period
and write its results to (an)other database table(s). and write its results to (an)other database table(s).
The time period to act on defaults to last month; you can assign The time period to act on defaults to last month; you can assign
another month via the B<-m> switch or a time period via the B<-p> another month via the B<-m> switch or a time period via the B<-p>
switch; the latter takes precedence. switch; the latter takes precedence.
By default B<gatherstats> will process all types of information; you By default B<gatherstats> will process all types of information; you
can change that using the B<-t> switch and assigning the type of can change that using the B<-t> switch and assigning the type of
information to process. Currently only processing of the number of information to process. Currently only processing of the number of
postings per group per month is implemented anyway, so that doesn't postings per group per month is implemented anyway, so that doesn't
matter yet. matter yet.
Possible information types include: Possible information types include:
=over 3 =over 3
=item B<groups> (postings per group per month) =item B<groups> (postings per group per month)
B<gatherstats> will examine Newsgroups: headers. Crosspostings will be B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
counted for each single group they appear in. Groups not in I<TLH> counted for each single group they appear in. Groups not in I<TLH>
will be ignored. will be ignored.
B<gatherstats> will also add up the number of postings for each B<gatherstats> will also add up the number of postings for each
hierarchy level, but only count each posting once. A posting to hierarchy level, but only count each posting once. A posting to
de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL, de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
respectively. A crossposting to de.alt.test and de.alt.admin, on the respectively. A crossposting to de.alt.test and de.alt.admin, on the
other hand, will be counted for de.alt.test and de.alt.admin each, but other hand, will be counted for de.alt.test and de.alt.admin each, but
only once for de.alt.ALL and de.ALL. only once for de.alt.ALL and de.ALL.
Data is written to I<DBTableGrps> (see doc/INSTALL). Data is written to I<DBTableGrps> (see doc/INSTALL).
=back =back
=head2 Configuration =head2 Configuration
F<gatherstats.pl> will read its configuration from F<newsstats.conf> F<gatherstats.pl> will read its configuration from F<newsstats.conf>
which should be present in the same directory via Config::Auto. which should be present in the same directory via Config::Auto.
See doc/INSTALL for an overview of possible configuration options. See doc/INSTALL for an overview of possible configuration options.
You can override configuration options via the B<-n>, B<-r>, B<-g>, You can override configuration options via the B<-n>, B<-r>, B<-g>,
B<-c> and B<-s> switches, respectively. B<-c> and B<-s> switches, respectively.
=head1 OPTIONS =head1 OPTIONS
=over 3 =over 3
=item B<-V> (version) =item B<-V> (version)
Print out version and copyright information on B<gatherstats> and exit. Print out version and copyright information on B<gatherstats> and exit.
=item B<-h> (help) =item B<-h> (help)
Print this man page and exit. Print this man page and exit.
=item B<-d> (debug) =item B<-d> (debug)
Output debugging information to STDOUT while processing (number of Output debugging information to STDOUT while processing (number of
postings per group). postings per group).
=item B<-o> (output only) =item B<-o> (output only)
Do not write results to database. You should use B<-d> in conjunction Do not write results to database. You should use B<-d> in conjunction
with B<-o> ... everything else seems a bit pointless. with B<-o> ... everything else seems a bit pointless.
=item B<-m> I<YYYY-MM> (month) =item B<-m> I<YYYY-MM> (month)
Set processing period to a month in YYYY-MM format. Ignored if B<-p> Set processing period to a month in YYYY-MM format. Ignored if B<-p>
is set. is set.
=item B<-p> I<YYYY-MM:YYYY-MM> (period) =item B<-p> I<YYYY-MM:YYYY-MM> (period)
Set processing period to a time period between two months, each in Set processing period to a time period between two months, each in
YYYY-MM format, separated by a colon. Overrides B<-m>. YYYY-MM format, separated by a colon. Overrides B<-m>.
=item B<-t> I<type> (type) =item B<-t> I<type> (type)
Set processing type to one of I<all> and I<groups>. Defaults to all Set processing type to one of I<all> and I<groups>. Defaults to all
(and is currently rather pointless as only I<groups> has been (and is currently rather pointless as only I<groups> has been
implemented). implemented).
=item B<-n> I<TLH> (newsgroup hierarchy) =item B<-n> I<TLH> (newsgroup hierarchy)
Override I<TLH> from F<newsstats.conf>. Override I<TLH> from F<newsstats.conf>.
=item B<-r> I<table> (raw data table) =item B<-r> I<table> (raw data table)
Override I<DBTableRaw> from F<newsstats.conf>. Override I<DBTableRaw> from F<newsstats.conf>.
=item B<-g> I<table> (postings per group table) =item B<-g> I<table> (postings per group table)
Override I<DBTableGrps> from F<newsstats.conf>. Override I<DBTableGrps> from F<newsstats.conf>.
=item B<-c> I<table> (client data table) =item B<-c> I<table> (client data table)
Override I<DBTableClnts> from F<newsstats.conf>. Override I<DBTableClnts> from F<newsstats.conf>.
=item B<-s> I<table> (server/host data table) =item B<-s> I<table> (server/host data table)
Override I<DBTableHosts> from F<newsstats.conf>. Override I<DBTableHosts> from F<newsstats.conf>.
=back =back
=head1 INSTALLATION =head1 INSTALLATION
See doc/INSTALL. See doc/INSTALL.
=head1 EXAMPLES =head1 EXAMPLES
Process all types of information for last month: Process all types of information for last month:
gatherstats gatherstats
Do a dry run, showing results of processing: Do a dry run, showing results of processing:
gatherstats -do gatherstats -do
Process all types of information for January of 2010: Process all types of information for January of 2010:
gatherstats -m 2010-01 gatherstats -m 2010-01
Process only number of postings for the year of 2010: Process only number of postings for the year of 2010:
gatherstats -p 2010-01:2010-12 -t groups gatherstats -p 2010-01:2010-12 -t groups
=head1 FILES =head1 FILES
=over 4 =over 4
=item F<gatherstats.pl> =item F<gatherstats.pl>
The script itself. The script itself.
=item F<NewsStats.pm> =item F<NewsStats.pm>
Library functions for the NewsStats package. Library functions for the NewsStats package.
=item F<newsstats.conf> =item F<newsstats.conf>
Runtime configuration file for B<gatherstats>. Runtime configuration file for B<gatherstats>.
=back =back
=head1 BUGS =head1 BUGS
Please report any bugs or feature requests to the author or use the Please report any bugs or feature requests to the author or use the
bug tracker at L<http://bugs.th-h.de/>! bug tracker at L<http://bugs.th-h.de/>!
=head1 SEE ALSO =head1 SEE ALSO
=over 2 =over 2
=item - =item -
doc/README doc/README
=item - =item -
doc/INSTALL doc/INSTALL
=back =back
This script is part of the B<NewsStats> package. This script is part of the B<NewsStats> package.
=head1 AUTHOR =head1 AUTHOR
Thomas Hochstein <thh@inter.net> Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010 Thomas Hochstein <thh@inter.net> Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.
=cut =cut

View file

@ -1,475 +1,475 @@
#! /usr/bin/perl -W #! /usr/bin/perl -W
# #
# groupstats.pl # groupstats.pl
# #
# This script will get statistical data on newsgroup usage # This script will get statistical data on newsgroup usage
# from a database. # from a database.
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
BEGIN { BEGIN {
our $VERSION = "0.01"; our $VERSION = "0.01";
use File::Basename; use File::Basename;
push(@INC, dirname($0)); push(@INC, dirname($0));
} }
use strict; use strict;
use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
use DBI; use DBI;
################################# Main program ################################# ################################# Main program #################################
### read commandline options
my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');

### read configuration
my %Conf = %{ReadConfig('newsstats.conf')};

### override configuration via commandline options
# -g replaces the DBTableGrps setting read from newsstats.conf
my %ConfOverride;
$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
&OverrideConfig(\%Conf,\%ConfOverride);

### check for incompatible command line options
# you can't mix '-t', '-b' and '-l'
# -b/-l take precedence over -t, and -b takes precedence over -l
if ($Options{'b'} or $Options{'l'}) {
  # test with defined(), not for truth: the threshold handling further down
  # keys on defined($Options{'t'}), so '-t 0' has to be dropped here as well
  if (defined($Options{'t'})) {
    # drop -t
    warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
    undef($Options{'t'});
  };
  if ($Options{'b'} and $Options{'l'}) {
    # drop -l
    warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
    undef($Options{'l'});
  };
  # -q/-d don't work with -b or -l
  warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
  warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
};

### check output type
# default output type to 'dump'
$Options{'o'} = 'dump' if !$Options{'o'};
# fail if more than one newsgroup is combined with 'dumpgroup' type
# (a ':' or a '*' in the -n argument means more than one group)
die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
# accept 'dumpgroup' only with -n
if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
  $Options{'o'} = 'dump';
  warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
};
# set output type to 'pretty' for -l
if ($Options{'l'}) {
  $Options{'o'} = 'pretty';
  warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
};

### get time period
# first and last month of the reporting period, derived from -m and -p
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
# reset to one month for 'dump' output type
if ($Options{'o'} eq 'dump' and $Options{'p'}) {
  $StartMonth = $EndMonth;
  warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");
};
### init database
my $DBHandle = InitDB(\%Conf,1);

### create report
# get list of newsgroups (-n)
my ($QueryPart,@GroupList);
my $Newsgroups = $Options{'n'};
if ($Newsgroups) {
  # explode list of newsgroups for WHERE clause
  # NOTE(review): SQLGroupList() is assumed to return an SQL fragment with
  # one '?' per entry plus the matching bind values -- confirm in NewsStats.pm
  ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);
} else {
  # set to dummy value (always true)
  $QueryPart = 1;
};

# manage thresholds
if (defined($Options{'t'})) {
  if ($Options{'i'}) {
    # -i: list groups below threshold
    $QueryPart .= ' AND postings < ?';
  } else {
    # default: list groups above threshold
    $QueryPart .= ' AND postings > ?';
  };
  # push threshold to GroupList to match number of binding vars for DBQuery->execute
  push @GroupList,$Options{'t'};
}

# construct WHERE clause
# $QueryPart is "list of newsgroup" (or 1),
# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
# according to setting of -s
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));

# get length of longest newsgroup delivered by query for formatting purposes
# FIXME
my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);

my ($OrderClause,$DBQuery);
# -b (best of / top list) defined?
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
  # according to -q and -d
  $OrderClause = 'newsgroup';
  $OrderClause = 'postings' if $Options{'q'};
  $OrderClause .= ' DESC' if $Options{'d'};
  # prepare query: get number of postings per group from groups table for given months and newsgroups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif ($Options{'b'}) {
  # -b is set (then -l can't be!)
  # set sorting order (-i)
  if ($Options{'i'}) {
    $OrderClause = 'postings';
  } else {
    $OrderClause = 'postings DESC';
  };
  # push LIMIT to GroupList to match number of binding vars for DBQuery->execute
  push @GroupList,$Options{'b'};
  # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
  # -l must be set now, as all other cases have been taken care of
  # A group qualifies if it was beyond the level in EVERY month of the period:
  #   default: all months above the level  <=>  MIN(postings) > level
  #   -i:      all months below the level  <=>  MAX(postings) < level
  my ($Aggregate,$Comparator) = $Options{'i'} ? ('MAX','<') : ('MIN','>');
  # The WHERE clause is used twice in the query below (subselect and outer
  # select), so ALL of its bind values have to be supplied twice, not only
  # the months: remember the values bound so far and append them again
  # behind the level and the second pair of months.
  my @WhereBinds = @GroupList;
  push @GroupList,$Options{'l'};
  push @GroupList,$StartMonth,$EndMonth,@WhereBinds;
  # prepare query: get number of postings per group and month from groups table
  # for all groups that match the level in the given period
  # FIXME -- this query is ... in dire need of improvement
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Aggregate,$Comparator,$WhereClause));
};

# execute query
# bind values: the two months of the (first) WHERE clause, followed by
# everything collected in @GroupList above
$DBQuery->execute($StartMonth,$EndMonth,@GroupList)
  or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
# output results
# print caption (-c) with time period if -m or -p is set
# FIXME - month or period should be handled differently
printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});
# print caption (-c) with newsgroup list if -n is set
printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
# print caption (-c) with threshold if -t is set, taking -i into account
# (tested with defined(), as the query is filtered for any defined
# threshold, including 0)
printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and defined($Options{'t'});

if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # result rows are handed over to the library for output
  &OutputData($Options{'o'},$DBQuery,$MaxLength);
} elsif ($Options{'b'}) {
  # -b is set (then -l can't be!)
  # we have to read in the query results ourselves, as they do not have standard layout
  # (two columns: newsgroup and sum of postings)
  while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    # we just assign "top x" or "bottom x" instead of a month for the caption
    # FIXME
    print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
  };
} else {
  # -l must be set now, as all other cases have been taken care of
  # we have to read in the query results ourselves, as they do not have standard layout
  # (rows are ordered by newsgroup first, then by month)
  while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    # we just switch $Newsgroup and $Month for output generation, so a fixed
    # width of 7 is passed instead of $MaxLength
    # FIXME
    print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
  };
};

### close handles
$DBHandle->disconnect;
__END__ __END__
################################ Documentation ################################# ################################ Documentation #################################
=head1 NAME =head1 NAME
groupstats - create reports on newsgroup usage groupstats - create reports on newsgroup usage
=head1 SYNOPSIS =head1 SYNOPSIS
B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>] B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
See doc/README: Perl 5.8.x itself and the following modules from CPAN: See doc/README: Perl 5.8.x itself and the following modules from CPAN:
=over 2 =over 2
=item - =item -
Config::Auto Config::Auto
=item - =item -
DBI DBI
=back =back
=head1 DESCRIPTION =head1 DESCRIPTION
This script creates reports on newsgroup usage (number of postings per
group per month) taken from result tables created by group per month) taken from result tables created by
F<gatherstats.pl>. F<gatherstats.pl>.
The time period to act on defaults to last month; you can assign The time period to act on defaults to last month; you can assign
another month via the B<-m> switch or a time period via the B<-p> another month via the B<-m> switch or a time period via the B<-p>
switch; the latter takes precedence.
B<groupstats> will process all newsgroups by default; you can limit B<groupstats> will process all newsgroups by default; you can limit
that to only some newsgroups by supplying a list of those groups via that to only some newsgroups by supplying a list of those groups via
B<-n> (see below). You can include hierarchy levels in the output by B<-n> (see below). You can include hierarchy levels in the output by
adding the B<-s> switch (see below). adding the B<-s> switch (see below).
Furthermore you can set a threshold via B<-t> so that only newsgroups Furthermore you can set a threshold via B<-t> so that only newsgroups
with more postings per month will be included in the report. You can with more postings per month will be included in the report. You can
invert that by the B<-i> switch so only newsgroups with less than invert that by the B<-i> switch so only newsgroups with less than
I<threshold> postings per month will be included. I<threshold> postings per month will be included.
You can sort the output by number of postings per month instead of the You can sort the output by number of postings per month instead of the
default (alphabetical list of newsgroups) by using B<-q>; you can default (alphabetical list of newsgroups) by using B<-q>; you can
reverse the sorting order (from highest to lowest or in reversed reverse the sorting order (from highest to lowest or in reversed
alphabetical order) by using B<-d>. alphabetical order) by using B<-d>.
Furthermore, you can create a list of newsgroups that had consistently Furthermore, you can create a list of newsgroups that had consistently
more (or less) than x postings per month during the whole report more (or less) than x postings per month during the whole report
period by using B<-l> (together with B<-i> as needed).
Last but not least you can create a "best of" list of the top x Last but not least you can create a "best of" list of the top x
newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
By default, B<groupstats> will dump a very simple alphabetical list of By default, B<groupstats> will dump a very simple alphabetical list of
newsgroups, one per line, followed by the number of postings in that newsgroups, one per line, followed by the number of postings in that
month. This output format of course cannot sensibly be combined with month. This output format of course cannot sensibly be combined with
time periods, so you can set the output format by using B<-o> (see time periods, so you can set the output format by using B<-o> (see
below). Captions can be added by setting the B<-c> switch. below). Captions can be added by setting the B<-c> switch.
=head2 Configuration =head2 Configuration
F<groupstats.pl> will read its configuration via Config::Auto from
F<newsstats.conf>, which should be present in the same directory.
See doc/INSTALL for an overview of possible configuration options. See doc/INSTALL for an overview of possible configuration options.
You can override configuration options via the B<-g> switch. You can override configuration options via the B<-g> switch.
=head1 OPTIONS =head1 OPTIONS
=over 3 =over 3
=item B<-V> (version) =item B<-V> (version)
Print out version and copyright information on B<groupstats> and exit.
=item B<-h> (help) =item B<-h> (help)
Print this man page and exit. Print this man page and exit.
=item B<-m> I<YYYY-MM> (month) =item B<-m> I<YYYY-MM> (month)
Set processing period to a month in YYYY-MM format. Ignored if B<-p> Set processing period to a month in YYYY-MM format. Ignored if B<-p>
is set. is set.
=item B<-p> I<YYYY-MM:YYYY-MM> (period) =item B<-p> I<YYYY-MM:YYYY-MM> (period)
Set processing period to a time period between two months, each in
YYYY-MM format, separated by a colon. Overrides B<-m>. YYYY-MM format, separated by a colon. Overrides B<-m>.
=item B<-n> I<newsgroup(s)> (newsgroups) =item B<-n> I<newsgroup(s)> (newsgroups)
Limit processing to a certain set of newsgroups. I<newsgroup(s)> can Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
be a single newsgroup name (de.alt.test), a newsgroup hierarchy be a single newsgroup name (de.alt.test), a newsgroup hierarchy
(de.alt.*) or a list of either of these, separated by colons, for (de.alt.*) or a list of either of these, separated by colons, for
example example
de.test:de.alt.test:de.newusers.* de.test:de.alt.test:de.newusers.*
=item B<-t> I<threshold> (threshold) =item B<-t> I<threshold> (threshold)
Only include newsgroups with more than I<threshold> postings per Only include newsgroups with more than I<threshold> postings per
month. Can be inverted by the B<-i> switch so that only newsgroups month. Can be inverted by the B<-i> switch so that only newsgroups
with less than I<threshold> postings will be included. with less than I<threshold> postings will be included.
This setting will be ignored if B<-l> or B<-b> is set. This setting will be ignored if B<-l> or B<-b> is set.
=item B<-l> I<level> (level) =item B<-l> I<level> (level)
Only include newsgroups with more than I<level> postings per Only include newsgroups with more than I<level> postings per
month, every month during the whole reporting period. Can be inverted month, every month during the whole reporting period. Can be inverted
by the B<-i> switch so that only newsgroups with less than I<level> by the B<-i> switch so that only newsgroups with less than I<level>
postings every single month will be included. Output will be ordered postings every single month will be included. Output will be ordered
by newsgroup name, followed by month. by newsgroup name, followed by month.
This setting will be ignored if B<-b> is set. Overrides B<-t> and
can't be used together with B<-q> or B<-d>. Output format is forced
to I<pretty> (see below).
=item B<-b> I<n> (best of) =item B<-b> I<n> (best of)
Create a list of the I<n> newsgroups with the most postings over the Create a list of the I<n> newsgroups with the most postings over the
whole reporting period. Can be inverted by the B<-i> switch so that a whole reporting period. Can be inverted by the B<-i> switch so that a
list of the I<n> newsgroups with the least postings over the whole list of the I<n> newsgroups with the least postings over the whole
period is generated. Output will be ordered by sum of postings. period is generated. Output will be ordered by sum of postings.
Overrides B<-t> and B<-l> and can't be used together with B<-q> or Overrides B<-t> and B<-l> and can't be used together with B<-q> or
B<-d>. Output format is set to I<pretty> (see below). B<-d>. Output format is set to I<pretty> (see below).
=item B<-i> (invert) =item B<-i> (invert)
Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
threshold or level or generate a "bottom list" instead of a top list. threshold or level or generate a "bottom list" instead of a top list.
=item B<-s> (sum per hierarchy level) =item B<-s> (sum per hierarchy level)
Include "virtual" groups for every hierarchy level in output, for Include "virtual" groups for every hierarchy level in output, for
example: example:
de.alt.ALL 10 de.alt.ALL 10
de.alt.test 5 de.alt.test 5
de.alt.admin 7 de.alt.admin 7
See the B<gatherstats> man page for details. See the B<gatherstats> man page for details.
=item B<-o> I<output type> (output format) =item B<-o> I<output type> (output format)
Set output format. Default is I<dump>, consisting of an alphabetical Set output format. Default is I<dump>, consisting of an alphabetical
list of newsgroups, each on a new line, followed by the number of list of newsgroups, each on a new line, followed by the number of
postings in that month. This default format can't be used with time postings in that month. This default format can't be used with time
periods of more than one month. periods of more than one month.
I<list> format is like I<dump>, but will print the month in front of I<list> format is like I<dump>, but will print the month in front of
the newsgroup name. the newsgroup name.
I<dumpgroup> format can only be used with a group list (see B<-n>) of
exactly one newsgroup and is like I<dump>, but will output months, exactly one newsgroup and is like I<dump>, but will output months,
followed by the number of postings. followed by the number of postings.
If you don't need easily parsable output, you'll mostly use I<pretty> If you don't need easily parsable output, you'll mostly use I<pretty>
format, which will print a header for each new month and try to align format, which will print a header for each new month and try to align
newsgroup names and posting counts. Usage of B<-b> will force this newsgroup names and posting counts. Usage of B<-b> will force this
format. format.
=item B<-c> (captions) =item B<-c> (captions)
Add captions to output (reporting period, newsgroups list, threshold). Add captions to output (reporting period, newsgroups list, threshold).
=item B<-q> (quantity of postings) =item B<-q> (quantity of postings)
Sort by number of postings instead of by newsgroup names. Sort by number of postings instead of by newsgroup names.
Cannot be used with B<-l> or B<-b>. Cannot be used with B<-l> or B<-b>.
=item B<-d> (descending) =item B<-d> (descending)
Change sort order to descending. Change sort order to descending.
Cannot be used with B<-l> or B<-b>. Cannot be used with B<-l> or B<-b>.
=item B<-g> I<table> (postings per group table) =item B<-g> I<table> (postings per group table)
Override I<DBTableGrps> from F<newsstats.conf>. Override I<DBTableGrps> from F<newsstats.conf>.
=back =back
=head1 INSTALLATION =head1 INSTALLATION
See doc/INSTALL. See doc/INSTALL.
=head1 EXAMPLES =head1 EXAMPLES
Show number of postings per group for last month in I<dump> format:
groupstats groupstats
Show that report for January of 2010 and de.alt.* plus de.test, Show that report for January of 2010 and de.alt.* plus de.test,
including display of hierarchy levels: including display of hierarchy levels:
groupstats -m 2010-01 -n de.alt.*:de.test -s groupstats -m 2010-01 -n de.alt.*:de.test -s
Show that report for the year of 2010 in I<pretty> format: Show that report for the year of 2010 in I<pretty> format:
groupstats -p 2010-01:2010-12 -o pretty groupstats -p 2010-01:2010-12 -o pretty
Only show newsgroups with less than 30 postings last month, ordered Only show newsgroups with less than 30 postings last month, ordered
by number of postings, descending, in I<pretty> format: by number of postings, descending, in I<pretty> format:
groupstats -iqdt 30 -o pretty groupstats -iqdt 30 -o pretty
Show top 10 for the first half-year of 2010 in I<pretty> format:
groupstats -p 2010-01:2010-06 -b 10 -o pretty groupstats -p 2010-01:2010-06 -b 10 -o pretty
Report all groups that had less than 30 postings every single month
in the year of 2010 (I<pretty> format is forced):
groupstats -p 2010-01:2010-12 -il 30 groupstats -p 2010-01:2010-12 -il 30
=head1 FILES =head1 FILES
=over 4 =over 4
=item F<groupstats.pl> =item F<groupstats.pl>
The script itself. The script itself.
=item F<NewsStats.pm> =item F<NewsStats.pm>
Library functions for the NewsStats package. Library functions for the NewsStats package.
=item F<newsstats.conf> =item F<newsstats.conf>
Runtime configuration file for B<NewsStats>.
=back =back
=head1 BUGS =head1 BUGS
Please report any bugs or feature requests to the author or use the Please report any bugs or feature requests to the author or use the
bug tracker at L<http://bugs.th-h.de/>! bug tracker at L<http://bugs.th-h.de/>!
=head1 SEE ALSO =head1 SEE ALSO
=over 2 =over 2
=item - =item -
doc/README doc/README
=item - =item -
doc/INSTALL doc/INSTALL
=item - =item -
gatherstats -h gatherstats -h
=back =back
This script is part of the B<NewsStats> package. This script is part of the B<NewsStats> package.
=head1 AUTHOR =head1 AUTHOR
Thomas Hochstein <thh@inter.net> Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010 Thomas Hochstein <thh@inter.net> Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.
=cut =cut

View file

@ -1,257 +1,257 @@
#! /usr/bin/perl -W #! /usr/bin/perl -W
# #
# install.pl # install.pl
# #
# This script will create database tables as necessary. # This script will create database tables as necessary.
# #
# It is part of the NewsStats package. # It is part of the NewsStats package.
# #
# Copyright (c) 2010 Thomas Hochstein <thh@inter.net> # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
# #
# It can be redistributed and/or modified under the same terms under # It can be redistributed and/or modified under the same terms under
# which Perl itself is published. # which Perl itself is published.
BEGIN {
  # dirname() is needed below to locate the directory of this script
  use File::Basename;
  # version number of this script
  our $VERSION = "0.01";
  # we're in .../install, so our module is in ..
  push @INC, dirname($0).'/..';
}
use strict; use strict;
use NewsStats qw(:DEFAULT); use NewsStats qw(:DEFAULT);
use Cwd; use Cwd;
use DBI; use DBI;
################################# Main program #################################

### read commandline options
# The empty string means this script passes no option letters of its
# own to ReadOptions.
my %Options = &ReadOptions('');

### change working directory to .. (as we're in .../install)
# The configuration file is looked up by a relative name below, so a
# failed chdir must abort instead of silently reading (or missing) a
# newsstats.conf in whatever directory we happen to be in.
my $BaseDir = dirname($0).'/..';
chdir($BaseDir)
  or die "$MySelf: E: Can't change working directory to $BaseDir: $!\n";

### read configuration
print("Reading configuration.\n");
# ReadConfig's return value is dereferenced as a hash reference and
# copied into %Conf.
my %Conf = %{ReadConfig('newsstats.conf')};
##### --------------------------------------------------------------------------
##### Database table definitions
##### --------------------------------------------------------------------------

# %DBCreate maps the configuration key of each table to the SQL
# statement that creates it. The real table names are interpolated
# from %Conf when the heredocs are evaluated.
my %DBCreate;

# Table for the raw per-article data.
$DBCreate{'DBTableRaw'} = <<RAW;
--
-- Table structure for table DBTableRaw
--
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableRaw'}` (
  `id` bigint(20) unsigned NOT NULL auto_increment,
  `day` date NOT NULL,
  `mid` varchar(250) character set ascii NOT NULL,
  `date` datetime NOT NULL,
  `timestamp` bigint(20) NOT NULL,
  `token` varchar(80) character set ascii NOT NULL,
  `size` bigint(20) NOT NULL,
  `peer` varchar(250) NOT NULL,
  `path` varchar(1000) NOT NULL,
  `newsgroups` varchar(1000) NOT NULL,
  `headers` longtext NOT NULL,
  `disregard` tinyint(1) default '0',
  PRIMARY KEY (`id`),
  KEY `day` (`day`),
  KEY `mid` (`mid`),
  KEY `peer` (`peer`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Raw data';
RAW

# Table for the number of postings per newsgroup and month.
$DBCreate{'DBTableGrps'} = <<GRPS;
--
-- Table structure for table DBTableGrps
--
CREATE TABLE IF NOT EXISTS `$Conf{'DBTableGrps'}` (
  `id` bigint(20) unsigned NOT NULL auto_increment,
  `month` varchar(7) character set ascii NOT NULL,
  `newsgroup` varchar(100) NOT NULL,
  `postings` int(11) NOT NULL,
  `revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`),
  UNIQUE KEY `month_newsgroup` (`month`,`newsgroup`),
  KEY `newsgroup` (`newsgroup`),
  KEY `postings` (`postings`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per newsgroup';
GRPS

##### --------------------------- End of definitions ---------------------------
### create database tables
print "-----\nStarting database table generation.\n";

# DB init
my $DBHandle = InitDB(\%Conf,1);

# read tables
# Result is a hash keyed by table name, used below only to test
# whether a table is already present.
my %TablesInDB = %{$DBHandle->table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')};

# check for tables and create them, if they don't exist yet
foreach my $Table (keys %DBCreate) {
  # $Table is the configuration key; $Conf{$Table} is the real table name
  if (defined($TablesInDB{$Conf{$Table}})) {
    printf("Database table %s.%s already exists, skipping ....\n",$Conf{'DBDatabase'},$Conf{$Table});
    next;
  };
  # prepare() may return undef; report that as a database error instead
  # of failing on a method call on an undefined value.
  my $DBQuery = $DBHandle->prepare($DBCreate{$Table})
    or die sprintf("$MySelf: E: Can't prepare creation of table %s in database %s: %s\n",$Table,$Conf{'DBDatabase'},$DBI::errstr);
  # The format used to end in "%s%\n"; the stray "%" was an invalid
  # conversion and has been removed.
  $DBQuery->execute()
    or die sprintf("$MySelf: E: Can't create table %s in database %s: %s\n",$Table,$Conf{'DBDatabase'},$DBI::errstr);
  printf("Database table %s.%s created successfully.\n",$Conf{'DBDatabase'},$Conf{$Table});
};

# close handle
$DBHandle->disconnect;
print "Database table generation done.\n";
### output information on other necessary steps
# The working directory was changed to the package directory earlier,
# so cwd() yields the path that is printed in front of feedlog.pl.
my $Path = cwd();
# The heredoc interpolates, so the line-continuation backslashes of the
# newsfeeds example have to be written as "\\"; a single backslash in
# front of the newline would be dropped from the output.
print <<TODO;
-----
Things left to do:

1) Setup an INN feed to feedlog.pl

   a) Edit your 'newsfeeds' file and insert something like

          ## gather statistics for NewsStats
          newsstats!\\
                  :!*,de.*\\
                  :Tc,WmtfbsPNH,Ac:$Path/feedlog.pl

      Please

      * check that you got the path to feedlog.pl right
      * check that feedlog.pl can be executed by the news user
      * adapt the pattern (here: 'de.*') to your needs

   b) Check your 'newsfeeds' syntax:

         # ctlinnd checkfile

      and reload 'newsfeeds':

         # ctlinnd reload newsfeeds 'Adding newsstats! feed'

   c) Watch your 'news.notice' and 'errlog' files:

         # tail -f /var/log/news/news.notice
         ...
         # tail -f /var/log/news/errlog

2) Watch your $Conf{'DBTableRaw'} table fill.

3) Read the documentation. ;)

Enjoy!

-thh <thh\@inter.net>
TODO
__END__ __END__
################################ Documentation ################################# ################################ Documentation #################################
=head1 NAME =head1 NAME
install - installation script install - installation script
=head1 SYNOPSIS =head1 SYNOPSIS
B<install> [B<-Vh>] B<install> [B<-Vh>]
=head1 REQUIREMENTS =head1 REQUIREMENTS
See doc/README: Perl 5.8.x itself and the following modules from CPAN: See doc/README: Perl 5.8.x itself and the following modules from CPAN:
=over 2 =over 2
=item - =item -
Config::Auto Config::Auto
=item - =item -
DBI DBI
=back =back
=head1 DESCRIPTION =head1 DESCRIPTION
This script will create database tables as necessary and configured. This script will create database tables as necessary and configured.
=head2 Configuration =head2 Configuration
F<install.pl> will read its configuration from F<newsstats.conf> via F<install.pl> will read its configuration from F<newsstats.conf> via
Config::Auto. Config::Auto.
See doc/INSTALL for an overview of possible configuration options. See doc/INSTALL for an overview of possible configuration options.
=head1 OPTIONS =head1 OPTIONS
=over 3 =over 3
=item B<-V> (version) =item B<-V> (version)
Print out version and copyright information on B<install> and exit. Print out version and copyright information on B<install> and exit.
=item B<-h> (help) =item B<-h> (help)
Print this man page and exit. Print this man page and exit.
=back =back
=head1 FILES =head1 FILES
=over 4 =over 4
=item F<install.pl> =item F<install.pl>
The script itself. The script itself.
=item F<NewsStats.pm> =item F<NewsStats.pm>
Library functions for the NewsStats package. Library functions for the NewsStats package.
=item F<newsstats.conf> =item F<newsstats.conf>
Runtime configuration file for B<NewsStats>. Runtime configuration file for B<NewsStats>.
=back =back
=head1 BUGS =head1 BUGS
Please report any bugs or feature requests to the author or use the Please report any bugs or feature requests to the author or use the
bug tracker at L<http://bugs.th-h.de/>! bug tracker at L<http://bugs.th-h.de/>!
=head1 SEE ALSO =head1 SEE ALSO
=over 2 =over 2
=item - =item -
doc/README doc/README
=item - =item -
doc/INSTALL doc/INSTALL
=back =back
This script is part of the B<NewsStats> package. This script is part of the B<NewsStats> package.
=head1 AUTHOR =head1 AUTHOR
Thomas Hochstein <thh@inter.net> Thomas Hochstein <thh@inter.net>
=head1 COPYRIGHT AND LICENSE =head1 COPYRIGHT AND LICENSE
Copyright (c) 2010 Thomas Hochstein <thh@inter.net> Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself. under the same terms as Perl itself.
=cut =cut