Merge branch 'gatherstats' into next
This commit is contained in:
commit
c3973e7d0d
|
@ -184,7 +184,7 @@ sub ListNewsgroups {
|
||||||
next if($TLH and !/^$TLH/);
|
next if($TLH and !/^$TLH/);
|
||||||
# don't count invalid newsgroups
|
# don't count invalid newsgroups
|
||||||
if(%ValidGroups and !defined($ValidGroups{$_})) {
|
if(%ValidGroups and !defined($ValidGroups{$_})) {
|
||||||
&Bleat(1,sprintf("DROPPED: %s",$_));
|
warn (sprintf("DROPPED: %s\n",$_));
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
# add original newsgroup to %Newsgroups
|
# add original newsgroup to %Newsgroups
|
||||||
|
@ -230,8 +230,9 @@ sub ReadGroupList {
|
||||||
my %ValidGroups;
|
my %ValidGroups;
|
||||||
open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
|
open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
|
||||||
while (<$LIST>) {
|
while (<$LIST>) {
|
||||||
s/^(\S+).*$/$1/;
|
s/^\s*(\S+).*$/$1/;
|
||||||
chomp;
|
chomp;
|
||||||
|
next if /^$/;
|
||||||
$ValidGroups{$_} = '1';
|
$ValidGroups{$_} = '1';
|
||||||
};
|
};
|
||||||
close $LIST;
|
close $LIST;
|
||||||
|
|
|
@ -78,28 +78,30 @@ my $TLH;
|
||||||
if ($Conf{'TLH'}) {
|
if ($Conf{'TLH'}) {
|
||||||
# $Conf{'TLH'} is parsed as an array by Config::Auto;
|
# $Conf{'TLH'} is parsed as an array by Config::Auto;
|
||||||
# make a flat list again, separated by :
|
# make a flat list again, separated by :
|
||||||
if (ref($TLH) eq 'ARRAY') {
|
if (ref($Conf{'TLH'}) eq 'ARRAY') {
|
||||||
$TLH = join(':',@{$Conf{'TLH'}});
|
$TLH = join(':',@{$Conf{'TLH'}});
|
||||||
} else {
|
} else {
|
||||||
$TLH = $Conf{'TLH'};
|
$TLH = $Conf{'TLH'};
|
||||||
}
|
}
|
||||||
# strip whitespace
|
# strip whitespace
|
||||||
$TLH =~ s/\s//g;
|
$TLH =~ s/\s//g;
|
||||||
|
# add trailing dots if none are present yet
|
||||||
|
# (using negative look-behind assertions)
|
||||||
|
$TLH =~ s/(?<!\.):/.:/g;
|
||||||
|
$TLH =~ s/(?<!\.)$/./;
|
||||||
# check for illegal characters
|
# check for illegal characters
|
||||||
&Bleat(2,'Config error - illegal characters in TLH definition!')
|
&Bleat(2,'Config error - illegal characters in TLH definition!')
|
||||||
if ($TLH !~ /^[a-zA-Z0-9:]+$/);
|
if ($TLH !~ /^[a-zA-Z0-9:+.-]+$/);
|
||||||
|
# escape dots
|
||||||
|
$TLH =~ s/\./\\./g;
|
||||||
if ($TLH =~ /:/) {
|
if ($TLH =~ /:/) {
|
||||||
# reformat $TLH from a:b to (a)|(b),
|
# reformat $TLH from a:b to (a)|(b),
|
||||||
# e.g. replace '.' by '|'
|
# i.e. replace ':' by ')|('
|
||||||
$TLH =~ s/:/)|(/g;
|
$TLH =~ s/:/)|(/g;
|
||||||
$TLH = '(' . $TLH . ')';
|
$TLH = '(' . $TLH . ')';
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
# read list of newsgroups from --checkgroups
|
|
||||||
# into a hash
|
|
||||||
my %ValidGroups = %{ReadGroupList($OptCheckgroupsFile)} if $OptCheckgroupsFile;
|
|
||||||
|
|
||||||
### init database
|
### init database
|
||||||
my $DBHandle = InitDB(\%Conf,1);
|
my $DBHandle = InitDB(\%Conf,1);
|
||||||
|
|
||||||
|
@ -110,6 +112,11 @@ foreach my $Month (&ListMonth($Period)) {
|
||||||
print "---------- $Month ----------\n" if $OptDebug;
|
print "---------- $Month ----------\n" if $OptDebug;
|
||||||
|
|
||||||
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
|
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
|
||||||
|
# read list of newsgroups from --checkgroups
|
||||||
|
# into a hash
|
||||||
|
my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))}
|
||||||
|
if $OptCheckgroupsFile;
|
||||||
|
|
||||||
### ----------------------------------------------
|
### ----------------------------------------------
|
||||||
### get groups data (number of postings per group)
|
### get groups data (number of postings per group)
|
||||||
# get groups data from raw table for given month
|
# get groups data from raw table for given month
|
||||||
|
@ -125,7 +132,7 @@ foreach my $Month (&ListMonth($Period)) {
|
||||||
# count postings per group
|
# count postings per group
|
||||||
my %Postings;
|
my %Postings;
|
||||||
while (($_) = $DBQuery->fetchrow_array) {
|
while (($_) = $DBQuery->fetchrow_array) {
|
||||||
# get list oft newsgroups and hierarchies from Newsgroups:
|
# get list of newsgroups and hierarchies from Newsgroups:
|
||||||
my %Newsgroups = ListNewsgroups($_,$TLH,
|
my %Newsgroups = ListNewsgroups($_,$TLH,
|
||||||
$OptCheckgroupsFile ? \%ValidGroups : '');
|
$OptCheckgroupsFile ? \%ValidGroups : '');
|
||||||
# count each newsgroup and hierarchy once
|
# count each newsgroup and hierarchy once
|
||||||
|
@ -137,9 +144,16 @@ foreach my $Month (&ListMonth($Period)) {
|
||||||
# add valid but empty groups if --checkgroups is set
|
# add valid but empty groups if --checkgroups is set
|
||||||
if (%ValidGroups) {
|
if (%ValidGroups) {
|
||||||
foreach (sort keys %ValidGroups) {
|
foreach (sort keys %ValidGroups) {
|
||||||
|
if (!defined($Postings{$_})) {
|
||||||
|
# expand newsgroup with hierarchies
|
||||||
|
my @Newsgroups = ParseHierarchies($_);
|
||||||
|
# add each empty newsgroup and empty hierarchies, too, as needed
|
||||||
|
foreach (@Newsgroups) {
|
||||||
if (!defined($Postings{$_})) {
|
if (!defined($Postings{$_})) {
|
||||||
$Postings{$_} = 0;
|
$Postings{$_} = 0;
|
||||||
warn (sprintf("ADDED: %s as empty group\n",$_));
|
warn (sprintf("ADDED: %s as empty group\n",$_));
|
||||||
|
};
|
||||||
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
@ -189,7 +203,7 @@ gatherstats - process statistical data from a raw source
|
||||||
|
|
||||||
=head1 SYNOPSIS
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats] [B<-c> I<checkgroups file>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
|
B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats> [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<--groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
|
||||||
|
|
||||||
=head1 REQUIREMENTS
|
=head1 REQUIREMENTS
|
||||||
|
|
||||||
|
@ -283,15 +297,23 @@ Set processing type to one of I<all> and I<groups>. Defaults to all
|
||||||
(and is currently rather pointless as only I<groups> has been
|
(and is currently rather pointless as only I<groups> has been
|
||||||
implemented).
|
implemented).
|
||||||
|
|
||||||
=item B<-c>, B<--checkgroups> I<filename>
|
=item B<-c>, B<--checkgroups> I<filename template>
|
||||||
|
|
||||||
Check each group against a list of valid newsgroups read from
|
Check each group against a list of valid newsgroups read from a file,
|
||||||
I<filename>, one group on each line and ignoring everything after the
|
one group on each line and ignoring everything after the first
|
||||||
first whitespace (so you can use a file in checkgroups format or (part
|
whitespace (so you can use a file in checkgroups format or (part of)
|
||||||
of) your INN active file).
|
your INN active file).
|
||||||
|
|
||||||
Newsgroups not found in I<filename> will be dropped (and logged to
|
The filename is built from I<filename template> by appending a hyphen and
|
||||||
STDERR), and newsgroups found in I<filename> but having no postings
|
the month B<gatherstats> is currently processing (see B<--month>), so that
|
||||||
|
|
||||||
|
gatherstats -m 2010-01:2010-12 -c checkgroups
|
||||||
|
|
||||||
|
will check against F<checkgroups-2010-01> for January 2010, against
|
||||||
|
F<checkgroups-2010-02> for February 2010 and so on.
|
||||||
|
|
||||||
|
Newsgroups not found in the checkgroups file will be dropped (and
|
||||||
|
logged to STDERR), and newsgroups found there but having no postings
|
||||||
will be added with a count of 0 (and logged to STDERR).
|
will be added with a count of 0 (and logged to STDERR).
|
||||||
|
|
||||||
=item B<--hierarchy> I<TLH> (newsgroup hierarchy)
|
=item B<--hierarchy> I<TLH> (newsgroup hierarchy)
|
||||||
|
@ -335,9 +357,9 @@ Process all types of information for January of 2010:
|
||||||
gatherstats --month 2010-01
|
gatherstats --month 2010-01
|
||||||
|
|
||||||
Process only number of postings for the year of 2010,
|
Process only number of postings for the year of 2010,
|
||||||
checking against checkgroups-2010.txt:
|
checking against checkgroups-*:
|
||||||
|
|
||||||
gatherstats -m 2010-01:2010-12 -s groups -c checkgroups-2010.txt
|
gatherstats -m 2010-01:2010-12 -s groups -c checkgroups
|
||||||
|
|
||||||
=head1 FILES
|
=head1 FILES
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue