Add --mid to gatherstats.

Process only the single raw-table entry identified by the given Message-ID.
Setting --mid also enables --test and --debug modes.

Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
Thomas Hochstein 2025-05-11 01:55:09 +02:00
parent 57af475b80
commit c6432dcd44

View file

@ -37,13 +37,15 @@ my %LegalStats;
### read commandline options
my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
$OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest,$OptConfFile);
$OptHostsDB,$OptMID,$OptMonth,$OptRawDB,$OptStatsType,$OptTest,
$OptConfFile);
GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
'clientsdb=s' => \$OptClientsDB,
'd|debug!' => \$OptDebug,
'groupsdb=s' => \$OptGroupsDB,
'hierarchy=s' => \$OptTLH,
'hostsdb=s' => \$OptHostsDB,
'mid=s' => \$OptMID,
'm|month=s' => \$OptMonth,
'rawdb=s' => \$OptRawDB,
's|stats=s' => \$OptStatsType,
@ -64,6 +66,11 @@ $ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
$ConfOverride{'TLH'} = $OptTLH if $OptTLH;
&OverrideConfig(\%Conf,\%ConfOverride);
# set --debug and --test if --mid is set
if ($OptMID) {
$OptDebug = 1; $OptTest = 1;
}
### get type of information to gather, defaulting to 'all'
$OptStatsType = 'all' if !$OptStatsType;
&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType))
@ -120,7 +127,7 @@ foreach my $Month (&ListMonth($Period)) {
### GroupStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptTest,$OptDebug);
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptMID,$OptTest,$OptDebug);
};
### HostStats
@ -129,7 +136,7 @@ foreach my $Month (&ListMonth($Period)) {
my @KnownHosts = qw(aioe.org arcor-online.net arcor-ip.de news.astraweb.com read.cnntp.org easynews.com eternal-september.org
fernuni-hagen.de free.fr newsread.freenet.ag googlegroups.com
news.neostrada.pl newsdawg.com newscene.com news-service.com octanews.com xsnews.nl news.xs4all.nl);
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTest,$OptDebug,@KnownHosts);
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptMID,$OptTest,$OptDebug,@KnownHosts);
};
};
@ -145,29 +152,40 @@ sub GroupStats {
### $DBRaw : database table for raw data (to read from)
### $DBGrps : database table for groups data (to write to)
### $Month : current month to do
### $MID : specific Message-ID to fetch (testing purposes); passed after $CheckgroupsFile
### $TLH : TLHs to collect
### $Checkgroupsfile : filename template for checkgroups file
### (expanded to $Checkgroupsfile-$Month)
### $Test : test mode
### $Debug : debug mode
### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$CheckgroupsFile,$Test,$Debug) = @_;
my ($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$CheckgroupsFile,$MID,$Test,$Debug) = @_;
# read list of newsgroups from --checkgroups
# into a hash
my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$CheckgroupsFile,$Month))}
if $CheckgroupsFile;
### ----------------------------------------------
### get groups data (number of postings per group)
# get groups data from raw table for given month
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get groups data for %s from %s: ".
"$DBI::errstr\n",$Month,
$DBRaw));
my $DBQuery;
if (!$MID) {
### ----------------------------------------------
### get groups data (number of postings per group)
# get groups data from raw table for given month
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get groups data for %s from %s: ".
"$DBI::errstr\n",$Month,
$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get groups data for %s from %s: ".
"$DBI::errstr\n",$MID,
$DBRaw));
}
# count postings per group
my %Postings;
@ -232,22 +250,30 @@ sub HostStats {
### $DBRaw : database table for raw data (to read from)
### $DBHosts : database table for hosts data (to write to)
### $Month : current month to do
### $MID : specific Message-ID to fetch (testing purposes)
### $Test : test mode
### $Debug : debug mode
### @KnownHosts : list of known hosts with subdomains
### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBHosts,$Month,$Test,$Debug,@KnownHosts) = @_;
my ($DBHandle,$DBRaw,$DBHosts,$Month,$MID,$Test,$Debug,@KnownHosts) = @_;
# define known hosts using subdomains
my %Postings;
my (%Postings,$DBQuery);
# get raw header data from raw table for given month
my $DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,$DBRaw));
if (!$MID) {
# get raw header data from raw table for given month
$DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
"WHERE day LIKE ? AND NOT disregard",
$DBRaw));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
"$DBI::errstr\n",$MID,$DBRaw));
}
### ----------------------------------------------
print "----- HostStats -----\n" if $Debug;
@ -319,7 +345,7 @@ sub HostStats {
&Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
}
# !DEBUG! printf("%s: %s\n", $Header{'message-id'}, $Host);
printf("%s: %s\n", $Header{'message-id'}, $Host) if $MID;
};
# delete old data for that month