Add --mid to gatherstats.

Process only the raw entry identified by the given Message-ID
instead of a whole month. Using --mid implicitly enables the
--test and --debug modes.

Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
Thomas Hochstein 2025-05-11 01:55:09 +02:00
parent 57af475b80
commit c6432dcd44

View file

@ -37,13 +37,15 @@ my %LegalStats;
### read commandline options ### read commandline options
my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH, my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
$OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest,$OptConfFile); $OptHostsDB,$OptMID,$OptMonth,$OptRawDB,$OptStatsType,$OptTest,
$OptConfFile);
GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile, GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
'clientsdb=s' => \$OptClientsDB, 'clientsdb=s' => \$OptClientsDB,
'd|debug!' => \$OptDebug, 'd|debug!' => \$OptDebug,
'groupsdb=s' => \$OptGroupsDB, 'groupsdb=s' => \$OptGroupsDB,
'hierarchy=s' => \$OptTLH, 'hierarchy=s' => \$OptTLH,
'hostsdb=s' => \$OptHostsDB, 'hostsdb=s' => \$OptHostsDB,
'mid=s' => \$OptMID,
'm|month=s' => \$OptMonth, 'm|month=s' => \$OptMonth,
'rawdb=s' => \$OptRawDB, 'rawdb=s' => \$OptRawDB,
's|stats=s' => \$OptStatsType, 's|stats=s' => \$OptStatsType,
@ -64,6 +66,11 @@ $ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
$ConfOverride{'TLH'} = $OptTLH if $OptTLH; $ConfOverride{'TLH'} = $OptTLH if $OptTLH;
&OverrideConfig(\%Conf,\%ConfOverride); &OverrideConfig(\%Conf,\%ConfOverride);
# set --debug and --test if --mid is set
if ($OptMID) {
$OptDebug = 1; $OptTest = 1;
}
### get type of information to gather, defaulting to 'all' ### get type of information to gather, defaulting to 'all'
$OptStatsType = 'all' if !$OptStatsType; $OptStatsType = 'all' if !$OptStatsType;
&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType)) &Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType))
@ -120,7 +127,7 @@ foreach my $Month (&ListMonth($Period)) {
### GroupStats ### GroupStats
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') { if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
&GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptTest,$OptDebug); &GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptMID,$OptTest,$OptDebug);
}; };
### HostStats ### HostStats
@ -129,7 +136,7 @@ foreach my $Month (&ListMonth($Period)) {
my @KnownHosts = qw(aioe.org arcor-online.net arcor-ip.de news.astraweb.com read.cnntp.org easynews.com eternal-september.org my @KnownHosts = qw(aioe.org arcor-online.net arcor-ip.de news.astraweb.com read.cnntp.org easynews.com eternal-september.org
fernuni-hagen.de free.fr newsread.freenet.ag googlegroups.com fernuni-hagen.de free.fr newsread.freenet.ag googlegroups.com
news.neostrada.pl newsdawg.com newscene.com news-service.com octanews.com xsnews.nl news.xs4all.nl); news.neostrada.pl newsdawg.com newscene.com news-service.com octanews.com xsnews.nl news.xs4all.nl);
&HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTest,$OptDebug,@KnownHosts); &HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptMID,$OptTest,$OptDebug,@KnownHosts);
}; };
}; };
@ -145,29 +152,40 @@ sub GroupStats {
### $DBRaw : database table for raw data (to read from) ### $DBRaw : database table for raw data (to read from)
### $DBGrps : database table for groups data (to write to) ### $DBGrps : database table for groups data (to write to)
### $Month : current month to do ### $Month : current month to do
### $MID : specific Message-ID to fetch (testing purposes)
### $TLH : TLHs to collect ### $TLH : TLHs to collect
### $Checkgroupsfile : filename template for checkgroups file ### $Checkgroupsfile : filename template for checkgroups file
### (expanded to $Checkgroupsfile-$Month) ### (expanded to $Checkgroupsfile-$Month)
### $Test : test mode ### $Test : test mode
### $Debug : debug mode ### $Debug : debug mode
### OUT: (nothing) ### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$CheckgroupsFile,$Test,$Debug) = @_; my ($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$CheckgroupsFile,$MID,$Test,$Debug) = @_;
# read list of newsgroups from --checkgroups # read list of newsgroups from --checkgroups
# into a hash # into a hash
my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$CheckgroupsFile,$Month))} my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$CheckgroupsFile,$Month))}
if $CheckgroupsFile; if $CheckgroupsFile;
### ---------------------------------------------- my $DBQuery;
### get groups data (number of postings per group) if (!$MID) {
# get groups data from raw table for given month ### ----------------------------------------------
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ". ### get groups data (number of postings per group)
"WHERE day LIKE ? AND NOT disregard", # get groups data from raw table for given month
$DBRaw)); $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
$DBQuery->execute($Month.'-%') "WHERE day LIKE ? AND NOT disregard",
or &Bleat(2,sprintf("Can't get groups data for %s from %s: ". $DBRaw));
"$DBI::errstr\n",$Month, $DBQuery->execute($Month.'-%')
$DBRaw)); or &Bleat(2,sprintf("Can't get groups data for %s from %s: ".
"$DBI::errstr\n",$Month,
$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get groups data for %s from %s: ".
"$DBI::errstr\n",$MID,
$DBRaw));
}
# count postings per group # count postings per group
my %Postings; my %Postings;
@ -232,22 +250,30 @@ sub HostStats {
### $DBRaw : database table for raw data (to read from) ### $DBRaw : database table for raw data (to read from)
### $DBHosts : database table for hosts data (to write to) ### $DBHosts : database table for hosts data (to write to)
### $Month : current month to do ### $Month : current month to do
### $MID : specific Message-ID to fetch (testing purposes)
### $Test : test mode ### $Test : test mode
### $Debug : debug mode ### $Debug : debug mode
### @KnownHosts : list of known hosts with subdomains ### @KnownHosts : list of known hosts with subdomains
### OUT: (nothing) ### OUT: (nothing)
my ($DBHandle,$DBRaw,$DBHosts,$Month,$Test,$Debug,@KnownHosts) = @_; my ($DBHandle,$DBRaw,$DBHosts,$Month,$MID,$Test,$Debug,@KnownHosts) = @_;
# define known hosts using subdomains my (%Postings,$DBQuery);
my %Postings;
# get raw header data from raw table for given month if (!$MID) {
my $DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ". # get raw header data from raw table for given month
"WHERE day LIKE ? AND NOT disregard", $DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
$DBRaw)); "WHERE day LIKE ? AND NOT disregard",
$DBQuery->execute($Month.'-%') $DBRaw));
or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ". $DBQuery->execute($Month.'-%')
"$DBI::errstr\n",$Month,$DBRaw)); or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,$DBRaw));
} else {
$DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
"WHERE mid = ?", $DBRaw));
$DBQuery->execute($MID)
or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
"$DBI::errstr\n",$MID,$DBRaw));
}
### ---------------------------------------------- ### ----------------------------------------------
print "----- HostStats -----\n" if $Debug; print "----- HostStats -----\n" if $Debug;
@ -319,7 +345,7 @@ sub HostStats {
&Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host; &Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
} }
# !DEBUG! printf("%s: %s\n", $Header{'message-id'}, $Host); printf("%s: %s\n", $Header{'message-id'}, $Host) if $MID;
}; };
# delete old data for that month # delete old data for that month