First version of HostStats.
Signed-off-by: Thomas Hochstein <thh@thh.name>
This commit is contained in:
		
							parent
							
								
									73a2d70f16
								
							
						
					
					
						commit
						9ccb915d77
					
				
					 1 changed files with 132 additions and 2 deletions
				
			
		| 
						 | 
				
			
			@ -20,7 +20,7 @@ BEGIN {
 | 
			
		|||
use strict;
 | 
			
		||||
use warnings;
 | 
			
		||||
 | 
			
		||||
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList);
 | 
			
		||||
use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList ParseHeaders);
 | 
			
		||||
 | 
			
		||||
use DBI;
 | 
			
		||||
use Getopt::Long qw(GetOptions);
 | 
			
		||||
| 
						 | 
				
			
			@ -31,7 +31,7 @@ Getopt::Long::config ('bundling');
 | 
			
		|||
# define types of information that can be gathered
 | 
			
		||||
# all / groups (/ clients / hosts)
 | 
			
		||||
my %LegalStats;
 | 
			
		||||
@LegalStats{('all','groups')} = ();
 | 
			
		||||
@LegalStats{('all','groups','hosts')} = ();
 | 
			
		||||
 | 
			
		||||
################################# Main program #################################
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -110,6 +110,7 @@ if ($Conf{'TLH'}) {
 | 
			
		|||
my $DBHandle = InitDB(\%Conf,1);
 | 
			
		||||
my $DBRaw    = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
 | 
			
		||||
my $DBGrps   = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
 | 
			
		||||
my $DBHosts  = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableHosts'});
 | 
			
		||||
 | 
			
		||||
### get data for each month
 | 
			
		||||
&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
 | 
			
		||||
| 
						 | 
				
			
			@ -121,6 +122,15 @@ foreach my $Month (&ListMonth($Period)) {
 | 
			
		|||
  if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
 | 
			
		||||
    &GroupStats($DBHandle,$DBRaw,$DBGrps,$Month,$TLH,$OptCheckgroupsFile,$OptTest,$OptDebug);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  ### HostStats
 | 
			
		||||
  if ($OptStatsType eq 'all' or $OptStatsType eq 'hosts') {
 | 
			
		||||
    # define known hosts using subdomains
 | 
			
		||||
    my @KnownHosts = qw(aioe.org arcor-online.net arcor-ip.de news.astraweb.com read.cnntp.org easynews.com eternal-september.org
 | 
			
		||||
                        fernuni-hagen.de free.fr newsread.freenet.ag googlegroups.com
 | 
			
		||||
                        news.neostrada.pl newsdawg.com newscene.com news-service.com octanews.com xsnews.nl news.xs4all.nl);
 | 
			
		||||
    &HostStats($DBHandle,$DBRaw,$DBHosts,$Month,$OptTest,$OptDebug,@KnownHosts);
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
### close handles
 | 
			
		||||
| 
						 | 
				
			
			@ -215,6 +225,126 @@ sub GroupStats {
 | 
			
		|||
};
 | 
			
		||||
### ----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
sub HostStats {
 | 
			
		||||
### ----------------------------------------------------------------------------
 | 
			
		||||
### collect number of postings per server
 | 
			
		||||
### IN : $DBHandle         : database handle
 | 
			
		||||
###      $DBRaw            : database table for raw data (to read from)
 | 
			
		||||
###      $DBHosts          : database table for hosts data (to write to)
 | 
			
		||||
###      $Month            : current month to do
 | 
			
		||||
###      $Test             : test mode
 | 
			
		||||
###      $Debug            : debug mode
 | 
			
		||||
###      @KnownHosts       : list of known hosts with subdomains
 | 
			
		||||
### OUT: (nothing)
 | 
			
		||||
  my ($DBHandle,$DBRaw,$DBHosts,$Month,$Test,$Debug,@KnownHosts) = @_;
 | 
			
		||||
 | 
			
		||||
  # define known hosts using subdomains
 | 
			
		||||
  my %Postings;
 | 
			
		||||
 | 
			
		||||
  # get raw header data from raw table for given month
 | 
			
		||||
  my $DBQuery = $DBHandle->prepare(sprintf("SELECT headers FROM %s ".
 | 
			
		||||
                                           "WHERE day LIKE ? AND NOT disregard",
 | 
			
		||||
                                           $DBRaw));
 | 
			
		||||
  $DBQuery->execute($Month.'-%')
 | 
			
		||||
    or &Bleat(2,sprintf("Can't get hosts data for %s from %s.%s: ".
 | 
			
		||||
                        "$DBI::errstr\n",$Month,$DBRaw));
 | 
			
		||||
 | 
			
		||||
  ### ----------------------------------------------
 | 
			
		||||
  print "----- HostStats -----\n" if $Debug;
 | 
			
		||||
  ### parse headers
 | 
			
		||||
  while (($_) = $DBQuery->fetchrow_array) {
 | 
			
		||||
    my $Host;
 | 
			
		||||
    my %Header = ParseHeaders(split(/\n/,$_));
 | 
			
		||||
 | 
			
		||||
    # ([a-z0-9-_]+\.[a-z0-9-_.]+) tries to match a hostname
 | 
			
		||||
    # Injection-Info
 | 
			
		||||
    if($Header{'injection-info'}) {
 | 
			
		||||
      ($Host) = $Header{'injection-info'} =~ /^\s*([a-z0-9-_]+\.[a-z0-9-_.]+);/i;
 | 
			
		||||
      # reset if IP address
 | 
			
		||||
      undef($Host) if $Host && $Host !~ /[g-z]/i;
 | 
			
		||||
    }
 | 
			
		||||
    # X-Trace
 | 
			
		||||
    if (!$Host && $Header{'x-trace'}) {
 | 
			
		||||
      (undef, $Host) = $Header{'x-trace'} =~ /^(\s|\d)*([a-z0-9-_]+\.[a-z0-9-_.]+)/i;
 | 
			
		||||
      # reset if IP address
 | 
			
		||||
      undef($Host) if $Host && $Host !~ /[g-z]/i;
 | 
			
		||||
    }
 | 
			
		||||
    # NNTP-Posting-Host
 | 
			
		||||
    if (!$Host && $Header{'nntp-posting-host'}) {
 | 
			
		||||
      ($Host) = $Header{'nntp-posting-host'} =~ /^\s*([a-z0-9-_]+\.[a-z0-9-_.]+)/i;
 | 
			
		||||
      # reset if IP address
 | 
			
		||||
      undef($Host) if $Host && $Host !~ /[g-z]/i;
 | 
			
		||||
    }
 | 
			
		||||
    # Path
 | 
			
		||||
    if (!$Host) {
 | 
			
		||||
      if ($Header{'path'} =~ /!([^!]+)!.POSTED!/) {
 | 
			
		||||
        $Host = "$1";
 | 
			
		||||
      } elsif ($Header{'path'} =~ /!.POSTED.([^!]+)!/) {
 | 
			
		||||
        $Host = "$1";
 | 
			
		||||
      } else {
 | 
			
		||||
        # iterate on the Path: header until we have a host name or no more
 | 
			
		||||
        # path elements
 | 
			
		||||
        while (!$Host && $Header{'path'} =~ /!/) {
 | 
			
		||||
          ($Host) = $Header{'path'} =~ /!?([a-z0-9-_]+\.[a-z0-9-_.]+)![^!]+$/i;
 | 
			
		||||
          undef($Host) if $Host && $Host =~ /\.MISMATCH/;
 | 
			
		||||
          # remove last path element
 | 
			
		||||
          $Header{'path'} =~ s/![^!]+$//;
 | 
			
		||||
        };
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    # special cases
 | 
			
		||||
    $Host = 'news.highwinds-media.com' if $Host =~ /fx\d\d\.\S{3}\.POSTED/
 | 
			
		||||
                                       or $Host =~ /newsfe\d+\.(iad|ams2)/;
 | 
			
		||||
    $Host = 'newshosting.com' if $Host =~ /post\d*\.iad/;
 | 
			
		||||
 | 
			
		||||
    # trailing .POSTED
 | 
			
		||||
    ($Host) = $Host =~ /(\S+)\.POSTED$/ if $Host =~ /\.POSTED$/;
 | 
			
		||||
 | 
			
		||||
    # normalize hosts
 | 
			
		||||
    foreach (@KnownHosts) {
 | 
			
		||||
      if ($Host =~ /\.$_$/) {
 | 
			
		||||
        ($Host) = $_ ;
 | 
			
		||||
        last;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    # lowercase
 | 
			
		||||
    $Host = lc($Host);
 | 
			
		||||
 | 
			
		||||
    # count host
 | 
			
		||||
    if ($Host) {
 | 
			
		||||
      $Postings{$Host}++;
 | 
			
		||||
    } else {
 | 
			
		||||
      &Bleat(2,sprintf("%s FAILED", $Header{'message-id'})) if !$Host;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    # !DEBUG! printf("%s: %s\n", $Header{'message-id'}, $Host);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  # delete old data for that month
 | 
			
		||||
  if (!$Test) {
 | 
			
		||||
    $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s WHERE month = ?",
 | 
			
		||||
                                     $DBHosts),undef,$Month)
 | 
			
		||||
      or &Bleat(2,sprintf("Can't delete old hosts data for %s from %s: ".
 | 
			
		||||
                          "$DBI::errstr\n",$Month,$DBHosts));
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  foreach my $Host (sort keys %Postings) {
 | 
			
		||||
    print "$Host => $Postings{$Host}\n" if $Debug;
 | 
			
		||||
    if (!$Test) {
 | 
			
		||||
      # write to database
 | 
			
		||||
      $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s ".
 | 
			
		||||
                                            "(month,host,postings) ".
 | 
			
		||||
                                            "VALUES (?, ?, ?)",$DBHosts));
 | 
			
		||||
      $DBQuery->execute($Month, $Host, $Postings{$Host})
 | 
			
		||||
        or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s: ".
 | 
			
		||||
                            "$DBI::errstr\n",$Month,$Host,$DBHosts));
 | 
			
		||||
      $DBQuery->finish;
 | 
			
		||||
    };
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
__END__
 | 
			
		||||
 | 
			
		||||
################################ Documentation #################################
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue