#! /usr/bin/perl -W # # groupstats.pl # # This script will get statistical data on newgroup usage # form a database. # # It is part of the NewsStats package. # # Copyright (c) 2010 Thomas Hochstein # # It can be redistributed and/or modified under the same terms under # which Perl itself is published. BEGIN { our $VERSION = "0.01"; use File::Basename; push(@INC, dirname($0)); } use strict; use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); use DBI; ################################# Definitions ################################## # ... ################################# Main program ################################# ### read commandline options my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:'); ### read configuration my %Conf = %{ReadConfig('newsstats.conf')}; ### override configuration via commandline options my %ConfOverride; $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; &OverrideConfig(\%Conf,\%ConfOverride); ### check for incompatible command line options # you can't mix '-t', '-b' and '-l' # -b/-l take preference over -t, and -b takes preference over -l if ($Options{'b'} or $Options{'l'}) { if ($Options{'t'}) { # drop -t warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n"); undef($Options{'t'}); }; if ($Options{'b'} and $Options{'l'}) { # drop -l warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n"); undef($Options{'l'}); }; # -q/-d don't work with -b or -l warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'}; warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'}; }; ### check output type # default output type to 'dump' $Options{'o'} = 'dump' if !$Options{'o'}; # fail if more than one newsgroup is combined with 'dumpgroup' type die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/); # accept 'dumpgroup' only with -n if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) { $Options{'o'} = 'dump'; warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n"); }; # set output type to 'pretty' for -l if ($Options{'l'}) { $Options{'o'} = 'pretty'; warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n"); }; ### get query type, default to 'postings' #die "$MySelf: E: Unknown query type -q $Options{'q'}!\n" if ($Options{'q'} and !exists($LegalTypes{$Options{'q'}})); #die "$MySelf: E: You must submit a threshold ('-t') for query type '-q $Options{'q'}'!\n" if ($Options{'q'} and !$Options{'t'}); ### get time period my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); # reset to one month for 'dump' type if ($Options{'o'} eq 'dump' and $Options{'p'}) { $StartMonth = $EndMonth; warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n"); }; ### init database my $DBHandle = InitDB(\%Conf,1); ### get data # get list of newsgroups (-n) my ($QueryPart,@GroupList); my $Newsgroups = $Options{'n'}; if ($Newsgroups) { ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups); } else { $QueryPart = 1; }; # manage thresholds if (defined($Options{'t'})) { if ($Options{'i'}) { $QueryPart .= ' AND postings < ?'; } else { $QueryPart .= ' AND postings > ?'; }; push @GroupList,$Options{'t'}; } # construct WHERE clause my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'})); # get lenght of longest newsgroup delivered by query for formatting purposes my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList); my ($OrderClause,$DBQuery); # -b (best of) defined? if (!defined($Options{'b'}) and !defined($Options{'l'})) { $OrderClause = 'newsgroup'; $OrderClause = 'postings' if $Options{'q'}; $OrderClause .= ' DESC' if $Options{'d'}; # do query: get number of postings per group from groups table for given months and newsgroups $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); } elsif ($Options{'b'}) { # set sorting order (-i) if ($Options{'i'}) { $OrderClause = 'postings'; } else { $OrderClause = 'postings DESC'; }; # push LIMIT to GroupList to match number of binding vars push @GroupList,$Options{'b'}; # do query: get sum of postings per group from groups table for given months and newsgroups with LIMIT $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); } else { # -l # set sorting order (-i) if ($Options{'i'}) { $OrderClause = '<'; } else { $OrderClause = '>'; }; # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars push @GroupList,$Options{'l'}; push @GroupList,$StartMonth,$EndMonth; # do query: get number of postings per group from groups table for given months and $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause)); }; # execute query $DBQuery->execute($StartMonth,$EndMonth,@GroupList) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); # output result printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'}); printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'}; if (!defined($Options{'b'}) and !defined($Options{'l'})) { &OutputData($Options{'o'},$DBQuery,$MaxLength); } elsif ($Options{'b'}) { while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); }; } else { # -l while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); }; }; ### close handles $DBHandle->disconnect;