-# $Id: Stats.pm,v 1.1 2006-12-14 17:34:57 mike Exp $
package ZOOM::IRSpy::Stats;
use strict;
use warnings;
+use Scalar::Util;
+use ZOOM::IRSpy::Utils qw(irspy_xpath_context);
+
=head1 NAME
ZOOM::IRSpy::Stats - statistics generated for IRSpy about its targets
=head1 SYNOPSIS
$stats = new ZOOM::IRSpy::Stats($dbname);
- use Data::Dumper; print Dumper($stats);
+ $stats->print();
=head1 DESCRIPTION
Provides a simple API to obtaining statistics about targets registered
in IRSpy. This is done just by creating a Stats object. Once this
-object is made, it can be crudely dumped, or the application can walk
-the structure to produce nice output.
+object is made, it can be crudely printed using the built-in debugging
+C<print()> method, or the application can walk the structure to
+produce nice output.
=head1 METHODS
$stats = new ZOOM::IRSpy::Stats($dbname,
new ZOOM::Query::PQF('@attr 1=1003 wedel');
# Or:
- $spy = new ZOOM::IRSpy("target/string/for/irspy/database");
+ $spy = new ZOOM::Connection("target/string/for/irspy/database");
$stats = new ZOOM::IRSpy::Stats($spy, $query);
Creates a new C<ZOOM::IRSpy::Stats> object and populates it with
=over 4
-=item $irspy (mandatory)
+=item $conn (mandatory)
An indication of the IRSpy database that statistics are required for.
-This may be in the form of a C<ZOOM::IRSpy> object or a database-name
-string such as C<localhost:3313/IR-Explain---1>.
+This may be in the form of a C<ZOOM::Connection> object or a
+database-name string such as C<localhost:8018/IR-Explain---1>.
=item $query (optional)
sub new {
my $class = shift();
- my($irspy, $query) = @_;
-
- return bless {
- irspy => $irspy,
- query => $query || "cql.allRecords=1",
+ my($conn, $query) = @_;
+
+ $query ||= "cql.allRecords=1",
+ $conn = new ZOOM::Connection($conn) if !ref $conn;
+ $query = new ZOOM::Query::CQL($query) if !ref $query;
+
+ my $oldSyntax = $conn->option("preferredRecordSyntax");
+ my $oldESN = $conn->option("elementSetName");
+ my $oldPC = $conn->option("presentChunk");
+ $conn->option(preferredRecordSyntax => "xml");
+ $conn->option(elementSetName => "zeerex");
+# $conn->option(presentChunk => 10);
+
+ my $rs = $conn->search($query);
+ my $n = $rs->size();
+
+ my $this = bless {
+ host => $conn->option("host"),
+ conn => $conn,
+ query => $query,
+ rs => $rs,
+ n => $n,
}, $class;
+
+ $this->_gather_stats();
+ $conn->option(preferredRecordSyntax => $oldSyntax);
+ $conn->option(elementSetName => $oldESN);
+ $conn->option(presentChunk => $oldPC);
+
+ return $this;
+}
+
+
+sub _gather_stats {
+ my $this = shift();
+
+ foreach my $i (0 .. $this->{n}-1) {
+ my $rec = $this->{rs}->record($i);
+ my $xc = irspy_xpath_context($rec);
+
+ # The ten most commonly supported Bib-1 Use attributes
+ foreach my $node ($xc->findnodes('e:indexInfo/e:index[@search="true"]/e:map/e:attr[@type=1 and @set="bib-1"]')) {
+ $this->{bib1AccessPoints}->{$node->findvalue(".")}++;
+ }
+
+ # Record syntax support by database
+ foreach my $node ($xc->findnodes('e:recordInfo/e:recordSyntax/@name')) {
+ $this->{recordSyntaxes}->{lc($node->findvalue("."))}++;
+ }
+
+ # Explain support
+ foreach my $node ($xc->findnodes('i:status/i:explain[@ok="1"]/@category')) {
+ $this->{explain}->{$node->findvalue(".")}++;
+ }
+
+ # Z39.50 Protocol Services Support
+ foreach my $node ($xc->findnodes('e:configInfo/e:supports')) {
+ my $supports = $node->findvalue('@type');
+ if ($node->findvalue(".") && $supports =~ s/^z3950_//) {
+ $this->{z3950_init_opt}->{$supports}++;
+ }
+ }
+
+ # Z39.50 Server Atlas
+ ### TODO -- awkward, should be considered an enhancement
+
+ # Top Domains
+ my $host = $xc->findvalue('e:serverInfo/e:host');
+ $host =~ s/.*\.//;
+ $this->{domains}->{lc($host)}++;
+
+ # Implementation
+ foreach my $node ($xc->findnodes('i:status/i:serverImplementationName/@value')) {
+ $this->{implementation}->{$node->findvalue(".")}++;
+ last; # This is because many of the records are still
+ # polluted with multiple implementationName elements
+ # from back then XSLT stylesheet that generated
+ # ZeeRex records was wrong.
+ }
+ }
+}
+
+
+=head2 print()
+
+ $stats->print();
+
+Prints an ugly but human-readable summary of the statistics on
+standard output.
+
+=cut
+
+sub print {
+ my $this = shift();
+
+ print "database = '", $this->{conn}->option("host"), "'\n";
+ print "query = '", $this->{query}, "'\n";
+ print "result set = '", $this->{rs}, "'\n";
+ print "count = '", $this->{n}, "'\n";
+ my $hr;
+
+ print "\nTOP 10 BIB-1 ATTRIBUTES\n";
+ $hr = $this->{bib1AccessPoints};
+ foreach my $key ((sort { $hr->{$b} <=> $hr->{$a}
+ || $a <=> $b } keys %$hr)[0..9]) {
+ print sprintf("%6d%20s%5d (%d%%)\n",
+ $key, "", $hr->{$key}, 100*$hr->{$key}/$this->{n});
+ }
+
+ print "\nRECORD SYNTAXES\n";
+ $hr = $this->{recordSyntaxes};
+ foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
+ || $a cmp $b } keys %$hr) {
+ print sprintf("%-26s%5d (%d%%)\n",
+ $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
+ }
+
+ print "\nEXPLAIN SUPPORT\n";
+ $hr = $this->{explain};
+ foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
+ || $a cmp $b } keys %$hr) {
+ print sprintf("%-26s%5d (%d%%)\n",
+ $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
+ }
+
+ print "\nZ39.50 PROTOCOL SERVICES SUPPORT\n";
+ $hr = $this->{z3950_init_opt};
+ foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
+ || $a cmp $b } keys %$hr) {
+ print sprintf("%-26s%5d (%d%%)\n",
+ $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
+ }
+
+ print "\nTOP-LEVEL DOMAINS\n";
+ $hr = $this->{domains};
+ foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
+ || $a cmp $b } keys %$hr) {
+ print sprintf("%-26s%5d (%d%%)\n",
+ $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
+ }
+
+ print "\nIMPLEMENTATIONS\n";
+ $hr = $this->{implementation};
+ foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
+ || $a cmp $b } keys %$hr) {
+ print sprintf("%-26s%5d (%d%%)\n",
+ $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
+ }
}