1 # $Id: Stats.pm,v 1.7 2007-10-31 16:42:13 mike Exp $
3 package ZOOM::IRSpy::Stats;
8 use ZOOM::IRSpy::Utils qw(irspy_xpath_context);
12 ZOOM::IRSpy::Stats - statistics generated for IRSpy about its targets
16 $stats = new ZOOM::IRSpy::Stats($dbname);
21 Provides a simple API to obtaining statistics about targets registered
22 in IRSpy. This is done just by creating a Stats object. Once this
23 object is made, it can be crudely printed using the built-in debugging
24 C<print()> method, or the application can walk the structure to
31 $stats = new ZOOM::IRSpy::Stats($dbname, "dc.creator=wedel");
33 $stats = new ZOOM::IRSpy::Stats($dbname,
34 new ZOOM::Query::PQF('@attr 1=1003 wedel');
36 $spy = new ZOOM::Connection("target/string/for/irspy/database");
37 $stats = new ZOOM::IRSpy::Stats($spy, $query);
39 Creates a new C<ZOOM::IRSpy::Stats> object and populates it with
40 statistics for the targets in the nominated database. This process
41 involves analysing the nominated IRSpy database at some length, and
42 which therefore takes some time
44 Either one or two arguments are required:
48 =item $conn (mandatory)
50 An indication of the IRSpy database that statistics are required for.
51 This may be in the form of a C<ZOOM::Connection> object or a
52 database-name string such as C<localhost:8018/IR-Explain---1>.
54 =item $query (optional)
56 The query with which to select a subset of the database to be
57 analysed. This may be in the form of a C<ZOOM::Query> object (using
58 any of the supported subclasses) or a CQL string. If this is omitted,
59 then all records in the database are included in the generated
68 my($conn, $query) = @_;
70 $query ||= "cql.allRecords=1",
71 $conn = new ZOOM::Connection($conn) if !ref $conn;
72 $query = new ZOOM::Query::CQL($query) if !ref $query;
74 my $oldSyntax = $conn->option("preferredRecordSyntax");
75 my $oldESN = $conn->option("elementSetName");
76 my $oldPC = $conn->option("presentChunk");
77 $conn->option(preferredRecordSyntax => "xml");
78 $conn->option(elementSetName => "zeerex");
79 # $conn->option(presentChunk => 10);
81 my $rs = $conn->search($query);
85 host => $conn->option("host"),
92 $this->_gather_stats();
93 $conn->option(preferredRecordSyntax => $oldSyntax);
94 $conn->option(elementSetName => $oldESN);
95 $conn->option(presentChunk => $oldPC);
104 foreach my $i (0 .. $this->{n}-1) {
105 my $rec = $this->{rs}->record($i);
106 my $xc = irspy_xpath_context($rec);
108 # The ten most commonly supported Bib-1 Use attributes
109 foreach my $node ($xc->findnodes('e:indexInfo/e:index[@search="true"]/e:map/e:attr[@type=1 and @set="bib-1"]')) {
110 $this->{bib1AccessPoints}->{$node->findvalue(".")}++;
113 # Record syntax support by database
114 foreach my $node ($xc->findnodes('e:recordInfo/e:recordSyntax/@name')) {
115 $this->{recordSyntaxes}->{$node->findvalue(".")}++;
119 foreach my $node ($xc->findnodes('i:status/i:explain[@ok="1"]/@category')) {
120 $this->{explain}->{$node->findvalue(".")}++;
123 # Z39.50 Protocol Services Support
124 foreach my $node ($xc->findnodes('e:configInfo/e:supports')) {
125 my $supports = $node->findvalue('@type');
126 if ($node->findvalue(".") && $supports =~ s/^z3950_//) {
127 $this->{z3950_init_opt}->{$supports}++;
131 # Z39.50 Server Atlas
132 ### TODO -- awkward, should be considered an enhancement
135 my $host = $xc->findvalue('e:serverInfo/e:host');
137 $this->{domains}->{$host}++;
140 foreach my $node ($xc->findnodes('i:status/i:serverImplementationName/@value')) {
141 $this->{implementation}->{$node->findvalue(".")}++;
142 last; # This is because many of the records are still
143 # polluted with multiple implementationName elements
144 # from back then XSLT stylesheet that generated
145 # ZeeRex records was wrong.
155 Prints an ugly but human-readable summary of the statistics on
163 print "database = '", $this->{conn}->option("host"), "'\n";
164 print "query = '", $this->{query}, "'\n";
165 print "result set = '", $this->{rs}, "'\n";
166 print "count = '", $this->{n}, "'\n";
169 print "\nTOP 10 BIB-1 ATTRIBUTES\n";
170 $hr = $this->{bib1AccessPoints};
171 foreach my $key ((sort { $hr->{$b} <=> $hr->{$a}
172 || $a <=> $b } keys %$hr)[0..9]) {
173 print sprintf("%6d%20s%5d (%d%%)\n",
174 $key, "", $hr->{$key}, 100*$hr->{$key}/$this->{n});
177 print "\nRECORD SYNTAXES\n";
178 $hr = $this->{recordSyntaxes};
179 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
180 || $a cmp $b } keys %$hr) {
181 print sprintf("%-26s%5d (%d%%)\n",
182 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
185 print "\nEXPLAIN SUPPORT\n";
186 $hr = $this->{explain};
187 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
188 || $a cmp $b } keys %$hr) {
189 print sprintf("%-26s%5d (%d%%)\n",
190 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
193 print "\nZ39.50 PROTOCOL SERVICES SUPPORT\n";
194 $hr = $this->{z3950_init_opt};
195 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
196 || $a cmp $b } keys %$hr) {
197 print sprintf("%-26s%5d (%d%%)\n",
198 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
201 print "\nTOP-LEVEL DOMAINS\n";
202 $hr = $this->{domains};
203 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
204 || $a cmp $b } keys %$hr) {
205 print sprintf("%-26s%5d (%d%%)\n",
206 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
209 print "\nIMPLEMENTATIONS\n";
210 $hr = $this->{implementation};
211 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
212 || $a cmp $b } keys %$hr) {
213 print sprintf("%-26s%5d (%d%%)\n",
214 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
225 Mike Taylor, E<lt>mike@indexdata.comE<gt>
227 =head1 COPYRIGHT AND LICENSE
229 Copyright (C) 2006 by Index Data ApS.
231 This library is free software; you can redistribute it and/or modify
232 it under the same terms as Perl itself, either Perl version 5.8.7 or,
233 at your option, any later version of Perl 5 you may have available.