1 # $Id: Stats.pm,v 1.7 2007-10-31 16:42:13 mike Exp $
3 package ZOOM::IRSpy::Stats;
10 use ZOOM::IRSpy::Utils qw(irspy_xpath_context);
14 ZOOM::IRSpy::Stats - statistics generated for IRSpy about its targets
18 $stats = new ZOOM::IRSpy::Stats($dbname);
23 Provides a simple API to obtaining statistics about targets registered
24 in IRSpy. This is done just by creating a Stats object. Once this
25 object is made, it can be crudely printed using the built-in debugging
26 C<print()> method, or the application can walk the structure to
33 $stats = new ZOOM::IRSpy::Stats($dbname, "dc.creator=wedel");
35 $stats = new ZOOM::IRSpy::Stats($dbname,
36 new ZOOM::Query::PQF('@attr 1=1003 wedel');
38 $spy = new ZOOM::Connection("target/string/for/irspy/database");
39 $stats = new ZOOM::IRSpy::Stats($spy, $query);
41 Creates a new C<ZOOM::IRSpy::Stats> object and populates it with
42 statistics for the targets in the nominated database. This process
43 involves analysing the nominated IRSpy database at some length, and
44 which therefore takes some time
46 Either one or two arguments are required:
50 =item $conn (mandatory)
52 An indication of the IRSpy database that statistics are required for.
53 This may be in the form of a C<ZOOM::Connection> object or a
54 database-name string such as C<localhost:8018/IR-Explain---1>.
56 =item $query (optional)
58 The query with which to select a subset of the database to be
59 analysed. This may be in the form of a C<ZOOM::Query> object (using
60 any of the supported subclasses) or a CQL string. If this is omitted,
61 then all records in the database are included in the generated
70 my($conn, $query) = @_;
72 $query ||= "cql.allRecords=1",
73 $conn = new ZOOM::Connection($conn) if !ref $conn;
74 $query = new ZOOM::Query::CQL($query) if !ref $query;
76 my $oldSyntax = $conn->option("preferredRecordSyntax");
77 my $oldESN = $conn->option("elementSetName");
78 my $oldPC = $conn->option("presentChunk");
79 $conn->option(preferredRecordSyntax => "xml");
80 $conn->option(elementSetName => "zeerex");
81 # $conn->option(presentChunk => 10);
83 my $rs = $conn->search($query);
87 host => $conn->option("host"),
94 $this->_gather_stats();
95 $conn->option(preferredRecordSyntax => $oldSyntax);
96 $conn->option(elementSetName => $oldESN);
97 $conn->option(presentChunk => $oldPC);
106 foreach my $i (0 .. $this->{n}-1) {
107 my $rec = $this->{rs}->record($i);
108 my $xc = irspy_xpath_context($rec);
110 # The ten most commonly supported Bib-1 Use attributes
111 foreach my $node ($xc->findnodes('e:indexInfo/e:index[@search="true"]/e:map/e:attr[@type=1 and @set="bib-1"]')) {
112 $this->{bib1AccessPoints}->{$node->findvalue(".")}++;
115 # Record syntax support by database
116 foreach my $node ($xc->findnodes('e:recordInfo/e:recordSyntax/@name')) {
117 $this->{recordSyntaxes}->{$node->findvalue(".")}++;
121 foreach my $node ($xc->findnodes('i:status/i:explain[@ok="1"]/@category')) {
122 $this->{explain}->{$node->findvalue(".")}++;
125 # Z39.50 Protocol Services Support
126 foreach my $node ($xc->findnodes('e:configInfo/e:supports')) {
127 my $supports = $node->findvalue('@type');
128 if ($node->findvalue(".") && $supports =~ s/^z3950_//) {
129 $this->{z3950_init_opt}->{$supports}++;
133 # Z39.50 Server Atlas
134 ### TODO -- awkward, should be considered an enhancement
137 my $host = $xc->findvalue('e:serverInfo/e:host');
139 $this->{domains}->{$host}++;
142 foreach my $node ($xc->findnodes('i:status/i:serverImplementationName/@value')) {
143 $this->{implementation}->{$node->findvalue(".")}++;
144 last; # This is because many of the records are still
145 # polluted with multiple implementationName elements
146 # from back then XSLT stylesheet that generated
147 # ZeeRex records was wrong.
157 Prints an ugly but human-readable summary of the statistics on
165 print "database = '", $this->{conn}->option("host"), "'\n";
166 print "query = '", $this->{query}, "'\n";
167 print "result set = '", $this->{rs}, "'\n";
168 print "count = '", $this->{n}, "'\n";
171 print "\nTOP 10 BIB-1 ATTRIBUTES\n";
172 $hr = $this->{bib1AccessPoints};
173 foreach my $key ((sort { $hr->{$b} <=> $hr->{$a}
174 || $a <=> $b } keys %$hr)[0..9]) {
175 print sprintf("%6d%20s%5d (%d%%)\n",
176 $key, "", $hr->{$key}, 100*$hr->{$key}/$this->{n});
179 print "\nRECORD SYNTAXES\n";
180 $hr = $this->{recordSyntaxes};
181 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
182 || $a cmp $b } keys %$hr) {
183 print sprintf("%-26s%5d (%d%%)\n",
184 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
187 print "\nEXPLAIN SUPPORT\n";
188 $hr = $this->{explain};
189 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
190 || $a cmp $b } keys %$hr) {
191 print sprintf("%-26s%5d (%d%%)\n",
192 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
195 print "\nZ39.50 PROTOCOL SERVICES SUPPORT\n";
196 $hr = $this->{z3950_init_opt};
197 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
198 || $a cmp $b } keys %$hr) {
199 print sprintf("%-26s%5d (%d%%)\n",
200 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
203 print "\nTOP-LEVEL DOMAINS\n";
204 $hr = $this->{domains};
205 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
206 || $a cmp $b } keys %$hr) {
207 print sprintf("%-26s%5d (%d%%)\n",
208 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
211 print "\nIMPLEMENTATIONS\n";
212 $hr = $this->{implementation};
213 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
214 || $a cmp $b } keys %$hr) {
215 print sprintf("%-26s%5d (%d%%)\n",
216 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
227 Mike Taylor, E<lt>mike@indexdata.comE<gt>
229 =head1 COPYRIGHT AND LICENSE
231 Copyright (C) 2006 by Index Data ApS.
233 This library is free software; you can redistribute it and/or modify
234 it under the same terms as Perl itself, either Perl version 5.8.7 or,
235 at your option, any later version of Perl 5 you may have available.