-# $Id: Ping.pm,v 1.20 2007-02-23 15:03:44 mike Exp $
+# $Id: Ping.pm,v 1.23 2007-04-30 11:26:57 mike Exp $
# See the "Main" test package for documentation
use ZOOM::IRSpy::Utils qw(isodate);
+use Text::Iconv;
+# Shared converter used below to turn Latin-1 server strings into UTF-8.
+# Created with an explicit class-method call rather than indirect-object
+# syntax ("new Text::Iconv(...)"), which is ambiguous to the parser.
+my $conv = Text::Iconv->new("LATIN1", "UTF8");
+
sub start {
my $class = shift();
+    # Make Text::Iconv croak on conversion errors instead of quietly
+    # returning undef.  This is a class-wide setting, so it only needs
+    # to be made once, before the loop.
+    Text::Iconv->raise_error(1);
foreach my $opt (qw(serverImplementationId
serverImplementationName
serverImplementationVersion)) {
- $conn->record()->store_result($opt, value => $conn->option($opt));
+    my $val = $conn->option($opt);
+    next if !defined $val; # not defined for SRU, for example
+
+    # There doesn't seem to be a reliable way to tell what
+    # character set the server uses for these.  At least one
+    # server (z3950.bcl.jcyl.es:210/AbsysCCFL) returns an ISO
+    # 8859-1 string containing an o-acute, which breaks the
+    # XML parser if we just insert it naively.  It seems
+    # reasonable, though, to guess that the great majority of
+    # servers will use ASCII, Latin-1 or UTF-8.  ASCII is a
+    # subset of both of the others, so that brings it down to
+    # two.
+    #
+    # Every possible byte value is a valid Latin-1 character, so
+    # a Latin-1 to UTF-8 conversion cannot fail, and a failed
+    # conversion therefore cannot be used to detect a string
+    # that was already UTF-8: such a string would simply be
+    # encoded a second time.  So test the other way round: if
+    # the bytes are already well-formed UTF-8 (which includes
+    # plain ASCII), use them as they are; otherwise assume
+    # Latin-1 and convert.
+    #
+    # NOTE(review): assumes $val is a byte string as delivered by
+    # the server, not an already-decoded character string.
+    my $probe = $val;   # utf8::decode() works in place: test a copy
+    if (!utf8::decode($probe)) {
+        # eval yields undef if convert() croaks; in that case we
+        # fall through and store the original value unchanged.
+        my $converted = eval { $conv->convert($val) };
+        if (defined $converted) {
+            $conn->log("irspy", "converted '$val' from Latin-1 to UTF-8");
+            $val = $converted;
+        }
+    }
+    $conn->record()->store_result($opt, value => $val);
}
}