1 # $Id: Session.pm,v 1.7 2003-02-28 20:11:20 pop Exp $
3 # Zebra perl API header
4 # =============================================================================
5 package IDZebra::Session;
12 use IDZebra::Logger qw(:flags :calls);
13 use IDZebra::Resultset;
16 our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
17 our @ISA = qw(IDZebra::Logger);
21 # -----------------------------------------------------------------------------
22 # Class constructors, destructor
23 # -----------------------------------------------------------------------------
25 my ($proto, %args) = @_;
26 my $class = ref($proto) || $proto;
28 $self->{args} = \%args;
30 bless ($self, $class);
31 $self->{cql_ct} = undef;
32 $self->{cql_mapfile} = "";
35 $self->{databases} = {};
39 my ($self, %args) = @_;
42 unless (defined($self->{zs})) {
43 if (defined($args{'configFile'})) {
44 $self->{zs} = IDZebra::start($args{'configFile'});
46 $self->{zs} = IDZebra::start("zebra.cfg");
53 if (defined($self->{zs})) {
54 IDZebra::stop($self->{zs}) if ($self->{zs});
61 my ($proto,%args) = @_;
64 if (ref($proto)) { $self = $proto; } else {
65 $self = $proto->new(%args);
69 %args = %{$self->{args}};
72 $self->start_service(%args);
74 unless (defined($self->{zs})) {
75 croak ("Falied to open zebra service");
78 unless (defined($self->{zh})) {
79 $self->{zh}=IDZebra::open($self->{zs});
82 # Reset result set counter
85 # This is needed in order to somehow initialize the service
86 $self->databases("Default");
88 # Load the default configuration
91 $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
92 $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
101 while (IDZebra::trans_no($self->{zh}) > 0) {
102 logf (LOG_WARN,"Explicitly closing transaction with session");
106 IDZebra::close($self->{zh});
110 if ($self->{odr_input}) {
111 IDZebra::odr_reset($self->{odr_input});
112 IDZebra::odr_destroy($self->{odr_input});
113 $self->{odr_input} = undef;
116 if ($self->{odr_output}) {
117 IDZebra::odr_reset($self->{odr_output});
118 IDZebra::odr_destroy($self->{odr_output});
119 $self->{odr_output} = undef;
127 logf (LOG_LOG,"DESTROY $self");
130 if (defined ($self->{cql_ct})) {
131 IDZebra::cql_transform_close($self->{cql_ct});
134 # -----------------------------------------------------------------------------
135 # Record group selection. This is a bit nasty... but used in many places
136 # -----------------------------------------------------------------------------
138 my ($self,%args) = @_;
140 $self->{rg} = $self->_makeRecordGroup(%args);
141 $self->_selectRecordGroup($self->{rg});
146 sub selectRecordGroup {
147 my ($self, $groupName) = @_;
148 $self->{rg} = $self->_getRecordGroup($groupName);
149 $self->_selectRecordGroup($self->{rg});
152 sub _displayRecordGroup {
153 my ($self, $rg) = @_;
154 print STDERR "-----\n";
155 foreach my $key qw (groupName
166 print STDERR "$key:",$rg->{$key},"\n";
170 sub _cloneRecordGroup {
171 my ($self, $orig) = @_;
172 my $rg = IDZebra::recordGroup->new();
173 my $r = IDZebra::init_recordGroup($rg);
174 foreach my $key qw (groupName
186 $rg->{$key} = $orig->{$key} if ($orig->{$key});
191 sub _getRecordGroup {
192 my ($self, $groupName, $ext) = @_;
193 my $rg = IDZebra::recordGroup->new();
194 my $r = IDZebra::init_recordGroup($rg);
195 $rg->{groupName} = $groupName if ($groupName ne "");
196 $ext = "" unless ($ext);
197 $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
201 sub _makeRecordGroup {
202 my ($self, %args) = @_;
205 my @keys = keys(%args);
206 unless ($#keys >= 0) {
207 return ($self->{rg});
210 if ($args{groupName}) {
211 $rg = $self->_getRecordGroup($args{groupName});
213 $rg = $self->_cloneRecordGroup($self->{rg});
215 $self->_setRecordGroupOptions($rg, %args);
219 sub _setRecordGroupOptions {
220 my ($self, $rg, %args) = @_;
222 foreach my $key qw (databaseName
233 if (defined ($args{$key})) {
234 $rg->{$key} = $args{$key};
238 sub _selectRecordGroup {
239 my ($self, $rg) = @_;
240 my $r = IDZebra::set_group($self->{zh}, $rg);
242 unless ($dbName = $rg->{databaseName}) {
245 unless ($self->databases($dbName)) {
246 croak("Fatal error selecting database $dbName");
249 # -----------------------------------------------------------------------------
250 # Selecting databases for search (and also for updating - internally)
251 # -----------------------------------------------------------------------------
253 my ($self, @databases) = @_;
256 return (keys(%{$self->{databases}}));
262 foreach my $db (@databases) {
263 next if ($self->{databases}{$db});
268 foreach my $db (keys (%{$self->{databases}})) {
269 $changed++ unless ($tmp{$db});
274 delete ($self->{databases});
275 foreach my $db (@databases) {
276 $self->{databases}{$db}++;
279 if (IDZebra::select_databases($self->{zh},
283 "Could not select database(s) %s errCode=%d",
284 join(",",@databases),
288 logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
291 return (keys(%{$self->{databases}}));
294 # -----------------------------------------------------------------------------
296 # -----------------------------------------------------------------------------
299 return(IDZebra::errCode($self->{zh}));
304 return(IDZebra::errString($self->{zh}));
309 return(IDZebra::errAdd($self->{zh}));
312 # -----------------------------------------------------------------------------
314 # -----------------------------------------------------------------------------
317 IDZebra::begin_trans($self->{zh});
322 my $stat = IDZebra::ZebraTransactionStatus->new();
323 IDZebra::end_trans($self->{zh}, $stat);
329 return(IDZebra::begin_read($self->{zh}));
334 IDZebra::end_read($self->{zh});
338 my ($self, $value) = @_;
339 if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
340 return (IDZebra::get_shadow_enable($self->{zh}));
345 if ($self->shadow_enable) {
346 return(IDZebra::commit($self->{zh}));
350 # -----------------------------------------------------------------------------
351 # We don't really need that...
352 # -----------------------------------------------------------------------------
354 my ($self, $name) = @_;
355 if ($name !~/^(input|output)$/) {
356 croak("Undefined ODR '$name'");
358 IDZebra::odr_reset($self->{"odr_$name"});
361 # -----------------------------------------------------------------------------
363 # -----------------------------------------------------------------------------
366 return(IDZebra::init($self->{zh}));
371 return(IDZebra::compact($self->{zh}));
375 my ($self, %args) = @_;
376 my $rg = $self->_update_args(%args);
377 $self->_selectRecordGroup($rg);
379 IDZebra::repository_update($self->{zh});
380 $self->_selectRecordGroup($self->{rg});
385 my ($self, %args) = @_;
386 my $rg = $self->_update_args(%args);
387 $self->_selectRecordGroup($rg);
389 IDZebra::repository_delete($self->{zh});
390 $self->_selectRecordGroup($self->{rg});
395 my ($self, %args) = @_;
396 my $rg = $self->_update_args(%args);
397 $self->_selectRecordGroup($rg);
399 IDZebra::repository_show($self->{zh});
400 $self->_selectRecordGroup($self->{rg});
405 my ($self, %args) = @_;
406 my $rg = $self->_makeRecordGroup(%args);
407 $self->_selectRecordGroup($rg);
411 # -----------------------------------------------------------------------------
413 # -----------------------------------------------------------------------------
416 my ($self, %args) = @_;
417 return(IDZebra::update_record($self->{zh},
418 $self->_record_update_args(%args)));
422 my ($self, %args) = @_;
423 return(IDZebra::delete_record($self->{zh},
424 $self->_record_update_args(%args)));
426 sub _record_update_args {
427 my ($self, %args) = @_;
429 my $sysno = $args{sysno} ? $args{sysno} : 0;
430 my $match = $args{match} ? $args{match} : "";
431 my $rectype = $args{recordType} ? $args{recordType} : "";
432 my $fname = $args{file} ? $args{file} : "<no file>";
439 elsif ($args{file}) {
440 CORE::open (F, $args{file}) || warn ("Cannot open $args{file}");
441 $buff = join('',(<F>));
444 my $len = length($buff);
446 delete ($args{sysno});
447 delete ($args{match});
448 delete ($args{recordType});
449 delete ($args{file});
450 delete ($args{data});
452 my $rg = $self->_makeRecordGroup(%args);
454 # If no record type is given, then try to find it out from the
457 if (my ($ext) = $fname =~ /\.(\w+)$/) {
458 my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
459 $rectype = $rg2->{recordType};
463 $rg->{databaseName} = "Default" unless ($rg->{databaseName});
465 # print STDERR "$rectype,$sysno,$match,$fname,$len\n";
469 return ($rg, $rectype, $sysno, $match, $fname, $buff, $len);
472 # -----------------------------------------------------------------------------
475 my ($self,$mapfile) = @_;
477 if ($self->{cql_mapfile} ne $mapfile) {
478 unless (-f $mapfile) {
479 croak("Cannot find $mapfile");
481 if (defined ($self->{cql_ct})) {
482 IDZebra::cql_transform_close($self->{cql_ct});
484 $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
485 $self->{cql_mapfile} = $mapfile;
488 return ($self->{cql_mapfile});
492 my ($self, $cqlquery) = @_;
493 unless (defined($self->{cql_ct})) {
494 croak("CQL map file is not specified yet.");
496 my $res = "\0" x 2048;
497 my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
498 unless ($r) {return (undef)};
504 # -----------------------------------------------------------------------------
506 # -----------------------------------------------------------------------------
508 my ($self, %args) = @_;
510 if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }
517 unless ($query = $self->cql2pqf($args{cql})) {
518 croak ("Invalid CQL query: '$args{cql}'");
522 croak ("No query given to search");
527 if ($args{databases}) {
528 @origdbs = $self->databases;
529 $self->databases(@{$args{databases}});
532 my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;
534 my $rs = $self->_search_pqf($query, $rsname);
536 if ($args{databases}) {
537 $self->databases(@origdbs);
545 return ("set_".$self->{rscount}++);
549 my ($self, $query, $setname) = @_;
551 my $hits = IDZebra::search_PQF($self->{zh},
557 my $rs = IDZebra::Resultset->new($self,
559 recordCount => $hits,
560 errCode => $self->errCode,
561 errString => $self->errString);
565 # -----------------------------------------------------------------------------
568 # Sorting of multiple result sets is not supported by zebra...
569 # -----------------------------------------------------------------------------
572 my ($self, $sortspec, $setname, @sets) = @_;
576 foreach my $rs (@sets) {
577 push (@setnames, $rs->{name});
578 $count += $rs->{recordCount}; # is this really sure ??? It doesn't
582 my $status = IDZebra::sort($self->{zh},
588 my $errCode = $self->errCode;
589 my $errString = $self->errString;
591 if ($status || $errCode) {$count = 0;}
593 my $rs = IDZebra::Resultset->new($self,
595 recordCount => $count,
597 errString => $errString);
602 # ============================================================================
609 IDZebra::Session - A Zebra database server session for update and retrieval
613 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
616 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
617 groupName => 'demo1');
619 $sess->group(groupName => 'demo2');
625 $sess->update(path => 'lib');
627 my $s1=$sess->update_record(data => $rec1,
628 recordType => 'grs.perl.pod',
629 groupName => "demo1",
632 my $stat = $sess->end_trans;
634 $sess->databases('demo1','demo2');
636 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
637 cql => 'dc.title=IDZebra',
638 databases => [qw(demo1 demo2)]);
643 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
645 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
647 =head1 OPENING AND CLOSING A ZEBRA SESSION
649 For the time being, only local database services are supported, the same way as calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database with a specific configuration file, use
651 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
656 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
658 where $sess is going to be the object representing a Zebra Session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc... Anyway, if you'd like to close the session explicitly, just say:
663 - close all transactions
664 - destroy all result sets
667 In the future different database access methods are going to be available,
670 $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
672 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
674 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
675 groupName => 'demo');
680 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
682 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
684 $sess->group(groupName => ..., ...)
686 The following options are available:
692 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
694 =item B<databaseName>
696 The name of the (logical) database the updated records will belong to.
700 This path is used for directory updates (B<update>, B<delete> methods);
704 This option determines how to identify your records. See I<Zebra manual: Locating Records>
708 The record type used for indexing.
710 =item B<flagStoreData>
712 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
714 =item B<flagStoreKeys>
716 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
722 =item B<fileVerboseLimit>
724 Skip log messages during a directory update once the specified number of files have been processed...
726 =item B<databaseNamePath>
730 =item B<explainDatabase>
732 The name of the explain database to be used
736 Follow links when doing directory update.
740 You can use the same parameters calling all update methods.
742 =head1 TRANSACTIONS (WRITE LOCKS)
744 A transaction is a block of record update (insert / modify / delete) procedures. So, every call to such a function will implicitly start a transaction, unless one has already been started by
748 For multiple per record updates it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
750 $stat = $sess->end_trans;
752 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
754 $stat->{processed} # Number of records processed
755 $stat->{updated} # Number of records updated
756 $stat->{deleted} # Number of records deleted
757 $stat->{inserted} # Number of records inserted
758 $stat->{stime} # System time used
759 $stat->{utime} # User time used
763 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
765 $sess->update(path => 'lib');
767 This will update the database with the files in directory "lib", according to the current record group settings.
771 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
773 $sess->update(groupName => 'demo1',
776 Update the database with files in "lib" according to the settings of group "demo1"
778 $sess->delete(groupName => 'demo1',
781 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
783 You can also update records one by one, even directly from the memory:
785 $sysno = $sess->update_record(data => $rec1,
786 recordType => 'grs.perl.pod',
787 groupName => "demo1");
789 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
791 You can also index a single file:
793 $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
795 Or, provide a buffer, and a filename (where filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
797 $sysno = $sess->update_record(data => $rec1,
798 file => "lib/IDZebra/Data1.pm");
800 And some crazy stuff:
802 $sysno = $sess->delete_record(sysno => $sysno);
804 where sysno in itself is sufficient to identify the record
806 $sysno = $sess->delete_record(data => $rec1,
807 recordType => 'grs.perl.pod',
808 groupName => "demo1");
810 In this case the record is extracted and, if it already exists, located in the database and then deleted...
812 $sysno = $sess->delete_record(data => $rec1,
814 recordType => 'grs.perl.pod',
815 groupName => "demo1");
817 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in the recordId directive of zebra.cfg) is provided directly....
820 B<Important:> Note, that one record can be updated only once within a transaction - all subsequent updates are skipped.
822 =head1 DATABASE SELECTION
824 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
826 For searching, you can select databases by calling:
828 $sess->databases('db1','db2');
830 This will not do anything if exactly the given databases (and no others) are already selected. You can get the list of the currently selected databases by calling:
832 @dblist = $sess->databases();
836 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
838 $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
839 pqf => '@attr 1=4 computer');
841 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search, see YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
845 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
847 The CCL searching is not currently supported by this API.
851 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
853 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
855 $sess->cqlmap($mapfile);
857 Or, you can directly provide the I<mapfile> parameter for the search:
859 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
860 cql => 'dc.title=IDZebra');
862 As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
866 As you have seen, the result of the search request is a I<Resultset> object.
867 It contains the number of hits and the search status, and can be used to sort and retrieve the resulting records.
871 printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
873 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
875 =head1 MISC FUNCTIONS
883 Peter Popovics, pop@technomat.hu
887 IDZebra, IDZebra::Data1, Zebra documentation