1 # $Id: Session.pm,v 1.5 2003-02-28 18:38:29 pop Exp $
3 # Zebra perl API header
4 # =============================================================================
6 package IDZebra::Session;
8 use IDZebra::Logger qw(:flags :calls);
9 use IDZebra::Resultset;
13 our @ISA = qw(IDZebra::Logger);
16 # -----------------------------------------------------------------------------
17 # Class constructors, destructor
18 # -----------------------------------------------------------------------------
20 my ($proto, %args) = @_;
21 my $class = ref($proto) || $proto;
23 $self->{args} = \%args;
25 bless ($self, $class);
26 $self->{cql_ct} = undef;
29 $self->{databases} = {};
33 my ($self, %args) = @_;
36 unless (defined($self->{zs})) {
37 if (defined($args{'configFile'})) {
38 $self->{zs} = IDZebra::start($args{'configFile'});
40 $self->{zs} = IDZebra::start("zebra.cfg");
47 if (defined($self->{zs})) {
48 IDZebra::stop($self->{zs}) if ($self->{zs});
55 my ($proto,%args) = @_;
58 if (ref($proto)) { $self = $proto; } else {
59 $self = $proto->new(%args);
63 %args = %{$self->{args}};
66 $self->start_service(%args);
68 unless (defined($self->{zs})) {
69 croak ("Falied to open zebra service");
72 unless (defined($self->{zh})) {
73 $self->{zh}=IDZebra::open($self->{zs});
76 # Reset result set counter
79 # This is needed in order to somehow initialize the service
80 $self->databases("Default");
82 # Load the default configuration
85 $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
86 $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
95 while (IDZebra::trans_no($self->{zh}) > 0) {
96 logf (LOG_WARN,"Explicitly closing transaction with session");
100 IDZebra::close($self->{zh});
104 if ($self->{odr_input}) {
105 IDZebra::odr_reset($self->{odr_input});
106 IDZebra::odr_destroy($self->{odr_input});
107 $self->{odr_input} = undef;
110 if ($self->{odr_output}) {
111 IDZebra::odr_reset($self->{odr_output});
112 IDZebra::odr_destroy($self->{odr_output});
113 $self->{odr_output} = undef;
121 logf (LOG_LOG,"DESTROY $self");
124 if (defined ($self->{cql_ct})) {
125 IDZebra::cql_transform_close($self->{cql_ct});
128 # -----------------------------------------------------------------------------
129 # Record group selection. This is a bit nasty... but it is used in many places
130 # -----------------------------------------------------------------------------
132 my ($self,%args) = @_;
134 $self->{rg} = $self->_makeRecordGroup(%args);
135 $self->_selectRecordGroup($self->{rg});
140 sub selectRecordGroup {
141 my ($self, $groupName) = @_;
142 $self->{rg} = $self->_getRecordGroup($groupName);
143 $self->_selectRecordGroup($self->{rg});
146 sub _displayRecordGroup {
147 my ($self, $rg) = @_;
148 print STDERR "-----\n";
149 foreach my $key qw (groupName
160 print STDERR "$key:",$rg->{$key},"\n";
164 sub _cloneRecordGroup {
165 my ($self, $orig) = @_;
166 my $rg = IDZebra::recordGroup->new();
167 my $r = IDZebra::init_recordGroup($rg);
168 foreach my $key qw (groupName
180 $rg->{$key} = $orig->{$key} if ($orig->{$key});
185 sub _getRecordGroup {
186 my ($self, $groupName, $ext) = @_;
187 my $rg = IDZebra::recordGroup->new();
188 my $r = IDZebra::init_recordGroup($rg);
189 $rg->{groupName} = $groupName if ($groupName ne "");
190 $ext = "" unless ($ext);
191 my $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
195 sub _makeRecordGroup {
196 my ($self, %args) = @_;
199 my @keys = keys(%args);
200 unless ($#keys >= 0) {
201 return ($self->{rg});
204 if ($args{groupName}) {
205 $rg = $self->_getRecordGroup($args{groupName});
207 $rg = $self->_cloneRecordGroup($self->{rg});
209 $self->_setRecordGroupOptions($rg, %args);
213 sub _setRecordGroupOptions {
214 my ($self, $rg, %args) = @_;
216 foreach my $key qw (databaseName
227 if (defined ($args{$key})) {
228 $rg->{$key} = $args{$key};
232 sub _selectRecordGroup {
233 my ($self, $rg) = @_;
234 my $r = IDZebra::set_group($self->{zh}, $rg);
236 unless ($dbName = $rg->{databaseName}) {
239 unless ($self->databases($dbName)) {
240 croak("Fatal error selecting database $dbName");
243 # -----------------------------------------------------------------------------
244 # Selecting databases for search (and also for updating - internally)
245 # -----------------------------------------------------------------------------
247 my ($self, @databases) = @_;
250 return (keys(%{$self->{databases}}));
256 foreach my $db (@databases) {
257 next if ($self->{databases}{$db});
262 foreach my $db (keys (%{$self->{databases}})) {
263 $changed++ unless ($tmp{$db});
268 delete ($self->{databases});
269 foreach my $db (@databases) {
270 $self->{databases}{$db}++;
273 if (IDZebra::select_databases($self->{zh},
277 "Could not select database(s) %s errCode=%d",
278 join(",",@databases),
282 logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
285 return (keys(%{$self->{databases}}));
288 # -----------------------------------------------------------------------------
290 # -----------------------------------------------------------------------------
293 return(IDZebra::errCode($self->{zh}));
298 return(IDZebra::errString($self->{zh}));
303 return(IDZebra::errAdd($self->{zh}));
306 # -----------------------------------------------------------------------------
308 # -----------------------------------------------------------------------------
311 IDZebra::begin_trans($self->{zh});
316 my $stat = IDZebra::ZebraTransactionStatus->new();
317 IDZebra::end_trans($self->{zh}, $stat);
323 return(IDZebra::begin_read($self->{zh}));
328 IDZebra::end_read($self->{zh});
332 my ($self, $value) = @_;
333 if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
334 return (IDZebra::get_shadow_enable($self->{zh}));
339 if ($self->shadow_enable) {
340 return(IDZebra::commit($self->{zh}));
344 # -----------------------------------------------------------------------------
345 # We don't really need that...
346 # -----------------------------------------------------------------------------
348 my ($self, $name) = @_;
349 if ($name !~/^(input|output)$/) {
350 croak("Undefined ODR '$name'");
352 IDZebra::odr_reset($self->{"odr_$name"});
355 # -----------------------------------------------------------------------------
357 # -----------------------------------------------------------------------------
360 return(IDZebra::init($self->{zh}));
365 return(IDZebra::compact($self->{zh}));
369 my ($self, %args) = @_;
370 my $rg = $self->_update_args(%args);
371 $self->_selectRecordGroup($rg);
373 IDZebra::repository_update($self->{zh});
374 $self->_selectRecordGroup($self->{rg});
379 my ($self, %args) = @_;
380 my $rg = $self->_update_args(%args);
381 $self->_selectRecordGroup($rg);
383 IDZebra::repository_delete($self->{zh});
384 $self->_selectRecordGroup($self->{rg});
389 my ($self, %args) = @_;
390 my $rg = $self->_update_args(%args);
391 $self->_selectRecordGroup($rg);
393 IDZebra::repository_show($self->{zh});
394 $self->_selectRecordGroup($self->{rg});
399 my ($self, %args) = @_;
400 my $rg = $self->_makeRecordGroup(%args);
401 $self->_selectRecordGroup($rg);
405 # -----------------------------------------------------------------------------
407 # -----------------------------------------------------------------------------
410 my ($self, %args) = @_;
411 return(IDZebra::update_record($self->{zh},
412 $self->_record_update_args(%args)));
416 my ($self, %args) = @_;
417 return(IDZebra::delete_record($self->{zh},
418 $self->_record_update_args(%args)));
420 sub _record_update_args {
421 my ($self, %args) = @_;
423 my $sysno = $args{sysno} ? $args{sysno} : 0;
424 my $match = $args{match} ? $args{match} : "";
425 my $rectype = $args{recordType} ? $args{recordType} : "";
426 my $fname = $args{file} ? $args{file} : "<no file>";
433 elsif ($args{file}) {
434 open (F, $args{file}) || warn ("Cannot open $args{file}");
435 $buff = join('',(<F>));
438 my $len = length($buff);
440 delete ($args{sysno});
441 delete ($args{match});
442 delete ($args{recordType});
443 delete ($args{file});
444 delete ($args{data});
446 my $rg = $self->_makeRecordGroup(%args);
448 # If no record type is given, then try to find it out from the
451 if (my ($ext) = $fname =~ /\.(\w+)$/) {
452 my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
453 $rectype = $rg2->{recordType};
457 $rg->{databaseName} = "Default" unless ($rg->{databaseName});
459 # print STDERR "$rectype,$sysno,$match,$fname,$len\n";
463 return ($rg, $rectype, $sysno, $match, $fname, $buff, $len);
466 # -----------------------------------------------------------------------------
469 my ($self,$mapfile) = @_;
471 if ($self->{cql_mapfile} ne $mapfile) {
472 unless (-f $mapfile) {
473 croak("Cannot find $mapfile");
475 if (defined ($self->{cql_ct})) {
476 IDZebra::cql_transform_close($self->{cql_ct});
478 $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
479 $self->{cql_mapfile} = $mapfile;
482 return ($self->{cql_mapfile});
486 my ($self, $cqlquery) = @_;
487 unless (defined($self->{cql_ct})) {
488 croak("CQL map file is not specified yet.");
490 my $res = "\0" x 2048;
491 my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
492 unless ($r) {return (undef)};
498 # -----------------------------------------------------------------------------
500 # -----------------------------------------------------------------------------
502 my ($self, %args) = @_;
504 if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }
511 unless ($query = $self->cql2pqf($args{cql})) {
512 croak ("Invalid CQL query: '$args{cql}'");
516 croak ("No query given to search");
521 if ($args{databases}) {
522 @origdbs = $self->databases;
523 $self->databases(@{$args{databases}});
526 my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;
528 my $rs = $self->_search_pqf($query, $rsname);
530 if ($args{databases}) {
531 $self->databases(@origdbs);
539 return ("set_".$self->{rscount}++);
543 my ($self, $query, $setname) = @_;
545 my $hits = IDZebra::search_PQF($self->{zh},
551 my $rs = IDZebra::Resultset->new($self,
553 recordCount => $hits,
554 errCode => $self->errCode,
555 errString => $self->errString);
559 # -----------------------------------------------------------------------------
562 # Sorting of multiple result sets is not supported by zebra...
563 # -----------------------------------------------------------------------------
566 my ($self, $sortspec, $setname, @sets) = @_;
570 foreach my $rs (@sets) {
571 push (@setnames, $rs->{name});
572 $count += $rs->{recordCount}; # is this really sure ??? It doesn't
576 my $status = IDZebra::sort($self->{zh},
582 my $errCode = $self->errCode;
583 my $errString = $self->errString;
585 if ($status || $errCode) {$count = 0;}
587 my $rs = IDZebra::Resultset->new($self,
589 recordCount => $count,
591 errString => $errString);
596 # ============================================================================
603 IDZebra::Session - A Zebra database server session for update and retrieval
607 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
610 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
611 groupName => 'demo1');
613 $sess->group(groupName => 'demo2');
619 $sess->update(path => 'lib');
621 my $s1=$sess->update_record(data => $rec1,
622 recordType => 'grs.perl.pod',
623 groupName => "demo1",
626 my $stat = $sess->end_trans;
628 $sess->databases('demo1','demo2');
630 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
631 cql => 'dc.title=IDZebra',
632 databases => [qw(demo1 demo2)]);
637 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
639 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
641 =head1 OPENING AND CLOSING A ZEBRA SESSION
643 For the time being only local database services are supported, the same way as calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database, with a specific configuration file, use
645 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
650 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
652 where $sess is going to be the object representing a Zebra Session. Whenever this variable gets out of scope, the session is closed, together with all active transactions, etc... Anyway, if you'd like to close the session, just say:
657 - close all transactions
658 - destroy all result sets
661 In the future different database access methods are going to be available,
664 $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
666 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
668 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
669 groupName => 'demo');
674 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
676 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
678 $sess->group(groupName => ..., ...)
680 The following options are available:
686 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
688 =item B<databaseName>
690 The name of the (logical) database the updated records will belong to.
694 This path is used for directory updates (B<update>, B<delete> methods);
698 This option determines how to identify your records. See I<Zebra manual: Locating Records>
702 The record type used for indexing.
704 =item B<flagStoreData>
706 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
708 =item B<flagStoreKeys>
710 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
716 =item B<fileVerboseLimit>
718 Skip log messages, when doing a directory update, and the specified number of files are processed...
720 =item B<databaseNamePath>
724 =item B<explainDatabase>
726 The name of the explain database to be used
730 Follow links when doing directory update.
734 You can use the same parameters calling all update methods.
736 =head1 TRANSACTIONS (WRITE LOCKS)
738 A transaction is a block of record update (insert / modify / delete) procedures. So, every call to such a function will implicitly start a transaction, unless one has already been started by
742 For multiple per record updates it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
744 $stat = $sess->end_trans;
746 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
748 $stat->{processed} # Number of records processed
749 $stat->{updated} # Number of records updated
750 $stat->{deleted} # Number of records deleted
751 $stat->{inserted} # Number of records inserted
752 $stat->{stime} # System time used
753 $stat->{utime} # User time used
757 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
759 $sess->update(path => 'lib');
761 This will update the database with the files in directory "lib", according to the current record group settings.
765 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
767 $sess->update(groupName => 'demo1',
770 Update the database with files in "lib" according to the settings of group "demo1"
772 $sess->delete(groupName => 'demo1',
775 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
777 You can also update records one by one, even directly from the memory:
779 $sysno = $sess->update_record(data => $rec1,
780 recordType => 'grs.perl.pod',
781 groupName => "demo1");
783 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
785 You can also index a single file:
787 $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
789 Or, provide a buffer, and a filename (where filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
791 $sysno = $sess->update_record(data => $rec1,
792 file => "lib/IDZebra/Data1.pm");
794 And some crazy stuff:
796 $sysno = $sess->delete_record(sysno => $sysno);
798 where sysno in itself is sufficient to identify the record
800 $sysno = $sess->delete_record(data => $rec1,
801 recordType => 'grs.perl.pod',
802 groupName => "demo1");
804 In this case the record is extracted and, if it already exists, located in the database, then deleted...
806 $sysno = $sess->delete_record(data => $rec1,
808 recordType => 'grs.perl.pod',
809 groupName => "demo1");
811 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in recordId directive of zebra.cfg) is provided directly....
814 B<Important:> Note, that one record can be updated only once within a transaction - all subsequent updates are skipped.
816 =head1 DATABASE SELECTION
818 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
820 For searching, you can select databases by calling:
822 $sess->databases('db1','db2');
824 This will not do anything if the given and only the given databases are already selected. You can get the list of the actually selected databases, by calling:
826 @dblist = $sess->databases();
830 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
832 $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
833 pqf => '@attr 1=4 computer');
835 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search, see YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
839 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
841 The CCL searching is not currently supported by this API.
845 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
847 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
849 $sess->cqlmap($mapfile);
851 Or, you can directly provide the I<mapfile> parameter for the search:
853 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
854 cql => 'dc.title=IDZebra');
856 As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
860 As you have seen, the result of the search request is a I<Resultset> object.
861 It contains the number of hits and the search status, and can be used to sort and retrieve the resulting records.
865 printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
867 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
869 =head1 MISC FUNCTIONS
877 Peter Popovics, pop@technomat.hu
881 IDZebra, IDZebra::Data1, Zebra documentation