2 * Copyright (C) 1995-2000, Index Data
6 * Revision 1.44 2001-10-15 19:53:43 adam
7 * POSIX thread updates. First work on term sets.
9 * Revision 1.43 2000/12/05 12:22:53 adam
10 * Termlist source implemented (so that we can index values of XML/SGML
13 * Revision 1.42 2000/12/05 10:01:44 adam
14 * Fixed bug regarding user-defined attribute sets.
16 * Revision 1.41 2000/12/01 17:59:08 adam
17 * Fixed bug regarding online updates on WIN32.
18 * When zebra.cfg is not available the server will not abort.
20 * Revision 1.40 2000/11/29 15:21:31 adam
21 * Fixed problem with passwd db.
23 * Revision 1.39 2000/11/29 14:24:01 adam
24 * Script configure uses yaz pthreads options. Added locking for
25 * zebra_register_{lock,unlock}.
27 * Revision 1.38 2000/11/08 13:46:58 adam
28 * Fixed scan: server could break if bad attribute/database was selected.
29 * Work on remote update.
31 * Revision 1.37 2000/10/17 12:37:09 adam
32 * Fixed notification of live-updates. Fixed minor problem with mf_init
33 * where it didn't handle shadow area file names correctly.
35 * Revision 1.36 2000/09/06 08:59:36 adam
36 * Using read-only (for now) for server.
38 * Revision 1.35 2000/07/07 12:49:20 adam
39 * Optimized resultSetInsert{Rank,Sort}.
41 * Revision 1.34 2000/06/09 13:56:38 ian
42 * Added some logging on Authentication and searches.
44 * Revision 1.33 2000/05/18 12:01:36 adam
45 * System call times(2) used again. More 64-bit fixes.
47 * Revision 1.32 2000/04/19 14:35:59 adam
48 * WIN32 update (this version is known not to work on Windows).
50 * Revision 1.31 2000/04/05 10:07:02 adam
51 * Minor zebra compile fix.
53 * Revision 1.30 2000/04/05 09:49:35 adam
54 * On Unix, zebra/z'mbol uses automake.
56 * Revision 1.29 2000/03/20 19:08:36 adam
57 * Added remote record import using Z39.50 extended services and Segment
60 * Revision 1.28 2000/03/15 15:00:30 adam
61 * First work on threaded version.
63 * Revision 1.27 2000/02/24 12:31:17 adam
64 * Added zebra_string_norm.
66 * Revision 1.26 1999/11/30 13:48:03 adam
67 * Improved installation. Updated for inclusion of YAZ header files.
69 * Revision 1.25 1999/11/04 15:00:45 adam
70 * Implemented delete result set(s).
72 * Revision 1.24 1999/10/14 14:33:50 adam
73 * Added truncation 5=106.
75 * Revision 1.23 1999/09/07 11:36:32 adam
78 * Revision 1.22 1999/08/02 10:13:47 adam
79 * Fixed bug regarding zebra_hits.
81 * Revision 1.21 1999/07/14 10:59:26 adam
82 * Changed functions isc_getmethod, isams_getmethod.
83 * Improved fatal error handling (such as missing EXPLAIN schema).
85 * Revision 1.20 1999/07/06 12:28:04 adam
86 * Updated record index structure. Format includes version ID. Compression
87 * algorithm ID is stored for each record block.
89 * Revision 1.19 1999/05/26 07:49:13 adam
92 * Revision 1.18 1999/05/15 14:36:38 adam
93 * Updated dictionary. Implemented "compression" of dictionary.
95 * Revision 1.17 1999/05/12 13:08:06 adam
96 * First version of ISAMS.
98 * Revision 1.16 1999/02/19 10:38:30 adam
99 * Implemented chdir-setting.
101 * Revision 1.15 1999/02/17 12:18:12 adam
102 * Fixed zebra_close so that a NULL pointer is ignored.
104 * Revision 1.14 1999/02/02 14:51:11 adam
105 * Updated WIN32 code specific sections. Changed header.
107 * Revision 1.13 1998/12/16 12:23:30 adam
108 * Added facility for database name mapping using resource mapdb.
110 * Revision 1.12 1998/11/16 10:18:10 adam
111 * Better error reporting for result sets.
113 * Revision 1.11 1998/10/16 08:14:34 adam
114 * Updated record control system.
116 * Revision 1.10 1998/09/22 10:03:42 adam
117 * Changed result sets to be persistent in the sense that they can
118 * be re-searched if needed.
119 * Fixed memory leak in rsm_or.
121 * Revision 1.9 1998/09/02 13:53:17 adam
122 * Extra parameter decode added to search routines to implement
123 * persistent queries.
125 * Revision 1.8 1998/08/24 17:29:23 adam
128 * Revision 1.7 1998/06/24 12:16:13 adam
129 * Support for relations on text operands. Open range support in
130 * DFA module (i.e. [-j], [g-]).
132 * Revision 1.6 1998/06/22 11:36:47 adam
133 * Added authentication check facility to zebra.
135 * Revision 1.5 1998/06/13 00:14:08 adam
138 * Revision 1.4 1998/06/12 12:22:12 adam
141 * Revision 1.3 1998/05/27 16:57:44 adam
142 * Zebra returns surrogate diagnostic for single records when
145 * Revision 1.2 1998/05/20 10:12:19 adam
146 * Implemented automatic EXPLAIN database maintenance.
147 * Modified Zebra to work with ASN.1 compiled version of YAZ.
149 * Revision 1.1 1998/03/05 08:45:13 adam
150 * New result set model and modular ranking system. Moved towards
151 * decent server API. System information stored as "SGML" records.
165 #include <yaz/diagbib1.h>
/* Fetches the "chdir" resource from the service configuration and logs it
 * at debug level.
 * NOTE(review): the statements that act on `dir` are not visible in this
 * view of the file. */
169 static void zebra_chdir (ZebraService zh)
171 const char *dir = res_get (zh->res, "chdir");
174 logf (LOG_DEBUG, "chdir %s", dir);
/* Prototype of the in-memory record extractor; its definition appears near
 * the end of this file, after zebra_admin_import_segment which calls it. */
182 static int extract_rec_in_mem (ZebraHandle zh, const char *recordType,
183 const char *buf, size_t buf_size,
184 const char *databaseName, int delete_flag,
185 int test_mode, int *sysno,
186 int store_keys, int store_data,
187 const char *match_criteria);
/* Forward declarations of static helpers that are defined later in this
 * file. */
189 static int explain_extract (void *handle, Record rec, data1_node *n);
190 static void extract_index (ZebraHandle zh);
192 static void zebra_register_unlock (ZebraHandle zh);
194 static int zebra_register_activate (ZebraService zh, int rw);
195 static int zebra_register_deactivate (ZebraService zh);
/* Acquires the register for one operation on behalf of session zh.
 *
 * rw is handed on to zebra_register_activate when the register is
 * (re)opened.  Callers in this file treat a non-zero return value as
 * failure to lock.
 *
 * NOTE(review): several branch bodies and all return statements are not
 * visible in this view, so the exact pairing of conditions and actions
 * below must be confirmed against the complete file. */
197 static int zebra_register_lock (ZebraHandle zh, int rw)
/* Tested ahead of the visible lock calls. */
203 if (!zh->service->active)
/* Service-wide session lock; zebra_register_unlock releases it. */
213 zebra_mutex_cond_lock (&zh->service->session_lock);
/* Obtain the current state and its last-change marker from the server
 * lock layer, then lock that state. */
215 state = zebra_server_lock_get_state(zh->service, &lastChange);
217 zebra_server_lock (zh->service, state);
228 logf (LOG_LOG, "Register in read/write mode");
/* Same state as the one cached on the service: compare change markers. */
229 else if (zh->service->registerState == state)
231 logf (LOG_DEBUG, "registerChange = %ld lastChange = %ld",
232 (long) zh->service->registerChange, (long)lastChange);
233 if (zh->service->registerChange >= lastChange)
237 logf (LOG_LOG, "Register completely updated since last access");
/* -1 is the value zebra_start stores in registerState initially. */
239 else if (zh->service->registerState == -1)
240 logf (LOG_LOG, "Reading register using state %d pid=%ld", state,
243 logf (LOG_LOG, "Register has changed state from %d to %d",
244 zh->service->registerState, state);
/* Remember the change marker, close the register, record the new state
 * and open the register again with the requested rw mode. */
245 zh->service->registerChange = lastChange;
247 zebra_register_deactivate (zh->service);
249 zh->service->registerState = state;
251 zebra_register_activate (zh->service, rw);
/* Counterpart of zebra_register_lock: releases the server lock for the
 * cached register state and the service-wide session lock, then logs the
 * user/system time difference between zh->tms1 and zh->tms2.
 * NOTE(review): where tms1/tms2 are sampled is not visible in this view. */
256 static void zebra_register_unlock (ZebraHandle zh)
/* -1 is the initial registerState set by zebra_start; no server lock is
 * released in that case. */
258 if (zh->service->registerState != -1)
259 zebra_server_unlock (zh->service, zh->service->registerState);
260 zebra_mutex_cond_unlock (&zh->service->session_lock);
263 logf (LOG_LOG, "user/system: %ld/%ld",
264 (long) (zh->tms2.tms_utime - zh->tms1.tms_utime),
265 (long) (zh->tms2.tms_stime - zh->tms1.tms_stime));
/* Creates a new session handle for service zs.
 *
 * The handle is allocated with xmalloc, its admin_databaseName is cleared,
 * and its next pointer is set to the current head of zs->sessions while
 * zs->session_lock is held.
 * NOTE(review): the remaining field initialisation, the update of
 * zs->sessions and the return statement are not visible in this view. */
270 ZebraHandle zebra_open (ZebraService zs)
278 zh = (ZebraHandle) xmalloc (sizeof(*zh));
279 yaz_log (LOG_LOG, "zebra_open zs=%p returns %p", zs, zh);
288 zh->admin_databaseName = 0;
/* The session list is shared between handles, hence the lock. */
290 zebra_mutex_cond_lock (&zs->session_lock);
292 zh->next = zs->sessions;
295 zebra_mutex_cond_unlock (&zs->session_lock);
/* Creates a service object from the configuration file configName.
 *
 * Visible steps: allocate the service, copy the configuration name, mark
 * the register state as unknown (-1), read the resources, initialise the
 * server lock and the session lock, and open the password database when a
 * "passwd" resource is configured.
 * NOTE(review): the error-path statements and the return value are not
 * visible in this view. */
301 ZebraService zebra_start (const char *configName)
303 ZebraService zh = xmalloc (sizeof(*zh));
305 yaz_log (LOG_LOG, "zebra_start %s", configName);
/* The service keeps its own copy; zebra_stop frees it. */
307 zh->configName = xstrdup(configName);
/* -1 is later tested by zebra_register_lock/unlock. */
312 zh->registerState = -1;
313 zh->registerChange = 0;
315 if (!(zh->res = res_open (zh->configName)))
317 logf (LOG_WARN, "Failed to read resources `%s'", zh->configName);
321 zebra_server_lock_init (zh);
322 zebra_mutex_cond_init (&zh->session_lock);
/* Without a "passwd" resource no password database is used; zebra_auth
 * tests passwd_db for NULL. */
323 if (!res_get (zh->res, "passwd"))
324 zh->passwd_db = NULL;
327 zh->passwd_db = passwd_db_open ();
329 logf (LOG_WARN|LOG_ERRNO, "passwd_db_open failed");
331 passwd_db_file (zh->passwd_db, res_get (zh->res, "passwd"));
/* Opens every component of the register for service zh.
 *
 * rw is passed to each *_open call.  The components opened, in order, are:
 * data1 handle, block file system ("register" resource), record types,
 * zebra maps, rank classes, record storage, dictionary, sort index, one
 * ISAM variant selected by the "isam" resource (s, i, c or d), and the
 * EXPLAIN information.  Each failed open logs a warning.
 *
 * NOTE(review): the guarding conditions, the cleanup on failure and the
 * return values are not visible in this view. */
337 static int zebra_register_activate (ZebraService zh, int rw)
341 yaz_log (LOG_LOG, "zebra_register_activate (ignored since active=%d)",
/* A non-zero registerState is reported as shadow mode and also decides
 * below whether the "shadow" resource is handed to bf_cache. */
345 yaz_log (LOG_LOG, "zebra_register_activate shadow=%s",
346 zh->registerState ? "yes" : "no");
348 zh->dh = data1_create ();
351 zh->bfs = bfs_create (res_get (zh->res, "register"));
354 data1_destroy(zh->dh);
357 bf_lockDir (zh->bfs, res_get (zh->res, "lockDir"));
358 bf_cache (zh->bfs, zh->registerState ? res_get (zh->res, "shadow") : NULL);
359 data1_set_tabpath (zh->dh, res_get(zh->res, "profilePath"));
360 zh->recTypes = recTypes_init (zh->dh);
361 recTypes_default_handlers (zh->recTypes);
364 zh->zebra_maps = zebra_maps_open (zh->res);
365 zh->rank_classes = NULL;
378 zebraRankInstall (zh, rank1_class);
380 if (!(zh->records = rec_open (zh->bfs, rw, 0)))
382 logf (LOG_WARN, "rec_open");
385 if (!(zh->dict = dict_open (zh->bfs, FNAME_DICT, 80, rw, 0)))
387 logf (LOG_WARN, "dict_open");
390 if (!(zh->sortIdx = sortIdx_open (zh->bfs, rw)))
392 logf (LOG_WARN, "sortIdx_open");
/* Exactly one ISAM implementation is chosen by matching the "isam"
 * resource against "s", "i", "c" or "d". */
395 if (res_get_match (zh->res, "isam", "s", ISAM_DEFAULT))
397 struct ISAMS_M_s isams_m;
398 if (!(zh->isams = isams_open (zh->bfs, FNAME_ISAMS, rw,
399 key_isams_m(zh->res, &isams_m))))
401 logf (LOG_WARN, "isams_open");
406 else if (res_get_match (zh->res, "isam", "i", ISAM_DEFAULT))
408 if (!(zh->isam = is_open (zh->bfs, FNAME_ISAM, key_compare, rw,
409 sizeof (struct it_key), zh->res)))
411 logf (LOG_WARN, "is_open");
415 else if (res_get_match (zh->res, "isam", "c", ISAM_DEFAULT))
417 struct ISAMC_M_s isamc_m;
418 if (!(zh->isamc = isc_open (zh->bfs, FNAME_ISAMC,
419 rw, key_isamc_m(zh->res, &isamc_m))))
421 logf (LOG_WARN, "isc_open");
425 else if (res_get_match (zh->res, "isam", "d", ISAM_DEFAULT))
427 struct ISAMD_M_s isamd_m;
429 if (!(zh->isamd = isamd_open (zh->bfs, FNAME_ISAMD,
430 rw, key_isamd_m(zh->res, &isamd_m))))
432 logf (LOG_WARN, "isamd_open");
437 zh->zei = zebraExplain_open (zh->records, zh->dh,
438 zh->res, rw, 0 /* rGroup */,
442 logf (LOG_WARN, "Cannot obtain EXPLAIN information");
446 yaz_log (LOG_LOG, "zebra_register_activate ok");
/* Administrative shutdown requested through session zh.
 *
 * Under the session lock: sets the service stop_flag, closes the register
 * right away when the service has no sessions, and clears the active
 * flag.  zebra_close also tests stop_flag together with an empty session
 * list before closing the register. */
450 void zebra_admin_shutdown (ZebraHandle zh)
452 zebra_mutex_cond_lock (&zh->service->session_lock);
453 zh->service->stop_flag = 1;
454 if (!zh->service->sessions)
455 zebra_register_deactivate(zh->service);
456 zh->service->active = 0;
457 zebra_mutex_cond_unlock (&zh->service->session_lock);
/* Administrative start requested through session zh: sets the service
 * active flag to 1 while holding the session lock.
 * NOTE(review): one or more statements between taking the lock and
 * setting the flag are not visible in this view. */
460 void zebra_admin_start (ZebraHandle zh)
462 ZebraService zs = zh->service;
464 zebra_mutex_cond_lock (&zs->session_lock);
466 zh->service->active = 1;
467 zebra_mutex_cond_unlock (&zs->session_lock);
/* Closes the register components held by service zs: EXPLAIN information,
 * dictionary, sort index, the ISAM handles, record storage, record types,
 * zebra maps, rank classes, block file system and data1 handle.
 *
 * NOTE(review): the conditions that guard the individual close calls
 * (including the early "ignored" path and the passwd_db_close call) and
 * the return value are not visible in this view. */
470 static int zebra_register_deactivate (ZebraService zs)
475 yaz_log(LOG_LOG, "zebra_register_deactivate (ignored since active=%d)",
479 yaz_log(LOG_LOG, "zebra_register_deactivate");
483 zebraExplain_close (zs->zei, 0);
484 dict_close (zs->dict);
485 sortIdx_close (zs->sortIdx);
487 isams_close (zs->isams);
492 isc_close (zs->isamc);
494 isamd_close (zs->isamd);
/* rec_close takes the address of the handle, unlike the other closers. */
496 rec_close (&zs->records);
498 recTypes_destroy (zs->recTypes);
499 zebra_maps_close (zs->zebra_maps);
500 zebraRankDestroy (zs);
501 bfs_destroy (zs->bfs);
502 data1_destroy (zs->dh);
505 passwd_db_close (zs->passwd_db);
/* Shuts down service zs: closes sessions from zs->sessions, destroys the
 * session lock, closes the register and frees the configuration name.
 * NOTE(review): the loop or condition around zebra_close and the final
 * release of zs itself are not visible in this view. */
510 void zebra_stop(ZebraService zs)
514 yaz_log (LOG_LOG, "zebra_stop");
516 zebra_mutex_cond_lock (&zs->session_lock);
518 zebra_close (zs->sessions);
520 zebra_mutex_cond_unlock (&zs->session_lock);
/* The lock is destroyed before the register is closed. */
522 zebra_mutex_cond_destroy (&zs->session_lock);
524 zebra_register_deactivate(zs);
526 xfree (zs->configName);
/* Closes session zh: destroys its result sets, frees its
 * admin_databaseName and, under the service session lock, closes the
 * register when no sessions remain and a stop was requested.
 * NOTE(review): the code that unlinks zh from the session list (using sp)
 * and frees zh is not visible in this view. */
530 void zebra_close (ZebraHandle zh)
532 ZebraService zs = zh->service;
533 struct zebra_session **sp;
535 yaz_log (LOG_LOG, "zebra_close zh=%p", zh);
/* Same argument pattern (-1, 0) that zebra_deleleResultSet uses for the
 * "delete all" request. */
538 resultSetDestroy (zh, -1, 0, 0);
545 xfree (zh->admin_databaseName);
546 zebra_mutex_cond_lock (&zs->session_lock);
/* stop_flag is set by zebra_admin_shutdown. */
558 if (!zs->sessions && zs->stop_flag)
559 zebra_register_deactivate(zs);
560 zebra_mutex_cond_unlock (&zs->session_lock);
/* State shared between map_basenames and its res_trav callback
 * map_basenames_func.  Only new_basenames is visible in this view; those
 * functions also use the members num_bases, basenames, new_num_max,
 * new_num_bases and mem. */
564 struct map_baseinfo {
/* Output array of mapped database names. */
570 char **new_basenames;
/* res_trav callback used by map_basenames for "mapdb" resource entries.
 *
 * vp    - the struct map_baseinfo being filled in.
 * name  - resource name (not used in the visible lines).
 * value - whitespace-separated list: a source database name followed by up
 *         to eight target names.
 *
 * Input names equal to the source name are looked up, and target names
 * are appended to p->new_basenames as copies allocated from p->mem.
 * NOTE(review): the assignment of `no`, the action taken on a match and
 * the action taken when the output array is full are not visible here. */
574 void map_basenames_func (void *vp, const char *name, const char *value)
576 struct map_baseinfo *p = (struct map_baseinfo *) vp;
/* Each %127s conversion below fits one of these 128-byte buffers. */
578 char fromdb[128], todb[8][128];
581 sscanf (value, "%127s %127s %127s %127s %127s %127s %127s %127s %127s",
582 fromdb, todb[0], todb[1], todb[2], todb[3], todb[4],
583 todb[5], todb[6], todb[7]);
587 for (i = 0; i<p->num_bases; i++)
588 if (p->basenames[i] && !strcmp (p->basenames[i], fromdb))
591 for (i = 0; i < no; i++)
/* Capacity check against the size set up by map_basenames. */
593 if (p->new_num_bases == p->new_num_max)
595 p->new_basenames[(p->new_num_bases)++] =
596 nmem_strdup (p->mem, todb[i]);
/* Rewrites a list of database names through the "mapdb" resources.
 *
 * zh        - session whose service resources are consulted.
 * stream    - ODR stream; the new array and the name copies are allocated
 *             from it.
 * num_bases - in: number of input names; out: number of resulting names.
 * basenames - in: input names; out: newly allocated array of names.
 *
 * The output array has room for 128 entries.  After the mapping pass,
 * input entries that are still non-NULL are copied over as long as there
 * is room, and every resulting name is logged. */
602 void map_basenames (ZebraHandle zh, ODR stream,
603 int *num_bases, char ***basenames)
605 struct map_baseinfo info;
606 struct map_baseinfo *p = &info;
610 info.num_bases = *num_bases;
611 info.basenames = *basenames;
612 info.new_num_max = 128;
613 info.new_num_bases = 0;
614 info.new_basenames = (char **)
615 odr_malloc (stream, sizeof(*info.new_basenames) * info.new_num_max);
616 info.mem = stream->mem;
/* map_basenames_func is invoked for the "mapdb" resources. */
618 res_trav (zh->service->res, "mapdb", &info, map_basenames_func);
620 for (i = 0; i<p->num_bases; i++)
621 if (p->basenames[i] && p->new_num_bases < p->new_num_max)
623 p->new_basenames[(p->new_num_bases)++] =
624 nmem_strdup (p->mem, p->basenames[i]);
626 *num_bases = info.new_num_bases;
627 *basenames = info.new_basenames;
628 for (i = 0; i<*num_bases; i++)
629 logf (LOG_LOG, "base %s", (*basenames)[i]);
/* Runs an RPN search and stores the outcome as result set `setname`.
 *
 * The register is locked for reading (rw = 0), the database names are
 * mapped via map_basenames, resultSetAddRPN performs the search, and the
 * register is unlocked again.  The hit count in zh->hits is then written
 * to the application log.
 * NOTE(review): the trailing parameters of the signature and the action
 * taken when locking fails are not visible in this view. */
632 void zebra_search_rpn (ZebraHandle zh, ODR stream, ODR decode,
633 Z_RPNQuery *query, int num_bases, char **basenames,
637 if (zebra_register_lock (zh, 0))
639 map_basenames (zh, stream, &num_bases, &basenames);
640 resultSetAddRPN (zh, stream, decode, query, num_bases, basenames, setname);
642 zebra_register_unlock (zh);
644 logf(LOG_APP,"SEARCH:%d:",zh->hits);
/* Fetches num_recs records from result set `setname` into recs[].
 *
 * On entry recs[i].position holds the requested positions.  They are
 * copied into a temporary array and resolved with zebraPosSetCreate.  For
 * each resolved entry either its term is returned as a SUTRS record or,
 * when it has a sysno, the record is fetched with zebra_record_fetch in
 * the requested format/composition.  Diagnostic text is allocated from
 * stream->mem and stored in zh->errString.
 *
 * NOTE(review): the conditions selecting each branch, the error codes and
 * the release of pos_array are not visible in this view. */
647 void zebra_records_retrieve (ZebraHandle zh, ODR stream,
648 const char *setname, Z_RecordComposition *comp,
649 oid_value input_format, int num_recs,
650 ZebraRetrievalRecord *recs)
/* Read-only register access. */
655 if (zebra_register_lock (zh, 0))
657 pos_array = (int *) xmalloc (num_recs * sizeof(*pos_array));
658 for (i = 0; i<num_recs; i++)
659 pos_array[i] = recs[i].position;
660 poset = zebraPosSetCreate (zh, setname, num_recs, pos_array);
663 logf (LOG_DEBUG, "zebraPosSetCreate error");
/* The result set name is used as the additional error information. */
665 zh->errString = nmem_strdup (stream->mem, setname);
669 for (i = 0; i<num_recs; i++)
/* Term entry: hand back the term text itself as a SUTRS record. */
674 recs[i].format = VAL_SUTRS;
675 recs[i].len = strlen(poset[i].term);
676 recs[i].buf = poset[i].term;
677 recs[i].base = poset[i].db;
679 else if (poset[i].sysno)
682 zebra_record_fetch (zh, poset[i].sysno, poset[i].score,
683 stream, input_format, comp,
684 &recs[i].format, &recs[i].buf,
687 recs[i].errString = NULL;
/* The requested position, as decimal text, becomes the additional error
 * information. */
693 sprintf (num_str, "%d", pos_array[i]);
695 zh->errString = nmem_strdup (stream->mem, num_str);
699 zebraPosSetDestroy (zh, poset, num_recs);
701 zebra_register_unlock (zh);
/* Scans the index around the term in zapt.
 *
 * With the register locked for reading, the database names are mapped via
 * map_basenames and rpn_scan does the work, filling in *position,
 * *num_entries, *entries and is_partial.
 * NOTE(review): the last parameter line of the signature and the
 * statements between locking and mapping are not visible in this view. */
705 void zebra_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
706 oid_value attributeset,
707 int num_bases, char **basenames,
708 int *position, int *num_entries, ZebraScanEntry **entries,
711 if (zebra_register_lock (zh, 0))
717 map_basenames (zh, stream, &num_bases, &basenames);
718 rpn_scan (zh, stream, zapt, attributeset,
719 num_bases, basenames, position,
720 num_entries, entries, is_partial);
721 zebra_register_unlock (zh);
/* Sorts the named input result sets into output_setname according to
 * sort_sequence.  The register is locked for reading around the call to
 * resultSetSort, which uses memory from stream->mem and reports through
 * sort_status.
 * NOTE(review): the last parameter line of the signature and the action
 * taken when locking fails are not visible in this view. */
724 void zebra_sort (ZebraHandle zh, ODR stream,
725 int num_input_setnames, const char **input_setnames,
726 const char *output_setname, Z_SortKeySpecList *sort_sequence,
729 if (zebra_register_lock (zh, 0))
731 resultSetSort (zh, stream->mem, num_input_setnames, input_setnames,
732 output_setname, sort_sequence, sort_status);
733 zebra_register_unlock (zh);
/* Deletes result sets for a Z39.50 delete request.
 *
 * function selects between deleting the listed set names
 * (Z_DeleteRequest_list) and deleting all sets (Z_DeleteRequest_all).
 * Per-set outcomes are written to statuses[]; the overall status starts
 * as success and is replaced by "result set did not exist" when any
 * listed set reports that.
 * Returns Z_DeleteStatus_systemProblemAtTarget when the register cannot
 * be locked.
 *
 * NOTE(review): the name is spelled "delele"; it is part of the external
 * interface.  The final return statement is not visible in this view. */
736 int zebra_deleleResultSet(ZebraHandle zh, int function,
737 int num_setnames, char **setnames,
741 if (zebra_register_lock (zh, 0))
742 return Z_DeleteStatus_systemProblemAtTarget;
745 case Z_DeleteRequest_list:
746 resultSetDestroy (zh, num_setnames, setnames, statuses);
748 case Z_DeleteRequest_all:
/* A count of -1 with no name list is the "all sets" form. */
749 resultSetDestroy (zh, -1, 0, statuses);
752 zebra_register_unlock (zh);
753 status = Z_DeleteStatus_success;
754 for (i = 0; i<num_setnames; i++)
755 if (statuses[i] == Z_DeleteStatus_resultSetDidNotExist)
756 status = statuses[i];
/* Error-code accessor for session zh.
 * NOTE(review): the body is not visible in this view; presumably it
 * returns zh->errCode, the field zebra_errString reads - confirm. */
760 int zebra_errCode (ZebraHandle zh)
/* Returns the Bib-1 diagnostic message that diagbib1_str associates with
 * the session's current error code. */
765 const char *zebra_errString (ZebraHandle zh)
767 return diagbib1_str (zh->errCode);
/* Returns the additional error information stored in zh->errString.  The
 * pointer is returned as stored; other functions in this file set it to
 * stream-allocated copies or to a string literal. */
770 char *zebra_errAdd (ZebraHandle zh)
772 return zh->errString;
/* Hit-count accessor for session zh.
 * NOTE(review): the body is not visible in this view; presumably it
 * returns zh->hits, the field zebra_search_rpn logs - confirm. */
775 int zebra_hits (ZebraHandle zh)
/* Checks user/pass against the service password database and writes an
 * AUTHOK or AUTHFAIL line to the application log; a NULL user is logged
 * as "ANONYMOUS".
 * The condition below holds when no password database is configured or
 * when passwd_db_auth returns zero.
 * NOTE(review): the return statements are not visible in this view. */
780 int zebra_auth (ZebraService zh, const char *user, const char *pass)
782 if (!zh->passwd_db || !passwd_db_auth (zh->passwd_db, user, pass))
784 logf(LOG_APP,"AUTHOK:%s", user?user:"ANONYMOUS");
788 logf(LOG_APP,"AUTHFAIL:%s", user?user:"ANONYMOUS");
/* Starts a remote import into `database`: locks the register for writing
 * (rw = 1) and stores a private copy of the database name in
 * zh->admin_databaseName, replacing any previous one.
 * zebra_admin_import_end releases the register again.
 * NOTE(review): the action taken when locking fails is not visible. */
792 void zebra_admin_import_begin (ZebraHandle zh, const char *database)
794 if (zebra_register_lock (zh, 1))
796 xfree (zh->admin_databaseName);
797 zh->admin_databaseName = xstrdup(database);
/* Finishes a remote import: flushes the EXPLAIN information and releases
 * the register lock.
 * NOTE(review): a statement between the two visible calls is not visible
 * in this view. */
800 void zebra_admin_import_end (ZebraHandle zh)
802 zebraExplain_flush (zh->service->zei, 1, zh);
804 zebra_register_unlock (zh);
/* Imports the records carried by one Z39.50 Segment.
 *
 * For every segment record the database name is taken from the record or
 * from zh->admin_databaseName, and intermediate fragments that are not
 * externally tagged are passed to extract_rec_in_mem as "grs.sgml".
 *
 * NOTE(review): the printf calls write debug output (a separator and at
 * most the first 100 bytes of each fragment) to stdout.  The condition
 * choosing between the two database names and most arguments of the
 * extract_rec_in_mem call are not visible in this view. */
807 void zebra_admin_import_segment (ZebraHandle zh, Z_Segment *segment)
811 if (zh->service->active < 2)
813 for (i = 0; i<segment->num_segmentRecords; i++)
815 Z_NamePlusRecord *npr = segment->segmentRecords[i];
816 const char *databaseName = npr->databaseName;
819 databaseName = zh->admin_databaseName;
820 printf ("--------------%d--------------------\n", i);
821 if (npr->which == Z_NamePlusRecord_intermediateFragment)
823 Z_FragmentSyntax *fragment = npr->u.intermediateFragment;
824 if (fragment->which == Z_FragmentSyntax_notExternallyTagged)
826 Odr_oct *oct = fragment->u.notExternallyTagged;
827 printf ("%.*s", (oct->len > 100 ? 100 : oct->len) ,
831 extract_rec_in_mem (zh, "grs.sgml",
839 0 /* match criteria */);
/* Creates a new database named `database`.
 *
 * With the register locked for writing, the database is announced to the
 * EXPLAIN subsystem.  A non-zero result from zebraExplain_newDatabase is
 * reported through zh->errString.  The EXPLAIN information is flushed
 * before the register is unlocked.
 * NOTE(review): the error code assignment and the action taken when
 * locking fails are not visible in this view. */
845 void zebra_admin_create (ZebraHandle zh, const char *database)
847 ZebraService zs = zh->service;
848 if (zebra_register_lock(zh, 1))
853 /* announce database */
854 if (zebraExplain_newDatabase (zs->zei, database, 0 /* explainDatabase */))
/* A string literal is stored here, so callers of zebra_errAdd must not
 * modify or free the returned text. */
857 zh->errString = "Database already exist";
859 zebraExplain_flush (zh->service->zei, 1, zh);
861 zebra_register_unlock(zh);
/* Normalises input_str (input_len bytes) for register type reg_id using
 * the service's zebra maps and writes the result, NUL-terminated, to
 * output_str.
 *
 * output_len is the capacity of output_str; the result length is compared
 * against it before copying so that the terminator fits.
 * Returns the length of the normalised string on the visible success
 * path.
 * NOTE(review): the return values of the failure paths (no maps, no
 * replacement buffer, result too long) are not visible in this view. */
864 int zebra_string_norm (ZebraHandle zh, unsigned reg_id,
865 const char *input_str, int input_len,
866 char *output_str, int output_len)
869 if (!zh->service->zebra_maps)
871 wrbuf = zebra_replace(zh->service->zebra_maps, reg_id, "",
872 input_str, input_len);
/* ">=" leaves room for the '\0' written below. */
875 if (wrbuf_len(wrbuf) >= output_len)
877 if (wrbuf_len(wrbuf))
878 memcpy (output_str, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
879 output_str[wrbuf_len(wrbuf)] = '\0';
880 return wrbuf_len(wrbuf);
/* Initialises a RecWord from an extraction control block: copies the
 * zebra maps and the sequence-number array pointer, and selects Bib-1 as
 * the default attribute set.
 * NOTE(review): further field assignments are not visible in this view. */
883 static void extract_init (struct recExtractCtrl *p, RecWord *w)
885 w->zebra_maps = p->zebra_maps;
886 w->seqnos = p->seqno;
887 w->attrSet = VAL_BIB1;
/* Appends one index key for `string` to the session's record key buffer
 * (zh->keys).
 *
 * The buffer is grown in steps of 128000 bytes whenever fewer than 1024
 * bytes remain.  Attribute set, attribute use and sequence number are
 * compared with the values of the previous key (prevAttrSet, prevAttrUse,
 * prevSeqNo); extract_flushRecordKeys decodes the resulting byte stream.
 *
 * NOTE(review): the lead-byte construction, the conditions deciding which
 * fields are written and the final parameter line of the signature are
 * not visible in this view, so the exact layout must be confirmed against
 * the complete file. */
893 static void extract_add_index_string (RecWord *p, const char *string,
897 unsigned char attrSet;
898 unsigned short attrUse;
/* Sequence counter for this register type. */
901 int *pseqno = &p->seqnos[p->reg_type];
902 ZebraHandle zh = p->extractCtrl->handle;
903 ZebraExplainInfo zei = zh->service->zei;
904 struct recKeys *keys = &zh->keys;
/* Keep at least 1024 bytes of headroom for the key about to be added. */
906 if (keys->buf_used+1024 > keys->buf_max)
910 b = (char *) xmalloc (keys->buf_max += 128000);
911 if (keys->buf_used > 0)
912 memcpy (b, keys->buf, keys->buf_used);
916 dst = keys->buf + keys->buf_used;
918 attrSet = p->attrSet;
919 if (keys->buf_used > 0 && keys->prevAttrSet == attrSet)
922 keys->prevAttrSet = attrSet;
923 attrUse = p->attrUse;
924 if (keys->buf_used > 0 && keys->prevAttrUse == attrUse)
927 keys->prevAttrUse = attrUse;
/* Distance to the previous sequence number, offset by one; values 1..15
 * are tested as a special case. */
929 diff = 1 + *pseqno - keys->prevSeqNo;
930 if (diff >= 1 && diff <= 15)
935 keys->prevSeqNo = *pseqno;
/* Look up the ordinal for this attribute set/use pair in the EXPLAIN
 * information, adding it when needed. */
942 int ch = zebraExplain_lookupSU (zei, attrSet, attrUse);
945 ch = zebraExplain_addSU (zei, attrSet, attrUse);
946 yaz_log (LOG_LOG, "addSU set=%d use=%d SU=%d",
947 attrSet, attrUse, ch);
950 memcpy (dst, &ch, sizeof(ch));
956 memcpy (dst, &attrSet, sizeof(attrSet));
957 dst += sizeof(attrSet);
961 memcpy (dst, &attrUse, sizeof(attrUse));
962 dst += sizeof(attrUse);
965 *dst++ = p->reg_type;
966 memcpy (dst, string, length);
972 memcpy (dst, pseqno, sizeof(*pseqno));
973 dst += sizeof(*pseqno);
975 keys->buf_used = dst - keys->buf;
/* Records `string` as a sort key for the attribute set/use of p.
 *
 * The session's sort key list is searched for an entry with the same
 * attribute set and use; a new entry is allocated with its own copy of
 * the string (length bytes, no terminator added in the visible lines).
 *
 * NOTE(review): the action taken when a matching entry exists, the list
 * linkage of the new entry and the last parameter line of the signature
 * are not visible in this view. */
980 static void extract_add_sort_string (RecWord *p, const char *string,
984 ZebraHandle zh = p->extractCtrl->handle;
985 struct sortKey *sortKeys = zh->sortKeys;
987 for (sk = sortKeys; sk; sk = sk->next)
988 if (sk->attrSet == p->attrSet && sk->attrUse == p->attrUse)
991 sk = (struct sortKey *) xmalloc (sizeof(*sk));
995 sk->string = (char *) xmalloc (length);
997 memcpy (sk->string, string, length);
999 sk->attrSet = p->attrSet;
1000 sk->attrUse = p->attrUse;
/* Dispatches a non-empty token either to the sort key list or to the
 * index key buffer, depending on whether the register type of p is a sort
 * register according to the zebra maps.  length must be positive. */
1003 static void extract_add_string (RecWord *p, const char *string, int length)
1005 assert (length > 0);
1006 if (zebra_maps_is_sort (p->zebra_maps, p->reg_type))
1007 extract_add_sort_string (p, string, length);
1009 extract_add_index_string (p, string, length);
/* Splits the field in p->string / p->length into tokens and adds each
 * one via extract_add_string.
 *
 * Characters are mapped through zebra_maps_input.  Runs whose mapping
 * starts with the CHR_SPACE character are skipped; the following mapped
 * characters are collected into buf, limited to IT_MAX_WORD bytes per
 * token.  The sequence number for the register type is incremented once
 * more at the end so that this field is separated from the next.
 *
 * NOTE(review): the outer loop, the copy into buf and the seqno
 * increment per token are not visible in this view. */
1012 static void extract_add_incomplete_field (RecWord *p)
1014 const char *b = p->string;
1015 int remain = p->length;
1016 const char **map = 0;
1019 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
1023 char buf[IT_MAX_WORD+1];
/* Skip separators. */
1027 while (map && *map && **map == *CHR_SPACE)
/* b has been advanced by zebra_maps_input; recompute what is left. */
1029 remain = p->length - (b - p->string);
1031 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
/* Collect one token. */
1038 while (map && *map && **map != *CHR_SPACE)
1040 const char *cp = *map;
1042 while (i < IT_MAX_WORD && *cp)
1044 remain = p->length - (b - p->string);
1046 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
1052 extract_add_string (p, buf, i);
1054 (p->seqnos[p->reg_type])++; /* to separate this from next one */
/* Adds the whole field in p->string / p->length as a single key.
 *
 * Characters are mapped through zebra_maps_input.  Runs of separators are
 * skipped and, between words, a single CHR_SPACE character is stored in
 * buf; the result is limited to IT_MAX_WORD bytes and passed to
 * extract_add_string once.
 *
 * NOTE(review): the copy into buf, the loop exits and any guard before
 * the final extract_add_string call are not visible in this view. */
1057 static void extract_add_complete_field (RecWord *p)
1059 const char *b = p->string;
1060 char buf[IT_MAX_WORD+1];
1061 const char **map = 0;
1062 int i = 0, remain = p->length;
1065 map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain);
1067 while (remain > 0 && i < IT_MAX_WORD)
/* Skip separators. */
1069 while (map && *map && **map == *CHR_SPACE)
1071 remain = p->length - (b - p->string);
1073 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
/* One space between words, but not before the first word. */
1080 if (i && i < IT_MAX_WORD)
1081 buf[i++] = *CHR_SPACE;
1082 while (map && *map && **map != *CHR_SPACE)
1084 const char *cp = *map;
1086 if (i >= IT_MAX_WORD)
1088 while (i < IT_MAX_WORD && *cp)
1090 remain = p->length - (b - p->string);
1092 map = zebra_maps_input (p->zebra_maps, p->reg_type, &b,
1100 extract_add_string (p, buf, i);
/* tokenAdd callback installed in the extraction control blocks.
 *
 * When zebra_replace yields a buffer for the register type, p->string and
 * p->length are redirected to it.  The field is then indexed either as
 * one complete key or token by token, depending on
 * zebra_maps_is_complete. */
1103 static void extract_token_add (RecWord *p)
1106 if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0,
1107 p->string, p->length)))
1109 p->string = wrbuf_buf(wrbuf);
1110 p->length = wrbuf_len(wrbuf);
1112 if (zebra_maps_is_complete (p->zebra_maps, p->reg_type))
1113 extract_add_complete_field (p);
1115 extract_add_incomplete_field(p);
/* schemaAdd callback installed in the extraction control blocks:
 * registers the schema OID with the EXPLAIN information of the service
 * that owns the session stored in p->handle. */
1118 static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
1120 ZebraHandle zh = (ZebraHandle) (p->handle);
1121 zebraExplain_addSchema (zh->service->zei, oid);
/* Writes the sort keys in the list *skp to the sort index for record
 * `sysno`.
 *
 * The sort index is positioned on sysno, then for each key the attribute
 * use selects the sort type and the key string is added.
 *
 * NOTE(review): how cmd is used, the loop statement, the freeing of list
 * entries and the update of *skp are not visible in this view. */
1124 static void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
1125 int cmd, struct sortKey **skp)
1127 struct sortKey *sk = *skp;
1128 SortIdx sortIdx = zh->service->sortIdx;
1130 sortIdx_sysno (sortIdx, sysno);
/* next is saved before the entry is processed. */
1133 struct sortKey *sk_next = sk->next;
1134 sortIdx_type (sortIdx, sk->attrUse);
1135 sortIdx_add (sortIdx, sk->string, sk->length);
/* Encoder state used while writing sorted keys to a temporary file.  The
 * members are not visible in this view; encode_key_write reads and writes
 * sysno, seqno, cmd and buf. */
1143 struct encode_info {
/* Prepares encoder state i for a new run of keys.  The body is not
 * visible in this view; extract_flushWriteKeys calls this before the
 * first key and again whenever the key string changes. */
1150 void encode_key_init (struct encode_info *i)
/* Writes integer d at bp in a variable-length form and is used by
 * encode_key_write, which continues writing at the returned pointer.
 *
 * Visible cases: values up to 16383 start with a byte of 64 plus the high
 * bits, values up to 4194303 start with 128 plus the high bits, and
 * larger values start with 192 plus the high bits; the following bytes
 * carry the remaining bits, most significant first.
 *
 * NOTE(review): the first branch (small values), the low-order byte
 * writes and the return statement are not visible in this view. */
1157 char *encode_key_int (int d, char *bp)
1161 else if (d <= 16383)
1163 *bp++ = 64 + (d>>8);
1166 else if (d <= 4194303)
1168 *bp++ = 128 + (d>>16);
1169 *bp++ = (d>>8) & 255;
1174 *bp++ = 192 + (d>>24);
1175 *bp++ = (d>>16) & 255;
1176 *bp++ = (d>>8) & 255;
/* Encodes one sorted key entry and writes it to outf.
 *
 * k points at a NUL-terminated key string that is followed by a command
 * byte and a struct it_key (the layout extract_flushRecordKeys produces).
 * The string is copied including its terminator; sysno is written as a
 * difference from the previous sysno, doubled, with the command byte
 * added; seqno is written as a difference from the previous seqno.  The
 * encoder state in i is updated accordingly.
 *
 * NOTE(review): the initialisation of bp, the statements under the two
 * conditions on sysno/seqno and the handling after a failed fwrite are
 * not visible in this view. */
1182 void encode_key_write (char *k, struct encode_info *i, FILE *outf)
/* Copies through the terminating NUL; afterwards k points at the command
 * byte. */
1187 while ((*bp++ = *k++))
1189 memcpy (&key, k+1, sizeof(struct it_key));
1190 bp = encode_key_int ( (key.sysno - i->sysno) * 2 + *k, bp);
1191 if (i->sysno != key.sysno)
1193 i->sysno = key.sysno;
1196 else if (!i->seqno && !key.seqno && i->cmd == *k)
1198 bp = encode_key_int (key.seqno - i->seqno, bp);
1199 i->seqno = key.seqno;
1201 if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
1203 logf (LOG_FATAL|LOG_ERRNO, "fwrite");
/* Sorts the keys buffered in zh->key_buf and writes them as the next
 * numbered section to a temporary file.
 *
 * Nothing is done when there is no key buffer or no buffered key
 * pointers.  Otherwise the section counter is incremented, the pointer
 * array is sorted with qsort, the file name is obtained from
 * extract_get_fname_tmp and the keys are written with encode_key_write;
 * the encoder is re-initialised whenever the key string differs from the
 * previous one.  Finally key_buf_used is reset to 0.
 *
 * NOTE(review): this view shows two variants of the sort/write code.  The
 * second one refers to key_buf, ptr_top and key_file_no without the zh->
 * prefix, so presumably one of them is disabled by preprocessor
 * conditionals that are not visible here - confirm.  The loops, the
 * fclose call and the reset of zh->ptr_i are not visible either. */
1208 static void extract_flushWriteKeys (ZebraHandle zh)
1211 char out_fname[200];
1213 struct encode_info encode_info;
1214 int ptr_i = zh->ptr_i;
1218 if (!zh->key_buf || ptr_i <= 0)
1221 (zh->key_file_no)++;
1222 logf (LOG_LOG, "sorting section %d", (zh->key_file_no));
/* The key pointers occupy the top ptr_i slots of the buffer. */
1224 qsort (zh->key_buf + zh->ptr_top - ptr_i, ptr_i, sizeof(char*),
1226 extract_get_fname_tmp (zh, out_fname, zh->key_file_no);
1228 if (!(outf = fopen (out_fname, "wb")))
1230 logf (LOG_FATAL|LOG_ERRNO, "fopen %s", out_fname);
1233 logf (LOG_LOG, "writing section %d", zh->key_file_no);
1234 prevcp = cp = (zh->key_buf)[zh->ptr_top - ptr_i];
1236 encode_key_init (&encode_info);
1237 encode_key_write (cp, &encode_info, outf);
1241 cp = (zh->key_buf)[zh->ptr_top - ptr_i];
/* A different key string starts a fresh encoder state. */
1242 if (strcmp (cp, prevcp))
1244 encode_key_init (&encode_info);
1245 encode_key_write (cp, &encode_info, outf);
1249 encode_key_write (cp + strlen(cp), &encode_info, outf);
1252 qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare);
1253 extract_get_fname_tmp (out_fname, key_file_no);
1255 if (!(outf = fopen (out_fname, "wb")))
1257 logf (LOG_FATAL|LOG_ERRNO, "fopen %s", out_fname);
1260 logf (LOG_LOG, "writing section %d", key_file_no);
1262 prevcp = key_buf[ptr_top-i];
1264 if (!--i || strcmp (prevcp, key_buf[ptr_top-i]))
1266 key_y_len = strlen(prevcp)+1;
1268 logf (LOG_LOG, "key_y_len: %2d %02x %02x %s",
1269 key_y_len, prevcp[0], prevcp[1], 2+prevcp);
1271 qsort (key_buf + ptr_top-ptr_i, ptr_i - i,
1272 sizeof(char*), key_y_compare);
1273 cp = key_buf[ptr_top-ptr_i];
1275 encode_key_init (&encode_info);
1276 encode_key_write (cp, &encode_info, outf);
1279 cp = key_buf[ptr_top-ptr_i];
1280 encode_key_write (cp+key_y_len, &encode_info, outf);
1284 prevcp = key_buf[ptr_top-ptr_i];
1289 logf (LOG_FATAL|LOG_ERRNO, "fclose %s", out_fname);
1292 logf (LOG_LOG, "finished section %d", zh->key_file_no);
1294 zh->key_buf_used = 0;
/* Decodes the per-record key stream in reckeys and appends one sortable
 * entry per key to the session key buffer for record `sysno`.
 *
 * cmd is stored with each entry as its command byte; a non-zero cmd
 * increments the EXPLAIN record count and zero decrements it.  The key
 * buffer is an 8 MiB block that holds key data growing from the start and
 * key pointers indexed from the top; when fewer than 1024 bytes separate
 * the two, the buffer is flushed with extract_flushWriteKeys.
 *
 * Each entry written is: encoded attribute set/use ordinal, the key bytes
 * copied from the stream, a NUL, the command byte and a struct it_key.
 *
 * NOTE(review): the lead-byte tests, the condition guarding the buffer
 * allocation, the filling of `key` and the ptr_i increment are not
 * visible in this view. */
1297 static void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
1298 int cmd, struct recKeys *reckeys)
/* All-ones start values for the decoded attribute set and use. */
1302 unsigned char attrSet = (unsigned char) -1;
1303 unsigned short attrUse = (unsigned short) -1;
1308 ZebraExplainInfo zei = zh->service->zei;
1312 int mem = 8*1024*1024;
1313 zh->key_buf = (char**) xmalloc (mem);
1314 zh->ptr_top = mem/sizeof(char*);
1316 zh->key_buf_used = 0;
1317 zh->key_file_no = 0;
1319 zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
1320 while (off < reckeys->buf_used)
1322 const char *src = reckeys->buf + off;
1331 memcpy (&ch, src, sizeof(ch));
1337 memcpy (&attrSet, src, sizeof(attrSet));
1338 src += sizeof(attrSet);
1342 memcpy (&attrUse, src, sizeof(attrUse));
1343 src += sizeof(attrUse);
/* Flush when key data and the pointer area are about to collide. */
1346 if (zh->key_buf_used + 1024 > (zh->ptr_top-zh->ptr_i)*sizeof(char*))
1347 extract_flushWriteKeys (zh);
/* Record where this entry starts. */
1349 (zh->key_buf)[zh->ptr_top - zh->ptr_i] =
1350 (char*)zh->key_buf + zh->key_buf_used;
1353 ch = zebraExplain_lookupSU (zei, attrSet, attrUse);
1355 ch = zebraExplain_addSU (zei, attrSet, attrUse);
1359 key_SU_encode (ch,((char*)zh->key_buf) + zh->key_buf_used);
1362 ((char*)zh->key_buf) [(zh->key_buf_used)++] = *src++;
1364 ((char*)(zh->key_buf))[(zh->key_buf_used)++] = '\0';
1365 ((char*)(zh->key_buf))[(zh->key_buf_used)++] = cmd;
/* Bits 2..5 of the lead byte carry a small seqno step, stored plus one. */
1368 seqno += ((lead>>2) & 15)-1;
1371 memcpy (&seqno, src, sizeof(seqno));
1372 src += sizeof(seqno);
1376 memcpy ((char*)zh->key_buf + zh->key_buf_used, &key, sizeof(key));
1377 (zh->key_buf_used) += sizeof(key);
1378 off = src - reckeys->buf;
/* The stream must have been consumed exactly. */
1380 assert (off == reckeys->buf_used);
/* Writes the keys still buffered for session zh to a temporary section
 * and then runs zebra_index_merge. */
1383 static void extract_index (ZebraHandle zh)
1385 extract_flushWriteKeys (zh);
1386 zebra_index_merge (zh);
/* Indexes one EXPLAIN record.
 *
 * handle - the ZebraHandle, passed as void *.
 * rec    - the record whose key information is updated.
 * n      - data1 tree of the record, walked by grs_extract_tree.
 *
 * The record's database is selected (or created) in the EXPLAIN
 * information, the session key buffer is reset, and the tree is
 * extracted with all sequence numbers starting at 0.  Keys stored with
 * the record from an earlier run are flushed with cmd 0, the new keys
 * with cmd 1, and the new key buffer is handed over to the record as its
 * delete-keys field.
 *
 * NOTE(review): the return statements and the failure handling of the
 * database lookup are not visible in this view. */
1389 static int explain_extract (void *handle, Record rec, data1_node *n)
1391 ZebraHandle zh = (ZebraHandle) handle;
1392 struct recExtractCtrl extractCtrl;
1395 if (zebraExplain_curDatabase (zh->service->zei,
1396 rec->info[recInfo_databaseName]))
1399 if (zebraExplain_newDatabase (zh->service->zei,
1400 rec->info[recInfo_databaseName], 0))
/* Start with an empty key buffer and no "previous key" context. */
1404 zh->keys.buf_used = 0;
1405 zh->keys.prevAttrUse = -1;
1406 zh->keys.prevAttrSet = -1;
1407 zh->keys.prevSeqNo = 0;
1410 extractCtrl.init = extract_init;
1411 extractCtrl.tokenAdd = extract_token_add;
1412 extractCtrl.schemaAdd = extract_schema_add;
1413 extractCtrl.dh = zh->service->dh;
1414 for (i = 0; i<256; i++)
1415 extractCtrl.seqno[i] = 0;
1416 extractCtrl.zebra_maps = zh->service->zebra_maps;
1417 extractCtrl.flagShowRecords = 0;
1418 extractCtrl.handle = handle;
1420 grs_extract_tree(&extractCtrl, n);
1422 logf (LOG_LOG, "flush explain record, sysno=%d", rec->sysno);
/* Keys stored with the record earlier are flushed with cmd 0. */
1424 if (rec->size[recInfo_delKeys])
1426 struct recKeys delkeys;
1427 struct sortKey *sortKeys = 0;
1429 delkeys.buf_used = rec->size[recInfo_delKeys];
1430 delkeys.buf = rec->info[recInfo_delKeys];
1431 extract_flushSortKeys (zh, rec->sysno, 0, &sortKeys);
1432 extract_flushRecordKeys (zh, rec->sysno, 0, &delkeys);
1434 extract_flushRecordKeys (zh, rec->sysno, 1, &zh->keys);
1435 extract_flushSortKeys (zh, rec->sysno, 1, &zh->sortKeys);
/* The record takes ownership of the key buffer; the session buffer is
 * cleared so that it is allocated afresh on next use. */
1437 xfree (rec->info[recInfo_delKeys]);
1438 rec->size[recInfo_delKeys] = zh->keys.buf_used;
1439 rec->info[recInfo_delKeys] = zh->keys.buf;
1440 zh->keys.buf = NULL;
1441 zh->keys.buf_max = 0;
1445 static int extract_rec_in_mem (ZebraHandle zh, const char *recordType,
1446 const char *buf, size_t buf_size,
1447 const char *databaseName, int delete_flag,
1448 int test_mode, int *sysno,
1449 int store_keys, int store_data,
1450 const char *match_criteria)
1452 RecordAttr *recordAttr;
1453 struct recExtractCtrl extractCtrl;
1458 const char *fname = "<no file>";
1460 long recordOffset = 0;
1461 struct zebra_fetch_control fc;
1464 fc.record_int_buf = buf;
1465 fc.record_int_len = buf_size;
1466 fc.record_int_pos = 0;
1468 fc.record_offset = 0;
1470 extractCtrl.offset = 0;
1471 extractCtrl.readf = zebra_record_int_read;
1472 extractCtrl.seekf = zebra_record_int_seek;
1473 extractCtrl.tellf = zebra_record_int_tell;
1474 extractCtrl.endf = zebra_record_int_end;
1475 extractCtrl.fh = &fc;
1477 /* announce database */
1478 if (zebraExplain_curDatabase (zh->service->zei, databaseName))
1480 if (zebraExplain_newDatabase (zh->service->zei, databaseName, 0))
1484 recType_byName (zh->service->recTypes, recordType, subType,
1487 logf (LOG_WARN, "No such record type: %s", recordType);
1491 zh->keys.buf_used = 0;
1492 zh->keys.prevAttrUse = -1;
1493 zh->keys.prevAttrSet = -1;
1494 zh->keys.prevSeqNo = 0;
1497 extractCtrl.subType = subType;
1498 extractCtrl.init = extract_init;
1499 extractCtrl.tokenAdd = extract_token_add;
1500 extractCtrl.schemaAdd = extract_schema_add;
1501 extractCtrl.dh = zh->service->dh;
1502 extractCtrl.handle = zh;
1503 extractCtrl.zebra_maps = zh->service->zebra_maps;
1504 extractCtrl.flagShowRecords = 0;
1505 for (i = 0; i<256; i++)
1507 if (zebra_maps_is_positioned(zh->service->zebra_maps, i))
1508 extractCtrl.seqno[i] = 1;
1510 extractCtrl.seqno[i] = 0;
1513 r = (*recType->extract)(clientData, &extractCtrl);
1515 if (r == RECCTRL_EXTRACT_EOF)
1517 else if (r == RECCTRL_EXTRACT_ERROR)
1519 /* error occurred during extraction ... */
1521 yaz_log (LOG_WARN, "extract error");
1523 if (rGroup->flagRw &&
1524 records_processed < rGroup->fileVerboseLimit)
1526 logf (LOG_WARN, "fail %s %s %ld", rGroup->recordType,
1527 fname, (long) recordOffset);
1532 if (zh->keys.buf_used == 0)
1534 /* the extraction process returned no information - the record
1535 is probably empty - unless flagShowRecords is in use */
1538 logf (LOG_WARN, "No keys generated for record");
1539 logf (LOG_WARN, " The file is probably empty");
1542 /* match criteria */
1549 logf (LOG_LOG, "delete %s %s %ld", recordType,
1550 fname, (long) recordOffset);
1551 logf (LOG_WARN, "cannot delete record above (seems new)");
1554 logf (LOG_LOG, "add %s %s %ld", recordType, fname,
1555 (long) recordOffset);
1556 rec = rec_new (zh->service->records);
1558 *sysno = rec->sysno;
1560 recordAttr = rec_init_attr (zh->service->zei, rec);
1565 dict_insert (matchDict, matchStr, sizeof(*sysno), sysno);
1568 extract_flushRecordKeys (zh, *sysno, 1, &zh->keys);
1569 extract_flushSortKeys (zh, *sysno, 1, &zh->sortKeys);
1573 /* record already exists */
1574 struct recKeys delkeys;
1576 rec = rec_get (zh->service->records, *sysno);
1579 recordAttr = rec_init_attr (zh->service->zei, rec);
1581 if (recordAttr->runNumber ==
1582 zebraExplain_runNumberIncrement (zh->service->zei, 0))
1584 logf (LOG_LOG, "skipped %s %s %ld", recordType,
1585 fname, (long) recordOffset);
1589 delkeys.buf_used = rec->size[recInfo_delKeys];
1590 delkeys.buf = rec->info[recInfo_delKeys];
1591 extract_flushSortKeys (zh, *sysno, 0, &zh->sortKeys);
1592 extract_flushRecordKeys (zh, *sysno, 0, &delkeys);
1595 /* record going to be deleted */
1596 if (!delkeys.buf_used)
1598 logf (LOG_LOG, "delete %s %s %ld", recordType,
1599 fname, (long) recordOffset);
1600 logf (LOG_WARN, "cannot delete file above, storeKeys false");
1604 logf (LOG_LOG, "delete %s %s %ld", recordType,
1605 fname, (long) recordOffset);
1608 dict_delete (matchDict, matchStr);
1610 rec_del (zh->service->records, &rec);
1617 /* record going to be updated */
1618 if (!delkeys.buf_used)
1620 logf (LOG_LOG, "update %s %s %ld", recordType,
1621 fname, (long) recordOffset);
1622 logf (LOG_WARN, "cannot update file above, storeKeys false");
1626 logf (LOG_LOG, "update %s %s %ld", recordType,
1627 fname, (long) recordOffset);
1628 extract_flushRecordKeys (zh, *sysno, 1, &zh->keys);
1632 /* update file type */
1633 xfree (rec->info[recInfo_fileType]);
1634 rec->info[recInfo_fileType] =
1635 rec_strdup (recordType, &rec->size[recInfo_fileType]);
1637 /* update filename */
1638 xfree (rec->info[recInfo_filename]);
1639 rec->info[recInfo_filename] =
1640 rec_strdup (fname, &rec->size[recInfo_filename]);
1642 /* update delete keys */
1643 xfree (rec->info[recInfo_delKeys]);
1644 if (zh->keys.buf_used > 0 && store_keys == 1)
1646 rec->size[recInfo_delKeys] = zh->keys.buf_used;
1647 rec->info[recInfo_delKeys] = zh->keys.buf;
1648 zh->keys.buf = NULL;
1649 zh->keys.buf_max = 0;
1653 rec->info[recInfo_delKeys] = NULL;
1654 rec->size[recInfo_delKeys] = 0;
1657 /* save file size of original record */
1658 zebraExplain_recordBytesIncrement (zh->service->zei,
1659 - recordAttr->recordSize);
1661 recordAttr->recordSize = fi->file_moffset - recordOffset;
1662 if (!recordAttr->recordSize)
1663 recordAttr->recordSize = fi->file_max - recordOffset;
1665 recordAttr->recordSize = buf_size;
1667 zebraExplain_recordBytesIncrement (zh->service->zei,
1668 recordAttr->recordSize);
1670 /* set run-number for this record */
1671 recordAttr->runNumber =
1672 zebraExplain_runNumberIncrement (zh->service->zei, 0);
1674 /* update store data */
1675 xfree (rec->info[recInfo_storeData]);
1676 if (store_data == 1)
1678 rec->size[recInfo_storeData] = recordAttr->recordSize;
1679 rec->info[recInfo_storeData] = (char *)
1680 xmalloc (recordAttr->recordSize);
1682 memcpy (rec->info[recInfo_storeData], buf, recordAttr->recordSize);
1684 if (lseek (fi->fd, recordOffset, SEEK_SET) < 0)
1686 logf (LOG_ERRNO|LOG_FATAL, "seek to %ld in %s",
1687 (long) recordOffset, fname);
1690 if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize)
1691 < recordAttr->recordSize)
1693 logf (LOG_ERRNO|LOG_FATAL, "read %d bytes of %s",
1694 recordAttr->recordSize, fname);
1701 rec->info[recInfo_storeData] = NULL;
1702 rec->size[recInfo_storeData] = 0;
1704 /* update database name */
1705 xfree (rec->info[recInfo_databaseName]);
1706 rec->info[recInfo_databaseName] =
1707 rec_strdup (databaseName, &rec->size[recInfo_databaseName]);
1710 recordAttr->recordOffset = recordOffset;
1712 /* commit this record */
1713 rec_put (zh->service->records, &rec);