2 * Copyright (c) 1995-2000, Index Data.
3 * See the file LICENSE for details.
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.32 2002-07-25 12:52:53 adam
8 * Character set negotiation updates
10 * Revision 1.31 2002/04/04 20:49:46 adam
11 * New functions yaz_is_abspath, yaz_path_fopen_base
13 * Revision 1.30 2000/12/05 19:07:24 adam
14 * Fixed problem with element level in reading of abstract syntax.
16 * Revision 1.29 2000/12/05 14:34:49 adam
17 * Fixed bug with termlists (introduced by previous commit).
19 * Revision 1.28 2000/12/05 12:21:45 adam
20 * Added termlist source for data1 system.
22 * Revision 1.27 1999/12/21 14:16:19 ian
23 * Changed retrieval module to allow data1 trees with no associated absyn.
24 * Also added a simple interface for extracting values from data1 trees using
25 * a string based tagpath.
27 * Revision 1.26 1999/11/30 13:47:12 adam
28 * Improved installation. Moved header files to include/yaz.
30 * Revision 1.25 1999/10/21 12:06:29 adam
31 * Retrieval module no longer uses ctype.h - functions.
33 * Revision 1.24 1999/08/27 09:40:32 adam
34 * Renamed logf function to yaz_log. Removed VC++ project files.
36 * Revision 1.23 1998/10/15 08:29:16 adam
37 * Tag set type may be specified in reference to it using "tagset"
38 * directive in .abs-files and "include" directive in .tag-files.
40 * Revision 1.22 1998/10/13 16:09:47 adam
41 * Added support for arbitrary OID's for tagsets, schemas and attribute sets.
42 * Added support for multiple attribute set references and tagset references
43 * from an abstract syntax file.
44 * Fixed many bad logs-calls in routines that read the various
45 * specifications regarding data1 (*.abs,*.att,...) and made the messages
46 * consistent whenever possible.
47 * Added extra 'lineno' argument to function readconf_line.
49 * Revision 1.21 1998/06/09 13:55:07 adam
52 * Revision 1.20 1998/05/18 13:07:02 adam
53 * Changed the way attribute sets are handled by the retrieval module.
54 * Extended Explain conversion / schema.
55 * Modified server and client to work with ASN.1 compiled protocol handlers.
57 * Revision 1.19 1998/03/05 08:15:32 adam
58 * Implemented data1_add_insert_taggeddata utility which is more flexible
59 * than data1_insert_taggeddata.
61 * Revision 1.18 1998/02/27 14:08:04 adam
62 * Added const to some char pointer arguments.
63 * Reworked data1_read_node so that it doesn't create a tree with
64 * pointers to original "SGML"-buffer.
66 * Revision 1.17 1998/02/11 11:53:34 adam
67 * Changed code so that it compiles as C++.
69 * Revision 1.16 1997/12/18 10:51:30 adam
70 * Implemented sub-trees feature for schemas - including forward
73 * Revision 1.15 1997/12/09 16:18:16 adam
74 * Work on EXPLAIN schema. First implementation of sub-schema facility
77 * Revision 1.14 1997/10/31 12:20:09 adam
78 * Improved memory debugging for xmalloc/nmem.c. References to NMEM
79 * instead of ODR in ESPEC-1 handling in source d1_espec.c.
80 * Bug fix: missing fclose in data1_read_espec1.
82 * Revision 1.13 1997/10/27 13:54:18 adam
83 * Changed structure field in data1 node to be simple string which
84 * is "unknown" to the retrieval system itself.
86 * Revision 1.12 1997/09/17 12:10:34 adam
89 * Revision 1.11 1997/09/05 09:50:55 adam
90 * Removed global data1_tabpath - uses data1_get_tabpath() instead.
92 * Revision 1.10 1997/05/14 06:54:01 adam
95 * Revision 1.9 1997/02/19 14:46:15 adam
96 * The "all" specifier only affects elements that are indexed (and not
99 * Revision 1.8 1997/01/02 10:47:59 quinn
100 * Added optional, physical ANY
102 * Revision 1.7 1996/06/10 08:56:01 quinn
105 * Revision 1.6 1996/05/31 13:52:21 quinn
106 * Fixed uninitialized variable for local tags in abstract syntax.
108 * Revision 1.5 1996/05/09 07:27:43 quinn
109 * Multiple local attributes values supported.
111 * Revision 1.4 1996/05/01 12:45:28 quinn
112 * Support use of local tag names in abs file.
114 * Revision 1.3 1995/11/01 16:34:55 quinn
115 * Making data1 look for tables in data1_tabpath
117 * Revision 1.2 1995/11/01 13:54:44 quinn
120 * Revision 1.1 1995/11/01 11:56:06 quinn
121 * Added Retrieval (data management) functions en masse.
132 #include <yaz/data1.h>
134 #define D1_MAX_NESTING 128
/* One entry of the abstract-syntax cache that is reached through
 * data1_absyn_cache_get(dh). The functions below also read and write
 * name and absyn members of an entry; only the next member is shown here.
 * NOTE(review): remaining member declarations are not visible - confirm. */
136 struct data1_absyn_cache_info
/* reference to another cache entry (same cache type) */
140 data1_absyn_cache next;
/* One entry of the attribute-set cache that is reached through
 * data1_attset_cache_get(dh). The functions below also use a name member
 * of an entry.
 * NOTE(review): the name member declaration is not visible - confirm. */
143 struct data1_attset_cache_info
/* the attribute set held by this entry */
146 data1_attset *attset;
/* reference to another cache entry (same cache type) */
147 data1_attset_cache next;
/* Look up an abstract syntax in the cache of handle dh by name.
 * The search starts at the cache head returned by data1_absyn_cache_get
 * and compares the requested name with an entry name using strcmp, so the
 * match is exact and case sensitive.
 * NOTE(review): the loop and the return statements are not visible here;
 * data1_get_absyn treats a null result as a cache miss - confirm. */
150 data1_absyn *data1_absyn_search (data1_handle dh, const char *name)
152 data1_absyn_cache p = *data1_absyn_cache_get (dh);
/* strcmp yields 0 on equality, hence the negation */
156 if (!strcmp (name, p->name))
/* Invoke the callback fh on cached abstract syntaxes of handle dh.
 * dh     - data1 handle whose cache is read
 * handle - opaque pointer passed through unchanged as second callback arg
 * fh     - callback receiving (dh, handle, absyn of the cache entry)
 * NOTE(review): the iteration over the cache is not visible here;
 * presumably every entry is visited once - confirm. */
163 void data1_absyn_trav (data1_handle dh, void *handle,
164 void (*fh)(data1_handle dh, void *h, data1_absyn *a))
166 data1_absyn_cache p = *data1_absyn_cache_get (dh);
170 (*fh)(dh, handle, p->absyn);
/* Load the abstract syntax called name and record it in the cache of dh.
 * The file name is formed by appending .abs to name and is handed to
 * data1_read_absyn; the result is stored in a new cache entry together
 * with a copy of name. Entry and name copy are allocated from the NMEM
 * obtained with data1_nmem_get(dh).
 * NOTE(review): the result of data1_read_absyn is stored without a visible
 * check, so a failed load may be cached as well - confirm.
 * NOTE(review): the linking of the entry via pp and the return statement
 * are not visible here. */
175 data1_absyn *data1_absyn_add (data1_handle dh, const char *name)
178 NMEM mem = data1_nmem_get (dh);
180 data1_absyn_cache p = (data1_absyn_cache)nmem_malloc (mem, sizeof(*p));
181 data1_absyn_cache *pp = data1_absyn_cache_get (dh);
/* NOTE(review): sprintf is unbounded and the declaration of fname is not
 * visible here; a long name could overflow the buffer - confirm size. */
183 sprintf(fname, "%s.abs", name);
184 p->absyn = data1_read_absyn (dh, fname);
185 p->name = nmem_strdup (mem, name);
/* Obtain the abstract syntax called name for handle dh: the cache is
 * consulted first with data1_absyn_search, and only when that yields a
 * null pointer is the syntax loaded with data1_absyn_add.
 * NOTE(review): the return statement is not visible; presumably absyn is
 * returned - confirm. */
191 data1_absyn *data1_get_absyn (data1_handle dh, const char *name)
195 if (!(absyn = data1_absyn_search (dh, name)))
196 absyn = data1_absyn_add (dh, name);
/* Look up an attribute set in the cache of handle dh by name.
 * The search starts at the cache head returned by data1_attset_cache_get
 * and compares names with strcmp (exact, case sensitive).
 * NOTE(review): the loop and the return statements are not visible here;
 * data1_get_attset treats a null result as a cache miss - confirm. */
200 data1_attset *data1_attset_search_name (data1_handle dh, const char *name)
202 data1_attset_cache p = *data1_attset_cache_get (dh);
/* strcmp yields 0 on equality, hence the negation */
206 if (!strcmp (name, p->name))
/* Look up an attribute set in the cache of handle dh by numeric id.
 * An entry matches when id equals the reference member of its attset.
 * NOTE(review): the loop and the return statements are not visible here;
 * presumably the matching attset or a null pointer is returned - confirm. */
213 data1_attset *data1_attset_search_id (data1_handle dh, int id)
215 data1_attset_cache p = *data1_attset_cache_get (dh);
219 if (id == p->attset->reference)
/* Load the attribute set called name and record it in the cache of dh.
 * A first read is attempted on the file name formed by appending .att to
 * name; a second read uses name itself as the file name. When no set can
 * be loaded a warning (with errno) is logged. On success a cache entry is
 * allocated from the NMEM of dh, and both the attset and the entry get the
 * same duplicated copy of aname as their name.
 * NOTE(review): the conditions joining these steps, the declaration of
 * cp, the linking via pp and the return statement are not visible here. */
226 data1_attset *data1_attset_add (data1_handle dh, const char *name)
228 char fname[512], aname[512];
229 NMEM mem = data1_nmem_get (dh);
230 data1_attset *attset;
/* NOTE(review): strcpy and sprintf below are unbounded; a name of 508 or
 * more characters would overflow the 512 byte buffers - confirm callers. */
232 strcpy (aname, name);
233 sprintf(fname, "%s.att", name);
234 attset = data1_read_attset (dh, fname);
/* second attempt: name is used directly as the file name */
238 attset = data1_read_attset (dh, name);
/* cp locates the last dot in aname; presumably the extension is cut off
 * there so the cached name has no suffix - TODO confirm */
239 if (attset && (cp = strrchr (aname, '.')))
243 yaz_log (LOG_WARN|LOG_ERRNO, "Couldn't load attribute set %s", name);
246 data1_attset_cache p = (data1_attset_cache)
247 nmem_malloc (mem, sizeof(*p));
248 data1_attset_cache *pp = data1_attset_cache_get (dh);
/* one duplicated string is shared by the attset and the cache entry */
250 attset->name = p->name = nmem_strdup (mem, aname);
/* Obtain the attribute set called name for handle dh: the cache is
 * consulted first with data1_attset_search_name, and only when that
 * yields a null pointer is the set loaded with data1_attset_add.
 * NOTE(review): the return statement is not visible; presumably attset is
 * returned - confirm. */
258 data1_attset *data1_get_attset (data1_handle dh, const char *name)
260 data1_attset *attset;
262 if (!(attset = data1_attset_search_name (dh, name)))
263 attset = data1_attset_add (dh, name);
/* Find an element set name entry of abstract syntax a.
 * Walks the esetnames list of a and tests each entry name against the
 * requested name with data1_matchstr; a zero result counts as a match.
 * NOTE(review): the rest of the parameter list and the return statements
 * are not visible here. */
267 data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a,
272 for (r = a->esetnames; r; r = r->next)
273 if (!data1_matchstr(r->name, name))
/* Find an element by one of the names of its tag.
 * The candidate list is either the main elements of abs or the children
 * of parent. Each candidate is tested by walking the names list of its
 * tag and comparing with tagname through data1_matchstr (zero = match).
 * NOTE(review): the condition selecting between the two start lists, the
 * handling of a missing abstract syntax and the return statements are not
 * visible here; presumably parent == NULL selects main_elements. */
278 data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
279 data1_element *parent,
284 /* It's now possible to have a data1 tree with no abstract syntax */
289 r = abs->main_elements;
291 r = parent->children;
/* an abstract syntax that is present must define main elements */
292 assert (abs->main_elements);
293 for (; r; r = r->next)
297 for (n = r->tag->names; n; n = n->next)
298 if (!data1_matchstr(tagname, n->name))
/* Find a main element of absyn by element name.
 * Walks the main_elements list and compares each element name with the
 * requested name through data1_matchstr (zero = match). Only the top
 * level list is walked in the lines shown.
 * NOTE(review): the rest of the parameter list, the handling of a missing
 * abstract syntax and the return statements are not visible here. */
304 data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn,
309 /* It's now possible to have a data1 tree with no abstract syntax */
/* an abstract syntax that is present must define main elements */
313 assert (absyn->main_elements);
314 for (r = absyn->main_elements; r; r = r->next)
315 if (!data1_matchstr(r->name, name))
/* Resolve sub-element references for the sibling list starting at e.
 * For every element in the list the function either recurses into its
 * children, or looks up the section of absyn whose name equals the
 * sub_name of the element (strcmp) and adopts the element list of that
 * section as children. A reference that cannot be resolved is logged as a
 * warning.
 * NOTE(review): the conditions choosing between these branches and the
 * advance of sub_e inside the while loop are not visible here. */
321 void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e)
323 /* It's now possible to have a data1 tree with no abstract syntax */
327 for (; e; e = e->next)
/* descend into the subtree of this element */
332 fix_element_ref (dh, absyn, e->children);
336 data1_sub_elements *sub_e = absyn->sub_elements;
/* stop at the first section with a matching name, or at list end */
337 while (sub_e && strcmp (e->sub_name, sub_e->name))
/* the element shares the element list of the section (no copy is made) */
340 e->children = sub_e->elements;
342 yaz_log (LOG_WARN, "Unresolved reference to sub-elements %s",
/* Parse a comma separated list of termlist specifications.
 * dh           - data1 handle; supplies the NMEM used for allocations
 * tpp          - in/out: address of the slot where the next termlist
 *                node is to be stored
 * p            - specification text; each item is scanned as
 *                attname optionally followed by a colon and a structure,
 *                and the structure may itself carry a colon and a source
 * file, lineno - used only in log messages
 * element_name - copied into attname in a branch whose condition is not
 *                visible here (data1_read_absyn passes 0 for the all
 *                directive)
 * res          - abstract syntax whose attset resolves attribute names
 * data1_read_absyn treats a non-zero result as failure.
 * NOTE(review): the return statements, the declarations of r and source
 * and the update of tp and tpp between items are not visible here. */
349 static int parse_termlists (data1_handle dh, data1_termlist ***tpp,
350 char *p, const char *file, int lineno,
351 const char *element_name, data1_absyn *res)
353 data1_termlist **tp = *tpp;
356 char attname[512], structure[512];
/* both scan sets are limited to 511 characters, matching the buffers */
360 if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname,
364 "%s:%d: Syntax error in termlistspec '%s'",
/* NOTE(review): unbounded strcpy into a 512 byte buffer - confirm that
 * element_name is always short enough and non-null on this path. */
373 strcpy(attname, element_name);
374 *tp = (data1_termlist *)
375 nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
377 if (!((*tp)->att = data1_getattbyname(dh, res->attset,
381 "%s:%d: Couldn't find att '%s' in attset",
382 file, lineno, attname);
/* a colon inside the structure part separates structure from source */
389 if (r == 2 && (source = strchr(structure, ':')))
390 *source++ = '\0'; /* cut off structure .. */
392 source = "data"; /* ok: default is leaf data */
393 (*tp)->source = (char *)
394 nmem_strdup (data1_nmem_get (dh), source);
396 if (r < 2) /* is the structure qualified? */
/* default structure is a string literal, not an NMEM copy */
397 (*tp)->structure = "w";
399 (*tp)->structure = (char *)
400 nmem_strdup (data1_nmem_get (dh), structure);
/* continue while another comma is followed by more text */
403 while ((p = strchr(p, ',')) && *(++p));
/* Read an abstract syntax definition from file and build a data1_absyn.
 * The file is opened with data1_path_fopen and processed line by line
 * with readconf_line. The directives recognised in the lines shown are:
 * elm, section, all, name, reference, attset, tagset, varset, esetname,
 * maptab, marc and encoding; anything else is logged as unknown.
 * All structures are allocated from the NMEM of dh. After the file has
 * been processed the section named main becomes main_elements and
 * sub-element references are resolved with fix_element_ref.
 * NOTE(review): the error paths (closing the file, return values), the
 * assignment of cmd, level, name and fname, and the final return are not
 * visible here. */
408 data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
410 data1_sub_elements *cur_elements = NULL;
411 data1_absyn *res = 0;
/* ppl[n] is the slot where the next element at nesting level n+1 goes */
413 data1_element **ppl[D1_MAX_NESTING];
414 data1_esetname **esetpp;
415 data1_maptab **maptabp;
416 data1_marctab **marcp;
417 data1_termlist *all = 0;
418 data1_attset_child **attset_childp;
419 data1_tagset **tagset_childp;
423 char *argv[50], line[512];
425 if (!(f = data1_path_fopen(dh, file, "r")))
427 yaz_log(LOG_WARN|LOG_ERRNO, "Couldn't open %s", file);
431 res = (data1_absyn *) nmem_malloc(data1_nmem_get(dh), sizeof(*res));
433 res->reference = VAL_NONE;
/* the tail pointers below let each directive append to its list */
436 tagset_childp = &res->tagset;
438 res->attset = data1_empty_attset (dh);
439 attset_childp = &res->attset->children;
443 esetpp = &res->esetnames;
445 maptabp = &res->maptabs;
449 res->sub_elements = NULL;
450 res->main_elements = NULL;
/* a zero result from readconf_line ends the loop */
452 while ((argc = readconf_line(f, &lineno, line, 512, argv, 50)))
/* elm: define one element of the current section */
455 if (!strcmp(cmd, "elm"))
457 data1_element *new_element;
459 char *p, *sub_p, *path, *name, *termlists;
465 yaz_log(LOG_WARN, "%s:%d: Bad # of args to elm", file, lineno);
/* a section called main is created here and pushed onto the section list
 * (presumably only when no section is open yet - TODO confirm) */
474 cur_elements = (data1_sub_elements *)
475 nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
476 cur_elements->next = res->sub_elements;
477 cur_elements->elements = NULL;
478 cur_elements->name = "main";
479 res->sub_elements = cur_elements;
482 ppl[level] = &cur_elements->elements;
489 if ((e = strchr(p, '/')))
/* NOTE(review): no check of level against D1_MAX_NESTING is visible
 * before ppl is indexed - confirm. */
496 yaz_log(LOG_WARN, "%s:%d: Bad level increase", file, lineno);
/* append the new element at the current level and reset its members */
501 new_element = *ppl[level-1] = (data1_element *)
502 nmem_malloc(data1_nmem_get(dh), sizeof(*new_element));
503 new_element->next = new_element->children = 0;
504 new_element->tag = 0;
505 new_element->termlists = 0;
506 new_element->sub_name = 0;
508 tp = &new_element->termlists;
/* later siblings follow this element; deeper elements become children */
509 ppl[level-1] = &new_element->next;
510 ppl[level] = &new_element->children;
512 /* consider subtree (if any) ... */
513 if ((sub_p = strchr (p, ':')) && sub_p[1])
516 new_element->sub_name =
517 nmem_strdup (data1_nmem_get(dh), sub_p);
519 /* well-defined tag */
520 if (sscanf(p, "(%d,%d)", &type, &value) == 2)
524 yaz_log(LOG_WARN, "%s:%d: No tagset loaded", file, lineno);
528 if (!(new_element->tag = data1_gettagbynum (dh, res->tagset,
531 yaz_log(LOG_WARN, "%s:%d: Couldn't find tag %s in tagset",
/* otherwise a string tag local to this abstract syntax is built */
541 new_element->tag = (data1_tag *)
542 nmem_malloc(data1_nmem_get (dh),
543 sizeof(*new_element->tag));
544 nt->which = DATA1T_string;
545 nt->value.string = nmem_strdup(data1_nmem_get (dh), p);
546 nt->names = (data1_name *)
547 nmem_malloc(data1_nmem_get(dh),
548 sizeof(*new_element->tag->names));
/* the single name entry shares the string stored as the tag value */
549 nt->names->name = nt->value.string;
551 nt->kind = DATA1K_string;
557 yaz_log(LOG_WARN, "%s:%d: Bad element", file, lineno);
561 /* parse termList definitions */
565 assert (res->attset);
567 if (parse_termlists (dh, &tp, p, file, lineno, name, res))
572 *tp = all; /* append any ALL entries to the list */
574 new_element->name = nmem_strdup(data1_nmem_get (dh), name);
/* section: open a new named list of elements */
576 else if (!strcmp(cmd, "section"))
582 yaz_log(LOG_WARN, "%s:%d: Bad # of args to section",
588 cur_elements = (data1_sub_elements *)
589 nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
590 cur_elements->next = res->sub_elements;
591 cur_elements->elements = NULL;
592 cur_elements->name = nmem_strdup (data1_nmem_get(dh), name);
593 res->sub_elements = cur_elements;
596 ppl[level] = &cur_elements->elements;
/* all: termlists that are appended to the list of each later element */
598 else if (!strcmp(cmd, "all"))
600 data1_termlist **tp = &all;
603 yaz_log(LOG_WARN, "%s:%d: Too many 'all' directives - ignored",
609 yaz_log(LOG_WARN, "%s:%d: Bad # of args to 'all' directive",
613 if (parse_termlists (dh, &tp, argv[1], file, lineno, 0, res))
/* name: name of the abstract syntax */
619 else if (!strcmp(cmd, "name"))
623 yaz_log(LOG_WARN, "%s:%d: Bad # of args to name directive",
627 res->name = nmem_strdup(data1_nmem_get(dh), argv[1]);
/* reference: OID value looked up by name */
629 else if (!strcmp(cmd, "reference"))
635 yaz_log(LOG_WARN, "%s:%d: Bad # of args to reference",
640 if ((res->reference = oid_getvalbyname(name)) == VAL_NONE)
642 yaz_log(LOG_WARN, "%s:%d: Unknown tagset ref '%s'",
/* attset: append a referenced attribute set to the children of res->attset */
647 else if (!strcmp(cmd, "attset"))
650 data1_attset *attset;
654 yaz_log(LOG_WARN, "%s:%d: Bad # of args to attset",
659 if (!(attset = data1_get_attset (dh, name)))
661 yaz_log(LOG_WARN, "%s:%d: Couldn't find attset %s",
665 *attset_childp = (data1_attset_child *)
666 nmem_malloc (data1_nmem_get(dh), sizeof(**attset_childp));
667 (*attset_childp)->child = attset;
668 (*attset_childp)->next = 0;
669 attset_childp = &(*attset_childp)->next;
/* tagset: read a tag set and append it to the tagset list */
671 else if (!strcmp(cmd, "tagset"))
677 yaz_log(LOG_WARN, "%s:%d: Bad # of args to tagset",
/* atoi reports no errors; a non-numeric argument yields 0 */
683 type = atoi(argv[2]);
684 *tagset_childp = data1_read_tagset (dh, name, type);
685 if (!(*tagset_childp))
687 yaz_log(LOG_WARN, "%s:%d: Couldn't load tagset %s",
691 tagset_childp = &(*tagset_childp)->next;
/* varset: read the variant set */
693 else if (!strcmp(cmd, "varset"))
699 yaz_log(LOG_WARN, "%s:%d: Bad # of args in varset",
704 if (!(res->varset = data1_read_varset (dh, name)))
706 yaz_log(LOG_WARN, "%s:%d: Couldn't load Varset %s",
/* esetname: append an element set name, optionally with an Espec-1 file */
711 else if (!strcmp(cmd, "esetname"))
717 yaz_log(LOG_WARN, "%s:%d: Bad # of args in esetname",
724 *esetpp = (data1_esetname *)
725 nmem_malloc(data1_nmem_get(dh), sizeof(**esetpp));
726 (*esetpp)->name = nmem_strdup(data1_nmem_get(dh), name);
730 else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname)))
732 yaz_log(LOG_WARN, "%s:%d: Espec-1 read failed for %s",
733 file, lineno, fname);
736 esetpp = &(*esetpp)->next;
/* maptab: read a mapping table and append it */
738 else if (!strcmp(cmd, "maptab"))
744 yaz_log(LOG_WARN, "%s:%d: Bad # of args for maptab",
749 if (!(*maptabp = data1_read_maptab (dh, name)))
751 yaz_log(LOG_WARN, "%s:%d: Couldn't load maptab %s",
755 maptabp = &(*maptabp)->next;
/* marc: read a MARC table and append it */
757 else if (!strcmp(cmd, "marc"))
763 yaz_log(LOG_WARN, "%s:%d: Bad # or args for marc",
768 if (!(*marcp = data1_read_marctab (dh, name)))
770 yaz_log(LOG_WARN, "%s:%d: Couldn't read marctab %s",
774 marcp = &(*marcp)->next;
/* encoding: keep a copy of the encoding name */
776 else if (!strcmp(cmd, "encoding"))
780 yaz_log(LOG_WARN, "%s:%d: Bad # or args for encoding",
784 res->encoding = nmem_strdup (data1_nmem_get(dh), argv[1]);
788 yaz_log(LOG_WARN, "%s:%d: Unknown directive '%s'", file, lineno, cmd);
/* post-processing: pick the main section and resolve references in each
 * section */
794 for (cur_elements = res->sub_elements; cur_elements;
795 cur_elements = cur_elements->next)
797 if (!strcmp (cur_elements->name, "main"))
798 res->main_elements = cur_elements->elements;
799 fix_element_ref (dh, res, cur_elements->elements);
801 yaz_log (LOG_DEBUG, "%s: data1_read_absyn end", file);