1 /* $Id: recgrs.c,v 1.103 2005-06-29 16:52:27 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
/* Parser state for termlist "source" expressions.  The sp_* functions in
 * this file use its src, tok, len, lookahead and nmem members. */
36 struct source_parser {
/* Lexer step for the source-expression parser.  Skips blanks at sp->src,
 * then scans forward until a delimiter from "<>();,-: " or the end of the
 * string; one visible path stores the current source character in
 * sp->lookahead.
 * NOTE(review): how tok/len are filled in and what is returned is not
 * visible in this excerpt -- confirm against the full source. */
44 static int sp_lex(struct source_parser *sp)
/* skip blanks in front of the next token */
46 while (*sp->src == ' ')
/* advance to the next delimiter character or end of string */
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
/* Forward declaration: sp_range() and sp_first() evaluate their arguments
 * through sp_expr(), which in turn dispatches to them. */
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/* Handles the argument list of a "range" source expression:
 *     ( expr , start [, len] )
 * expr is evaluated into wrd; start and the optional len are evaluated
 * into a scratch RecWord and converted with atoi_n().  When wrd holds a
 * non-empty term, its buffer pointer is advanced by start, its length is
 * reduced by start and then compared against len.
 * NOTE(review): no check that start lies within 0..term_len is visible
 * before the pointer/length adjustment -- confirm against the full source.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers branch on a zero result of sp_expr(). */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
/* first argument: the term to be cut */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
/* second argument: start offset */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
/* optional third argument: length */
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
/* apply the range to the term, if there is one */
109 if (wrd->term_buf && wrd->term_len)
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/* Handles the argument list of a "first" source expression:
 *     ( expr , search [, search ...] )
 * expr is evaluated into wrd.  Each further argument is evaluated into a
 * scratch RecWord and wrd's term is scanned for it; min_pos keeps the
 * smallest matching offset seen.  Finally wrd's term is replaced by the
 * decimal text of min_pos (0 when nothing was found), allocated from
 * sp->nmem.
 * NOTE(review): the return statements are not visible in this excerpt. */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
/* plain substring scan: try the search term at every offset i */
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
/* the result of first(...) is the position rendered as text */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/* Evaluates one source expression for node n and stores the resulting
 * term in wrd (term_buf / term_len).  Tokens recognised in the visible
 * code:
 *   data      - data of n when n is a data node
 *   tag       - tag name of n when n is a tag node
 *   attr(e)   - value of the attribute of n whose name is given by e
 *   first(..) - delegated to sp_first()
 *   range(..) - delegated to sp_range()
 *   <digits>  - numeric literal, copied into sp->nmem memory
 *   '...'     - quoted literal, copied without the quotes
 * NOTE(review): the return statements and the handling of unknown tokens
 * are not visible in this excerpt. */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): with '&&' the walk stops at the first attribute whose name
 * length equals term_len OR whose leading bytes compare equal, so a
 * non-matching attribute can be selected.  An exact name match would need
 * '||' between the two tests -- confirm the intent. */
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
/* numeric literal: the token bytes are copied as they are */
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
223 wrd->term_buf = nmem_malloc(sp->nmem, sp->len);
224 memcpy(wrd->term_buf, sp->tok, sp->len);
225 wrd->term_len = sp->len;
/* quoted literal: strip the surrounding single quotes */
228 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
230 wrd->term_len = sp->len - 2;
231 wrd->term_buf = nmem_malloc(sp->nmem, wrd->term_len);
232 memcpy(wrd->term_buf, sp->tok+1, wrd->term_len);
/* Allocates a source_parser with xmalloc() and gives it its own NMEM
 * handle.  Released again by source_parser_destroy(). */
244 static struct source_parser *source_parser_create()
246 struct source_parser *sp = xmalloc(sizeof(*sp));
248 sp->nmem = nmem_create();
/* Releases a parser obtained from source_parser_create(); the visible code
 * destroys the parser's NMEM handle.
 * NOTE(review): freeing of sp itself is not visible in this excerpt. */
252 static void source_parser_destroy(struct source_parser *sp)
256 nmem_destroy(sp->nmem);
/* Entry point of the source-expression parser: resets the parser's NMEM
 * (terms produced by an earlier call are allocated from it) and evaluates
 * src for node n into wrd via sp_expr().  Returns sp_expr()'s result.
 * NOTE(review): the set-up of sp->src and the first sp_lex() call are not
 * visible in this excerpt. */
260 static int sp_parse(struct source_parser *sp,
261 data1_node *n, RecWord *wrd, const char *src)
267 nmem_reset(sp->nmem);
270 return sp_expr(sp, n, wrd);
/* Checks an xelm XPath predicate p against node n.
 *
 * Relation predicates: the name must start with '@' (attribute test),
 * otherwise a warning is logged.  The attributes of n are searched for the
 * name; when an operator is given only '=' is accepted and the attribute
 * value is compared with strcmp(); without an operator the mere presence
 * of the attribute is what is tested.
 *
 * Boolean predicates: "and" / "or" combine the recursive results of the
 * left and right operands; any other operator is logged as unknown.
 *
 * NOTE(review): the exact return values of the non-boolean paths are not
 * visible in this excerpt. */
273 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
282 if (p->which == XPATH_PREDICATE_RELATION) {
283 if (p->u.relation.name[0]) {
284 if (*p->u.relation.name != '@') {
286 " Only attributes (@) are supported in xelm xpath predicates");
287 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading '@' */
290 attname = p->u.relation.name + 1;
292 /* look for the attribute with the specified name */
293 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
294 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
296 if (!strcmp(attr->name, attname)) {
297 if (p->u.relation.op[0]) {
298 if (*p->u.relation.op != '=') {
300 "Only '=' relation is supported (%s)",p->u.relation.op);
301 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
304 yaz_log(YLOG_DEBUG," - value %s <-> %s",
305 p->u.relation.value, attr->value );
306 if (!strcmp(attr->value, p->u.relation.value)) {
311 /* attribute exists, no value specified */
316 yaz_log(YLOG_DEBUG, "return %d", res);
322 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
323 if (!strcmp(p->u.boolean.op,"and")) {
324 return d1_check_xpath_predicate(n, p->u.boolean.left)
325 && d1_check_xpath_predicate(n, p->u.boolean.right);
327 else if (!strcmp(p->u.boolean.op,"or")) {
328 return (d1_check_xpath_predicate(n, p->u.boolean.left)
329 || d1_check_xpath_predicate(n, p->u.boolean.right));
331 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/* Runs text through the DFA whose state table is dfaar, starting at
 * dfaar[0].  For each input character the transitions of the current
 * state are searched for a [ch[0], ch[1]] range containing it.
 * NOTE(review): the acceptance test and the return values are not visible
 * in this excerpt. */
340 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
342 struct DFA_state *s = dfaar[0]; /* start state */
345 const char *p = text;
/* transition on the first character */
348 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
349 if (c >= t->ch[0] && c <= t->ch[1])
353 /* move to next state and return if we get a match */
359 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
360 if (c >= t->ch[0] && c <= t->ch[1])
369 New function, looking for xpath "element" definitions in abs, by
370 tagpath, using a kind of ugly regexp search. The DFA was built while
371 parsing abs, so here we just go through them and try to match
372 against the given tagpath. The first matching entry is returned.
376 Added support for enhanced xelm. Now [] predicates are considered
377 as well, when selecting indexing rules... (why the hell it's called
/* Looks up the termlist of the first xelm entry of n's abstract syntax
 * whose DFA matches tagpath and whose predicates hold for n and its
 * ancestors.
 *
 * tagpath is wrapped as "/<tagpath>\n" before matching.  For a matching
 * entry the location steps are walked from last to first while moving
 * from the nearest tag node up through the parents, checking each step's
 * predicate with d1_check_xpath_predicate().
 *
 * Returns xpe->termlists for the entry found.
 * NOTE(review): pexpr is allocated with xmalloc(); its release and the
 * return value for "no match" are not visible in this excerpt. */
384 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
386 data1_absyn *abs = n->root->u.root.absyn;
387 data1_xpelement *xpe = abs->xp_elements;
390 struct xpath_location_step *xp;
392 char *pexpr = xmalloc(strlen(tagpath)+5);
395 sprintf (pexpr, "/%s\n", tagpath);
396 yaz_log(YLOG_LOG, "Checking tagpath %s", pexpr);
397 for (; xpe; xpe = xpe->next)
400 ok = dfa_match_first(xpe->dfa->states, pexpr);
402 yaz_log(YLOG_LOG, " xpath got match %s",xpe->xpath_expr);
404 yaz_log(YLOG_LOG, " xpath no match %s",xpe->xpath_expr);
408 /* we have to check the predicates up to the root node */
411 /* find the first tag up in the node structure */
412 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
415 /* go from inside out in the node structure, while going
416 backwards through xpath location steps ... */
417 for (i = xpe->xpath_len - 1; i>0; i--)
419 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
420 i, xp[i].part, nn->u.tag.tag);
422 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
424 yaz_log(YLOG_DEBUG, " Predicates didn't match");
429 if (nn->which == DATA1N_tag)
441 yaz_log(YLOG_DEBUG, "Got it");
442 return xpe->termlists;
449 1 start element (tag)
451 3 start attr (and attr-exact)
459 Now, if there is a matching xelm described in abs, for the
460 indexed element or the attribute, then the data is handled according
461 to those definitions...
463 modified by pop, 2002-12-13
466 /* add xpath index for an attribute */
/* Prepares the XPath index entries for one attribute.  wrd is filled in
 * three times in sequence:
 *   1. element begin, term = tag_path, index type '0'
 *   2. attribute,     term = value,    index type 'w'
 *   3. element end,   term = tag_path, index type '0'
 * NOTE(review): both index_name and attrSet assignments appear, presumably
 * alternatives selected by a preprocessor conditional that is not visible
 * here; the calls that emit wrd and the use of the name/structure
 * parameters are not visible either. */
467 static void index_xpath_attr (char *tag_path, char *name, char *value,
468 char *structure, struct recExtractCtrl *p,
472 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
474 wrd->attrSet = VAL_IDXPATH;
477 wrd->index_type = '0';
478 wrd->term_buf = tag_path;
479 wrd->term_len = strlen(tag_path);
484 wrd->index_name = ZEBRA_XPATH_ATTR;
488 wrd->index_type = 'w';
489 wrd->term_buf = value;
490 wrd->term_len = strlen(value);
494 wrd->index_name = ZEBRA_XPATH_ELM_END;
498 wrd->index_type = '0';
499 wrd->term_buf = tag_path;
500 wrd->term_len = strlen(tag_path);
/* Builds the tag path of node n in tag_path_full (capacity max bytes).
 * Walks from n through the parents; the name of every tag node is
 * appended followed by '/', so the innermost tag comes first.  The walk
 * has a length check against max - 2 and a test for the root node.  The
 * result is NUL-terminated and logged. */
505 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
510 /* we have to fetch the whole path to the data tag */
511 for (nn = n; nn; nn = nn->parent)
513 if (nn->which == DATA1N_tag)
515 size_t tlen = strlen(nn->u.tag.tag);
/* keep room for the '/' separator and the terminating NUL */
516 if (tlen + flen > (max - 2))
518 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
520 tag_path_full[flen++] = '/';
523 if (nn->which == DATA1N_root)
526 tag_path_full[flen] = 0;
527 yaz_log(YLOG_LOG, "mk_tag_path_full=%s", tag_path_full);
/* Generates XPath-related index entries for node n.
 *
 * Data nodes: the node's data becomes the term.  If the abstract syntax
 * has an xelm termlist matching the node's tag path, every termlist entry
 * is processed: the term is derived from the entry's source expression
 * with sp_parse() on a copy of wrd, and it is added either to the XPath
 * index (the "!" case) or to the attribute-based index named by the entry.
 *
 * Tag nodes: the tag path itself is the term; for a start tag the
 * attributes are handled as well (attribute name, "name=value" exact form
 * and any xelm termlists defined for "@attr/<tagpath>").
 *
 * With p->flagShowRecords set the entries are printed.
 *
 * NOTE(review): many lines of this function (declarations, the branch
 * structure, preprocessor conditionals) are missing from this excerpt, so
 * the comments only describe what the visible statements do. */
531 static void index_xpath(struct source_parser *sp, data1_node *n,
532 struct recExtractCtrl *p,
533 int level, RecWord *wrd,
543 char tag_path_full[1024];
544 int termlist_only = 1;
549 int xpath_is_start = 0;
555 yaz_log(YLOG_DEBUG, "index_xpath level=%d xpath_index=%s",
558 yaz_log(YLOG_DEBUG, "index_xpath level=%d use=%d", level, use);
560 if ((!n->root->u.root.absyn) ||
561 (n->root->u.root.absyn->enable_xpath_indexing)) {
568 wrd->term_buf = n->u.data.data;
569 wrd->term_len = n->u.data.len;
572 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
574 /* If we have a matching termlist... */
575 if (n->root->u.root.absyn &&
576 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
578 for (; tl; tl = tl->next)
580 /* need to copy recword because it may be changed */
582 wrd->index_type = *tl->structure;
583 memcpy (&wrd_tl, wrd, sizeof(*wrd));
585 sp_parse(sp, n, &wrd_tl, tl->source);
592 /* this is the ! case, so structure is for the xpath index */
594 wrd_tl.index_name = xpath_index;
596 wrd_tl.attrSet = VAL_IDXPATH;
597 wrd_tl.attrUse = use;
599 if (p->flagShowRecords)
602 printf("%*sXPath index", (level + 1) * 4, "");
603 printf (" XData:\"");
/* show at most the first 40 bytes of the term */
604 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
605 fputc (wrd_tl.term_buf[i], stdout);
607 if (wrd_tl.term_len > 40)
609 fputc ('\n', stdout);
612 (*p->tokenAdd)(&wrd_tl);
615 /* this is just the old fashioned attribute based index */
617 wrd_tl.index_name = tl->index_name;
619 wrd_tl.attrSet = (int) (tl->att->parent->reference);
620 wrd_tl.attrUse = tl->att->locals->local;
622 if (p->flagShowRecords)
625 printf("%*sIdx: [%s]", (level + 1) * 4, "",
628 printf("%s %s", tl->index_name, tl->source);
630 printf("%s:%s [%d] %s",
631 tl->att->parent->name,
632 tl->att->name, tl->att->value,
635 printf (" XData:\"");
636 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
637 fputc (wrd_tl.term_buf[i], stdout);
639 if (wrd_tl.term_len > 40)
641 fputc ('\n', stdout);
644 (*p->tokenAdd)(&wrd_tl);
648 /* xpath indexing is done, if there was no termlist given,
649 or no ! in the termlist, and default indexing is enabled... */
650 if (!p->flagShowRecords && !xpdone && !termlist_only)
653 wrd->index_name = xpath_index;
655 wrd->attrSet = VAL_IDXPATH;
658 wrd->index_type = 'w';
663 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
665 wrd->index_type = '0';
666 wrd->term_buf = tag_path_full;
667 wrd->term_len = strlen(tag_path_full);
669 wrd->index_name = xpath_index;
671 wrd->attrSet = VAL_IDXPATH;
674 if (p->flagShowRecords)
676 printf("%*s tag=", (level + 1) * 4, "");
677 for (i = 0; i<wrd->term_len && i < 40; i++)
678 fputc (wrd->term_buf[i], stdout);
689 /* Add tag start/end xpath index, only when there is a ! in
690 the apropriate xelm directive, or default xpath indexing
693 if (!(do_xpindex = 1 - termlist_only))
695 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
697 for (; tl; tl = tl->next)
709 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
712 if (xpath_is_start == 1) /* only for the starting tag... */
/* NOTE(review): tll below has MAX_ATTR_COUNT slots and is filled per
 * attribute; a bound check on the index is not visible in this excerpt --
 * confirm against the full source. */
714 #define MAX_ATTR_COUNT 50
715 data1_termlist *tll[MAX_ATTR_COUNT];
719 /* get termlists for attributes, and find out, if we have to do xpath indexing */
720 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
725 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
727 int do_xpindex = 1 - termlist_only;
729 char attr_tag_path_full[1024];
731 /* this could be cached as well */
/* NOTE(review): unbounded sprintf -- tag_path_full may itself be close to
 * 1024 bytes, so "@" + name + "/" + path can exceed attr_tag_path_full.
 * The same pattern is used again further down. */
732 sprintf (attr_tag_path_full, "@%s/%s",
733 xp->name, tag_path_full);
735 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
737 /* if there is a ! in the xelm termlist, or default indexing is on,
738 proceed with xpath idx */
741 for (; tl; tl = tl->next)
755 /* attribute (no value) */
756 wrd->index_type = '0';
758 wrd->index_name = ZEBRA_XPATH_ATTR;
762 wrd->term_buf = xp->name;
763 wrd->term_len = strlen(xp->name);
769 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
771 /* attribute value exact */
772 strcpy (comb, xp->name);
774 strcat (comb, xp->value);
777 wrd->index_name = ZEBRA_XPATH_ATTR;
781 wrd->index_type = '0';
782 wrd->term_buf = comb;
783 wrd->term_len = strlen(comb);
793 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
795 char attr_tag_path_full[1024];
798 sprintf (attr_tag_path_full, "@%s/%s",
799 xp->name, tag_path_full);
803 /* If there is a termlist given (=xelm directive) */
804 for (; tl; tl = tl->next)
812 /* add xpath index for the attribute */
813 index_xpath_attr (attr_tag_path_full, xp->name,
814 xp->value, tl->structure,
818 /* add attribute based index for the attribute */
822 wrd->index_name = tl->index_name;
825 (tl->att->parent->reference);
826 wrd->attrUse = tl->att->locals->local;
828 wrd->index_type = *tl->structure;
829 wrd->term_buf = xp->value;
830 wrd->term_len = strlen(xp->value);
836 /* if there was no termlist for the given path,
837 or the termlist didn't have a ! element, index
838 the attribute as "w" */
839 if ((!xpdone) && (!termlist_only))
841 index_xpath_attr (attr_tag_path_full, xp->name,
842 xp->value, "w", p, wrd);
/* Indexes node n according to the termlists of the element definition
 * found at par or the nearest ancestor tag that has one.
 *
 * For every termlist entry the term is derived from the entry's source
 * expression via sp_parse(); non-empty terms are tagged with the entry's
 * structure (index type) and its index name or attribute set/use.  With
 * p->flagShowRecords set the entry is printed, showing at most 40 bytes of
 * the term.
 * NOTE(review): the statement that hands wrd to the extractor is not
 * visible in this excerpt. */
851 static void index_termlist (struct source_parser *sp, data1_node *par,
853 struct recExtractCtrl *p, int level, RecWord *wrd)
855 data1_termlist *tlist = 0;
856 data1_datatype dtype = DATA1K_string;
859 * cycle up towards the root until we find a tag with an att..
860 * this has the effect of indexing locally defined tags with
861 * the attribute of their ancestor in the record.
864 while (!par->u.tag.element)
865 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
867 if (!par || !(tlist = par->u.tag.element->termlists))
869 if (par->u.tag.element->tag)
870 dtype = par->u.tag.element->tag->kind;
872 for (; tlist; tlist = tlist->next)
874 /* consider source */
876 assert(tlist->source);
877 sp_parse(sp, n, wrd, tlist->source);
/* only non-empty terms are indexed */
879 if (wrd->term_buf && wrd->term_len)
881 if (p->flagShowRecords)
884 printf("%*sIdx: [%s]", (level + 1) * 4, "",
887 printf("%s %s", tlist->index_name, tlist->source);
889 printf("%s:%s [%d] %s",
890 tlist->att->parent->name,
891 tlist->att->name, tlist->att->value,
894 printf (" XData:\"");
895 for (i = 0; i<wrd->term_len && i < 40; i++)
896 fputc (wrd->term_buf[i], stdout);
898 if (wrd->term_len > 40)
900 fputc ('\n', stdout);
904 wrd->index_type = *tlist->structure;
906 wrd->index_name = tlist->index_name;
908 wrd->attrSet = (int) (tlist->att->parent->reference);
909 wrd->attrUse = tlist->att->locals->local;
/* Recursive worker of dumpkeys(): walks n and its following siblings and
 * descends into the children with level + 1.
 *
 * Per node:
 *   - with p->flagShowRecords set, a description of root and tag nodes is
 *     printed, indented by level;
 *   - tag nodes are indexed via index_termlist() and, when an abstract
 *     syntax is present, via index_xpath() for the start tag;
 *   - data nodes are indexed via index_termlist() against their parent tag
 *     and via index_xpath();
 *   - after the children, tag nodes get an index_xpath() call for the end
 *     tag.
 * A negative result from the recursive call is tested for.
 * NOTE(review): the index_xpath() calls appear in two forms (symbolic index
 * name vs. numeric use value), presumably alternatives under a preprocessor
 * conditional that is not visible here. */
917 static int dumpkeys_r(struct source_parser *sp,
918 data1_node *n, struct recExtractCtrl *p, int level,
921 for (; n; n = n->next)
923 if (p->flagShowRecords) /* display element description to user */
925 if (n->which == DATA1N_root)
927 printf("%*s", level * 4, "");
928 printf("Record type: '%s'\n", n->u.root.type);
930 else if (n->which == DATA1N_tag)
934 printf("%*s", level * 4, "");
935 if (!(e = n->u.tag.element))
936 printf("Local tag: '%s'\n", n->u.tag.tag);
939 printf("Elm: '%s' ", e->name);
942 data1_tag *t = e->tag;
944 printf("TagNam: '%s' ", t->names->name);
947 printf("%s[%d],", t->tagset->name, t->tagset->type);
950 if (t->which == DATA1T_numeric)
951 printf("%d)", t->value.numeric);
953 printf("'%s')", t->value.string);
960 if (n->which == DATA1N_tag)
962 index_termlist(sp, n, n, p, level, wrd);
963 /* index start tag */
965 if (n->root->u.root.absyn)
966 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
969 if (n->root->u.root.absyn)
970 index_xpath(sp, n, p, level, wrd, 1);
/* descend into the children */
975 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
979 if (n->which == DATA1N_data)
981 data1_node *par = get_parent_tag(p->dh, n);
983 if (p->flagShowRecords)
985 printf("%*s", level * 4, "");
/* long data is shown abbreviated: head and tail only */
987 if (n->u.data.len > 256)
988 printf("'%.170s ... %.70s'\n", n->u.data.data,
989 n->u.data.data + n->u.data.len-70);
990 else if (n->u.data.len > 0)
991 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
997 index_termlist(sp, par, n, p, level, wrd);
1000 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
1003 index_xpath(sp, n, p, level, wrd, 1016);
1007 if (n->which == DATA1N_tag)
1011 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
1014 index_xpath(sp, n, p, level, wrd, 2);
1018 if (p->flagShowRecords && n->which == DATA1N_root)
1020 printf("%*s-------------\n\n", level * 4, "");
/* Extracts all index keys of the tree rooted at n: creates a source
 * parser, runs dumpkeys_r() from level 0 and destroys the parser again.
 * r holds the result of dumpkeys_r(). */
1026 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
1028 struct source_parser *sp = source_parser_create();
1029 int r = dumpkeys_r(sp, n, p, 0, wrd);
1030 source_parser_destroy(sp);
/* Extracts index keys from an already built data1 tree n.  When the root
 * has an abstract syntax, its schema reference is converted to an OID and
 * reported through p->schemaAdd.  The record word is then initialised via
 * p->init and the keys are generated with dumpkeys(), whose result is
 * returned. */
1034 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
1037 int oidtmp[OID_SIZE];
1040 oe.proto = PROTO_Z3950;
1041 oe.oclass = CLASS_SCHEMA;
1042 if (n->u.root.absyn)
1044 oe.value = n->u.root.absyn->reference;
1046 if ((oid_ent_to_oid (&oe, oidtmp)))
1047 (*p->schemaAdd)(p, oidtmp);
1049 (*p->init)(p, &wrd);
1051 return dumpkeys(n, p, &wrd);
/* Reads one record through grs_read and extracts its index keys.
 *
 * A grs_read_info is filled from the I/O callbacks and offset of p plus
 * clientData, and the reader is invoked.  Visible return codes:
 *   RECCTRL_EXTRACT_EOF           - in the path following the read
 *   RECCTRL_EXTRACT_ERROR         - root without abstract syntax
 *   RECCTRL_EXTRACT_ERROR_GENERIC - dumpkeys() failed
 *   RECCTRL_EXTRACT_OK            - success
 * On the way the schema OID is reported via p->schemaAdd, text nodes are
 * concatenated and the tree is converted to UTF-8.  The tree is freed on
 * the two last paths.
 * NOTE(review): the "no absyn" error return is immediately followed by an
 * "if absyn" test; presumably one of them is compiled out by a
 * preprocessor conditional not visible here. */
1054 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
1056 data1_node *(*grs_read)(struct grs_read_info *))
1059 struct grs_read_info gri;
1061 int oidtmp[OID_SIZE];
1064 gri.readf = p->readf;
1065 gri.seekf = p->seekf;
1066 gri.tellf = p->tellf;
1069 gri.offset = p->offset;
1072 gri.clientData = clientData;
1074 n = (*grs_read)(&gri);
1076 return RECCTRL_EXTRACT_EOF;
1077 oe.proto = PROTO_Z3950;
1078 oe.oclass = CLASS_SCHEMA;
1080 if (!n->u.root.absyn)
1081 return RECCTRL_EXTRACT_ERROR;
1083 if (n->u.root.absyn)
1085 oe.value = n->u.root.absyn->reference;
1086 if ((oid_ent_to_oid (&oe, oidtmp)))
1087 (*p->schemaAdd)(p, oidtmp);
1089 data1_concat_text(p->dh, mem, n);
1091 /* ensure our data1 tree is UTF-8 */
1092 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1095 data1_pr_tree (p->dh, n, stdout);
1098 (*p->init)(p, &wrd);
1099 if (dumpkeys(n, p, &wrd) < 0)
1101 data1_free_tree(p->dh, n);
1102 return RECCTRL_EXTRACT_ERROR_GENERIC;
1104 data1_free_tree(p->dh, n);
1105 return RECCTRL_EXTRACT_OK;
/* Public extract entry point: creates an NMEM handle for the duration of
 * the call and delegates to grs_extract_sub().
 * NOTE(review): the release of mem and the return statement are not
 * visible in this excerpt. */
1108 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1109 data1_node *(*grs_read)(struct grs_read_info *))
1112 NMEM mem = nmem_create ();
1113 ret = grs_extract_sub(clientData, p, mem, grs_read);
1119 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Applies the record composition c (element set name or Espec-1) to the
 * tree n.
 *
 * Simple composition: only the generic form is accepted (otherwise 26);
 * the element set name is looked up in the abstract syntax of n, and an
 * unknown name yields 25 with *addinfo set to the name (ODR memory).
 * Complex composition: an element set name is handled the same way; an
 * external spec must be Espec-1, anything else yields 25.
 * When an Espec-1 has been found the result of data1_doespec1() is
 * returned.
 * NOTE(review): the remaining return paths ("all match") are not visible
 * in this excerpt. */
1121 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1122 char **addinfo, ODR o)
1124 data1_esetname *eset;
1125 Z_Espec1 *espec = 0;
1130 case Z_RecordComp_simple:
1131 if (c->u.simple->which != Z_ElementSetNames_generic)
1132 return 26; /* only generic form supported. Fix this later */
1133 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1134 c->u.simple->u.generic)))
1136 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1137 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1138 return 25; /* invalid esetname */
1140 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1141 c->u.simple->u.generic);
1144 case Z_RecordComp_complex:
1145 if (c->u.complex->generic)
1147 /* insert check for schema */
1148 if ((p = c->u.complex->generic->elementSpec))
1152 case Z_ElementSpec_elementSetName:
1154 data1_getesetbyname(dh, n->u.root.absyn,
1155 p->u.elementSetName)))
1157 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1158 p->u.elementSetName);
1159 *addinfo = odr_strdup(o, p->u.elementSetName);
1160 return 25; /* invalid esetname */
1162 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1163 p->u.elementSetName);
1166 case Z_ElementSpec_externalSpec:
1167 if (p->u.externalSpec->which == Z_External_espec1)
1169 yaz_log(YLOG_DEBUG, "Got Espec-1");
1170 espec = p->u.externalSpec-> u.espec1;
1174 yaz_log(YLOG_LOG, "Unknown external espec.");
1175 return 25; /* bad. what is proper diagnostic? */
1182 return 26; /* fix */
1186 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1187 return data1_doespec1(dh, n, espec);
1191 yaz_log(YLOG_DEBUG, "Element: all match");
1196 /* Add Zebra info in separate namespace ...
1199 <metadata xmlns="http://www.indexdata.dk/zebra/">
1201 <localnumber>447</localnumber>
1202 <filename>records/genera.xml</filename>
/* Appends an <idzebra> element in the namespace
 * http://www.indexdata.dk/zebra/ to top.  Children are added for size
 * (p->recordSize), score (p->score), localnumber (p->localno) and filename
 * (p->fname), with whitespace text nodes (i2 / i4) in between for
 * indentation.
 * NOTE(review): the conditions guarding the individual children are not
 * visible in this excerpt. */
1207 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1210 const char *idzebra_ns[3];
1211 const char *i2 = "\n ";
1212 const char *i4 = "\n ";
1215 idzebra_ns[0] = "xmlns";
1216 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1219 data1_mk_text (p->dh, mem, i2, top);
1221 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1223 data1_mk_text (p->dh, mem, "\n", top);
1225 data1_mk_text (p->dh, mem, i4, n);
1227 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1231 data1_mk_text (p->dh, mem, i4, n);
1232 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1234 data1_mk_text (p->dh, mem, i4, n);
1235 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1238 data1_mk_text (p->dh, mem, i4, n);
1239 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1241 data1_mk_text (p->dh, mem, i2, n);
/* Public retrieve entry point: reads a record through grs_read and
 * renders it in the syntax requested by p->input_format.
 *
 * Stages, in the order of the visible code:
 *   1. read the record into a data1 tree and concatenate text nodes;
 *   2. add the system tags "size", "rank" (when p->score >= 0) and "sysno"
 *      (when p->localno > 0) below the root tag, using the tag names given
 *      by data1_systag_lookup();
 *   3. for VAL_TEXT_XML input format, add Zebra metadata through
 *      zebra_xml_metadata();
 *   4. if a schema OID was requested in a complex composition, map the
 *      record with a matching maptab and compare the resulting schema
 *      (diagnostic 238 on mismatch);
 *   5. map by syntax when a maptab targets p->input_format;
 *   6. for GRS-1 output, add a "schemaIdentifier" tag holding the schema
 *      OID rendered as text;
 *   7. apply the element specification via process_comp(); its result is
 *      stored in p->diagnostic on the visible error path;
 *   8. convert the tree into p->rec_buf / p->rec_len according to the
 *      output format.  A failed conversion sets diagnostic 238; some
 *      conversions copy the buffer into ODR memory, others set rec_len to
 *      (size_t) -1.
 * The data1 trees (node, onode) are freed with data1_free_tree().
 *
 * NOTE(review): in the schemaIdentifier stage a local named p is used as a
 * char cursor for sprintf(), shadowing the recRetrieveCtrl parameter (dh is
 * saved from p->dh beforehand).
 * NOTE(review): the switch labels, several declarations and the return
 * statements are missing from this excerpt. */
1244 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1245 data1_node *(*grs_read)(struct grs_read_info *))
1247 data1_node *node = 0, *onode = 0, *top;
1250 int res, selected = 0;
1252 struct grs_read_info gri;
1253 const char *tagname;
1255 int requested_schema = VAL_NONE;
1256 data1_marctab *marctab;
1259 mem = nmem_create();
1260 gri.readf = p->readf;
1261 gri.seekf = p->seekf;
1262 gri.tellf = p->tellf;
1268 gri.clientData = clientData;
1270 yaz_log(YLOG_DEBUG, "grs_retrieve");
1271 node = (*grs_read)(&gri);
1278 data1_concat_text(p->dh, mem, node);
1281 data1_pr_tree (p->dh, node, stdout);
1283 top = data1_get_root_tag (p->dh, node);
1285 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1286 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1288 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1290 dnew->u.data.what = DATA1I_text;
/* the number is formatted into the node's own lbuf */
1291 dnew->u.data.data = dnew->lbuf;
1292 sprintf(dnew->u.data.data, "%d", p->recordSize);
1293 dnew->u.data.len = strlen(dnew->u.data.data);
1296 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1297 if (tagname && p->score >= 0 &&
1298 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1300 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1301 dnew->u.data.what = DATA1I_num;
1302 dnew->u.data.data = dnew->lbuf;
1303 sprintf(dnew->u.data.data, "%d", p->score);
1304 dnew->u.data.len = strlen(dnew->u.data.data);
1307 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1308 "localControlNumber");
1309 if (tagname && p->localno > 0 &&
1310 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1312 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1313 dnew->u.data.what = DATA1I_text;
1314 dnew->u.data.data = dnew->lbuf;
1316 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1317 dnew->u.data.len = strlen(dnew->u.data.data);
1320 if (p->input_format == VAL_TEXT_XML)
1321 zebra_xml_metadata (p, top, mem);
1324 data1_pr_tree (p->dh, node, stdout);
1326 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1327 p->comp->u.complex->generic &&
1328 p->comp->u.complex->generic->which == Z_Schema_oid &&
1329 p->comp->u.complex->generic->schema.oid)
1331 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1333 requested_schema = oe->value;
1335 /* If schema has been specified, map if possible, then check that
1336 * we got the right one
1338 if (requested_schema != VAL_NONE)
1340 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1341 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1343 if (map->target_absyn_ref == requested_schema)
1346 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1355 if (node->u.root.absyn &&
1356 requested_schema != node->u.root.absyn->reference)
1358 p->diagnostic = 238;
1364 * Does the requested format match a known syntax-mapping? (this reflects
1365 * the overlap of schema and formatting which is inherent in the MARC
1368 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1369 if (node->u.root.absyn)
1370 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1372 if (map->target_absyn_ref == p->input_format)
1375 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1384 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1385 if (node->u.root.absyn &&
1386 node->u.root.absyn->reference != VAL_NONE &&
1387 p->input_format == VAL_GRS1)
1391 int oidtmp[OID_SIZE];
1393 oe.proto = PROTO_Z3950;
1394 oe.oclass = CLASS_SCHEMA;
1395 oe.value = node->u.root.absyn->reference;
1397 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1400 data1_handle dh = p->dh;
1404 for (ii = oid; *ii >= 0; ii++)
1408 sprintf(p, "%d", *ii);
1411 if ((dnew = data1_mk_tag_data_wd(dh, top,
1412 "schemaIdentifier", mem)))
1414 dnew->u.data.what = DATA1I_oid;
1415 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1416 memcpy(dnew->u.data.data, tmp, p - tmp);
1417 dnew->u.data.len = p - tmp;
1422 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1423 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1426 p->diagnostic = res;
1428 data1_free_tree(p->dh, onode);
1429 data1_free_tree(p->dh, node);
1433 else if (p->comp && !res)
1437 data1_pr_tree (p->dh, node, stdout);
1439 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1440 switch (p->output_format = (p->input_format != VAL_NONE ?
1441 p->input_format : VAL_SUTRS))
1445 data1_pr_tree (p->dh, node, stdout);
1447 /* default output encoding for XML is UTF-8 */
1448 data1_iconv (p->dh, mem, node,
1449 p->encoding ? p->encoding : "UTF-8",
1450 data1_get_encoding(p->dh, node));
1452 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1454 p->diagnostic = 238;
1457 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1458 memcpy (new_buf, p->rec_buf, p->rec_len);
1459 p->rec_buf = new_buf;
1463 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1465 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1467 p->diagnostic = 238; /* not available in requested syntax */
1469 p->rec_len = (size_t) (-1);
1472 /* ensure our data1 tree is UTF-8 */
1473 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1475 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1477 p->diagnostic = 238;
1479 p->rec_len = (size_t) (-1);
1482 /* ensure our data1 tree is UTF-8 */
1483 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1484 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1486 p->diagnostic = 238;
1488 p->rec_len = (size_t) (-1);
1492 data1_iconv (p->dh, mem, node, p->encoding,
1493 data1_get_encoding(p->dh, node));
1494 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1496 p->diagnostic = 238;
1499 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1500 memcpy (new_buf, p->rec_buf, p->rec_len);
1501 p->rec_buf = new_buf;
1506 data1_iconv (p->dh, mem, node, p->encoding,
1507 data1_get_encoding(p->dh, node));
1508 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1510 p->diagnostic = 238;
1513 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1514 memcpy (new_buf, p->rec_buf, p->rec_len);
1515 p->rec_buf = new_buf;
1519 if (!node->u.root.absyn)
1521 p->diagnostic = 238;
/* look for a MARC table whose reference equals the requested format */
1524 for (marctab = node->u.root.absyn->marc; marctab;
1525 marctab = marctab->next)
1526 if (marctab->reference == p->input_format)
1530 p->diagnostic = 238;
1534 data1_iconv (p->dh, mem, node, p->encoding,
1535 data1_get_encoding(p->dh, node));
1536 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1537 selected, &p->rec_len)))
1538 p->diagnostic = 238;
1541 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1542 memcpy (new_buf, p->rec_buf, p->rec_len);
1543 p->rec_buf = new_buf;
1547 data1_free_tree(p->dh, node);
1549 data1_free_tree(p->dh, onode);