1 /* $Id: recgrs.c,v 1.86.2.12 2006-09-29 10:02:44 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <sys/types.h>
36 #define GRS_MAX_WORD 512
40 struct RecWord_entry **entries;
/* One node of a hash-bucket chain in a RecWord_list.  `next` points to the
   following node of the same bucket; the lookup code walks the chain until
   it reaches a null pointer.  The stored word is accessed as `w` by the
   code below (that member is not visible in this listing). */
45 struct RecWord_entry {
47 struct RecWord_entry *next;
/*
 * RecWord_list_create: build a new word list labelled `name`.
 *
 * A fresh NMEM handle is created and the list header, the bucket array
 * (hash_size slots) and a private copy of `name` are all allocated from it.
 * The assignment of hash_size and the return statement are not visible in
 * this listing.
 */
50 struct RecWord_list *RecWord_list_create(const char *name)
52 NMEM m = nmem_create();
53 struct RecWord_list *p = nmem_malloc(m, sizeof(*p));
58 p->entries = nmem_malloc(m, p->hash_size * sizeof(*p->entries));
/* visit every bucket slot; the loop body is not visible here
   (presumably it clears the slot -- TODO confirm) */
59 for (i = 0; i<p->hash_size; i++)
/* keep our own copy of the label so the caller's string need not outlive us */
61 p->name = nmem_strdup(m, name);
/*
 * RecWord_list_lookadd: look `wrd` up in list `l` and record it.
 *
 * Two words are considered equal when attrSet, attrUse, reg_type, length
 * and the `length` bytes of the string all match.  When an equal word is
 * found a diagnostic naming the list is written to stderr.  Otherwise a
 * copy of the word is linked at the head of its bucket chain.
 * The return statements are not visible in this listing; callers use the
 * result as a truth value.
 */
65 int RecWord_list_lookadd(struct RecWord_list *l, RecWord *wrd)
67 struct RecWord_entry *e;
/* NOTE(review): the visible part of the bucket expression uses attrSet
   twice and never attrUse; possibly one term was meant to be attrUse.
   The start of the expression is not visible here -- confirm. */
70 (wrd->attrSet*15 + wrd->attrSet + wrd->reg_type) % l->hash_size;
/* scan the bucket chain for an identical word */
72 for (e = l->entries[hash]; e; e = e->next)
73 if (e->w.attrSet == wrd->attrSet &&
74 e->w.attrUse == wrd->attrUse &&
75 e->w.reg_type == wrd->reg_type &&
76 e->w.length == wrd->length &&
77 !memcmp(e->w.string, wrd->string, wrd->length))
80 fprintf(stderr, "DUP key found in %s\n", l->name);
81 fprintf(stderr, "set=%d use=%d regtype=%c\n",
82 wrd->attrSet, wrd->attrUse, wrd->reg_type);
/* not found: allocate a node from the list's NMEM and push it on the chain */
86 e = nmem_malloc(l->nmem, sizeof(*e));
87 e->next = l->entries[hash];
/* copy the whole RecWord, then replace the string pointer with a private
   copy so the node does not alias the caller's buffer */
89 memcpy(&e->w, wrd, sizeof(*wrd));
90 e->w.string = nmem_malloc(l->nmem, wrd->length);
91 memcpy(e->w.string, wrd->string, wrd->length);
/* RecWord_list_destroy: release list `l` by destroying the NMEM handle it
   holds in l->nmem. */
95 void RecWord_list_destroy(struct RecWord_list *l)
98 nmem_destroy(l->nmem);
106 struct grs_handler *next;
/* Container for the registered GRS handlers: `handlers` is the head of a
   singly linked list chained through each handler's `next` member. */
109 struct grs_handlers {
110 struct grs_handler *handlers;
/*
 * read_grs_type: pick the handler named by `type` and let it read a record.
 *
 * `type` is split at its first '.': the part before the dot selects the
 * handler, the part after it is copied into p->type.  The matching
 * handler's read function is called with `p` and its result is stored in
 * *root.  The return statements are not visible in this listing.
 */
113 static int read_grs_type (struct grs_handlers *h,
114 struct grs_read_info *p, const char *type,
117 struct grs_handler *gh = h->handlers;
118 const char *cp = strchr (type, '.');
/* no dot, or a dot in first position: treat the whole string as the
   handler name by moving cp to the terminating NUL */
120 if (cp == NULL || cp == type)
122 cp = strlen(type) + type;
/* NOTE(review): unbounded copy; the size of p->type is not visible here --
   confirm the sub-type cannot exceed it */
126 strcpy (p->type, cp+1);
127 for (gh = h->handlers; gh; gh = gh->next)
/* exact match: the first cp-type bytes are equal and the handler's own
   name ends right there */
129 if (!memcmp (type, gh->type->type, cp-type) &&
130 gh->type->type[cp-type] == '\0')
/* obtain the handler's private state from its init hook (the condition
   guarding this call is not visible; presumably first use only) */
135 gh->clientData = (*gh->type->init)();
/* hand the private state to the reader through p, and store back whatever
   the reader left there */
137 p->clientData = gh->clientData;
138 *root = (gh->type->read)(p);
139 gh->clientData = p->clientData;
/* grs_add_handler: allocate a list node for handler type `t` and link it
   in front of the current head of h->handlers.  The remaining field
   assignments are not visible in this listing. */
146 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
148 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
149 gh->next = h->handlers;
/*
 * grs_init: allocate the handler container and register the built-in
 * GRS readers (sgml, regx, tcl, marc, marcxml, xml, perl, danbib).
 * NOTE(review): some registrations may sit inside preprocessor guards that
 * are not visible in this listing -- confirm.  The return statement is not
 * visible either.
 */
156 static void *grs_init(RecType recType)
158 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
161 grs_add_handler (h, recTypeGrs_sgml);
162 grs_add_handler (h, recTypeGrs_regx);
164 grs_add_handler (h, recTypeGrs_tcl);
166 grs_add_handler (h, recTypeGrs_marc);
167 grs_add_handler (h, recTypeGrs_marcxml);
169 grs_add_handler (h, recTypeGrs_xml);
172 grs_add_handler (h, recTypeGrs_perl);
174 grs_add_handler (h, recTypeGrs_danbib);
/* grs_destroy: tear down the handler container passed as `clientData`.
   Each handler's destroy hook is called with that handler's private state.
   The loop and the freeing of the nodes are not visible in this listing;
   gh_next is presumably used to remember the successor -- confirm. */
178 static void grs_destroy(void *clientData)
180 struct grs_handlers *h = (struct grs_handlers *) clientData;
181 struct grs_handler *gh = h->handlers, *gh_next;
186 (*gh->type->destroy)(gh->clientData);
/* State of the small parser for termlist "source" expressions.  The code
   below uses the members src (read position), tok and len (current token)
   and lookahead (kind of the current token). */
193 struct source_parser {
/*
 * sp_lex: advance to the next token of the source expression.
 * Leading blanks are skipped.  A word runs until NUL or one of the
 * delimiter characters "<>();,-: ".  Otherwise the current character
 * itself becomes the lookahead.  Returns the new lookahead value.
 */
200 static int sp_lex(struct source_parser *sp)
202 while (*sp->src == ' ')
/* consume a word: everything up to the next delimiter or end of input */
206 while (*sp->src && !strchr("<>();,-: ", *sp->src))
/* single-character token: the character is its own token code */
215 sp->lookahead = *sp->src;
219 return sp->lookahead;
/*
 * sp_expr: evaluate one source expression against node `n`, storing the
 * resulting string/length in `wrd`.
 *
 * Recognised forms, as visible below:
 *   data                  - text of a data node
 *   tag                   - name of a tag node
 *   attr(NAME)            - value of attribute NAME of a tag node
 *   range(E, START, LEN)  - sub-range of another expression
 * A word token is signalled by lookahead == 't'.  The return values are
 * not visible in this listing.
 */
223 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
225 if (sp->lookahead != 't')
227 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
229 if (n->which == DATA1N_data)
/* point at the node's own buffer; nothing is copied */
231 wrd->string = n->u.data.data;
232 wrd->length = n->u.data.len;
236 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
238 if (n->which == DATA1N_tag)
240 wrd->string = n->u.tag.tag;
241 wrd->length = strlen(n->u.tag.tag);
245 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
/* expect "(" NAME ")" */
248 if (sp->lookahead != '(')
251 if (sp->lookahead != 't')
254 if (n->which == DATA1N_tag)
256 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): with && the scan stops as soon as EITHER the length
   matches OR the first sp->len bytes match, so an attribute of equal
   length or one sharing the prefix is accepted.  `||` looks intended --
   confirm. */
257 while (p && strlen(p->name) != sp->len &&
258 memcmp (p->name, sp->tok, sp->len))
262 wrd->string = p->value;
263 wrd->length = strlen(p->value);
267 if (sp->lookahead != ')')
271 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
/* expect "(" expr "," START "," LEN ")" */
275 if (sp->lookahead != '(')
280 if (sp->lookahead != ',')
284 if (sp->lookahead != 't')
286 start = atoi_n(sp->tok, sp->len);
289 if (sp->lookahead != ',')
293 if (sp->lookahead != 't')
295 len = atoi_n(sp->tok, sp->len);
298 if (sp->lookahead != ')')
/* apply the range to whatever the inner expression produced */
302 if (wrd->string && wrd->length)
/* NOTE(review): no check that start <= wrd->length is visible before the
   pointer is advanced -- confirm a guard exists in the lines not shown */
304 wrd->string += start;
305 wrd->length -= start;
306 if (wrd->length > len)
/* sp_parse: evaluate source expression `src` for node `n`, leaving the
   result in `wrd`.  Uses a local source_parser (its initialisation is not
   visible in this listing) and returns whatever sp_expr returns. */
313 static int sp_parse(data1_node *n, RecWord *wrd, const char *src)
315 struct source_parser sp;
322 return sp_expr(&sp, n, wrd);
/*
 * d1_check_xpath_predicate: test node `n` against xpath predicate `p`.
 *
 * Relation predicates are supported only on attributes (name starts with
 * '@') and only with the '=' operator; anything else is logged as a
 * warning and the predicate is ignored.  A relation without operator only
 * requires the attribute to exist.  Boolean predicates combine the results
 * of their two operands with "and" / "or"; other operators are logged and
 * ignored.  Most return statements are not visible in this listing.
 */
325 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
334 if (p->which == XPATH_PREDICATE_RELATION) {
335 if (p->u.relation.name[0]) {
336 if (*p->u.relation.name != '@') {
338 " Only attributes (@) are supported in xelm xpath predicates");
339 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading '@' to get the bare attribute name */
342 attname = p->u.relation.name + 1;
344 /* looking for the attribute with a specified name */
345 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
346 yaz_log(LOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
348 if (!strcmp(attr->name, attname)) {
349 if (p->u.relation.op[0]) {
350 if (*p->u.relation.op != '=') {
352 "Only '=' relation is supported (%s)",p->u.relation.op);
353 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
356 yaz_log(LOG_DEBUG," - value %s <-> %s",
357 p->u.relation.value, attr->value );
358 if (!strcmp(attr->value, p->u.relation.value)) {
363 /* attribute exists, no value specified */
368 yaz_log(LOG_DEBUG, "return %d", res);
374 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
/* && and || short-circuit: the right operand is evaluated only if needed */
375 if (!strcmp(p->u.boolean.op,"and")) {
376 return d1_check_xpath_predicate(n, p->u.boolean.left)
377 && d1_check_xpath_predicate(n, p->u.boolean.right);
379 else if (!strcmp(p->u.boolean.op,"or")) {
380 return (d1_check_xpath_predicate(n, p->u.boolean.left)
381 || d1_check_xpath_predicate(n, p->u.boolean.right));
383 yaz_log(LOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/*
 * dfa_match_first: run `text` through the DFA whose state table is
 * `dfaar`, starting in dfaar[0].  Each transition covers an inclusive
 * character range ch[0]..ch[1].  The caller in this file expects a result
 * of 0 or 1; the return statements are not visible in this listing.
 */
391 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
393 struct DFA_state *s = dfaar[0]; /* start state */
396 const char *p = text;
/* fetch the next input character and look for a transition covering it */
399 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
400 if (c >= t->ch[0] && c <= t->ch[1])
404 /* move to next state and return if we get a match */
412 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
413 if (c >= t->ch[0] && c <= t->ch[1])
423 New function, looking for xpath "element" definitions in abs, by
424 tagpath, using a kind of ugly regexp search. The DFA was built while
425 parsing abs, so here we just go through them and try to match
426 against the given tagpath. The first matching entry is returned.
430 Added support for enhanced xelm. Now [] predicates are considered
431 as well, when selecting indexing rules... (why the hell it's called
/*
 * xpath_termlist_by_tagpath: find the term list of the first xpath element
 * definition in the record's abstract syntax that matches `tagpath` for
 * node `n`.
 *
 * The path is wrapped as "/<tagpath>\n" and run through each definition's
 * DFA.  The DFA verdict is cached in match_state (-1 = unknown) and shared
 * along the match_next chain, so each distinct regexp is run only once per
 * call.  For a DFA match the predicates of the location steps are then
 * checked against the enclosing tags, walking from `n` towards the root.
 */
438 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
440 data1_absyn *abs = n->root->u.root.absyn;
441 data1_xpelement *xpe = 0;
444 struct xpath_location_step *xp;
/* room for '/', the path, '\n' and the terminating NUL.
   NOTE(review): no release of pexpr is visible before the return below --
   confirm it is freed on every exit path */
446 char *pexpr = xmalloc(strlen(tagpath)+5);
448 sprintf (pexpr, "/%s\n", tagpath);
450 yaz_log(LOG_DEBUG, "Checking tagpath %s", tagpath);
453 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
454 xpe->match_state = -1; /* don't know if it matches yet */
456 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
459 int ok = xpe->match_state;
462 { /* don't know whether there is a match yet */
463 data1_xpelement *xpe1;
466 ok = dfa_match_first(xpe->dfa->states, pexpr);
468 /* mark this and following ones with same regexp */
469 for (xpe1 = xpe; xpe1; xpe1 = xpe1->match_next)
470 xpe1->match_state = ok;
473 assert (ok == 0 || ok == 1);
476 /* we have to check the predicates up to the root node */
479 /* find the first tag up in the node structure */
480 nn = n; while (nn && nn->which != DATA1N_tag) {
484 /* go from inside out in the node structure, while going
485 backwards through xpath location steps ... */
/* steps xpath_len-1 down to 1 are visited; step 0 is not */
486 for (i=xpe->xpath_len - 1; i>0; i--) {
488 yaz_log(LOG_DEBUG,"Checking step %d: %s on tag %s",
489 i,xp[i].part,nn->u.tag.tag);
491 if (!d1_check_xpath_predicate(nn, xp[i].predicate)) {
492 yaz_log(LOG_DEBUG," Predicates didn't match");
497 if (nn->which == DATA1N_tag) {
511 yaz_log(LOG_DEBUG,"Got it");
512 return xpe->termlists;
519 1 start element (tag)
521 3 start attr (and attr-exact)
529 Now, if there is a matching xelm described in abs, for the
530 indexed element or the attribute, then the data is handled according
531 to those definitions...
533 modified by pop, 2002-12-13
536 /* add xpath index for an attribute */
/* The word is placed in the VAL_IDXPATH attribute set.  The visible
   statements load `wrd` three times in turn: with tag_path, with the
   attribute value, and with tag_path again.  The attrUse assignments and
   the tokenAdd calls between them are not visible in this listing. */
537 static void index_xpath_attr (char *tag_path, char *name, char *value,
538 char *structure, struct recExtractCtrl *p,
541 wrd->attrSet = VAL_IDXPATH;
544 wrd->string = tag_path;
545 wrd->length = strlen(tag_path);
552 wrd->length = strlen(value);
558 wrd->string = tag_path;
559 wrd->length = strlen(tag_path);
/*
 * index_xpath: emit XPath-related index words for node `n`.
 *
 * `use` tells which event is being indexed; the calls in dumpkeys pass
 * 1 for a start tag, 1016 for a data node and 2 for an end tag.
 * `level` is only used to indent the flagShowRecords trace output.
 * `wl` is a word list used to avoid emitting the same attribute word
 * twice (via RecWord_list_lookadd).
 *
 * For data nodes the full tag path (innermost tag first, each followed by
 * '/') is built and matched against the xelm definitions.  Matching term
 * lists drive the indexing; otherwise default xpath indexing applies when
 * enabled.  For tags the path itself is indexed and, on the start tag,
 * each attribute as well.
 */
565 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
566 int level, RecWord *wrd, int use,
567 struct RecWord_list *wl)
570 char tag_path_full[1024];
573 int termlist_only = 1;
577 yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use);
/* default xpath indexing applies when there is no abstract syntax at all
   or when the abstract syntax enables it */
578 if ((!n->root->u.root.absyn) ||
579 (n->root->u.root.absyn->enable_xpath_indexing)) {
586 wrd->string = n->u.data.data;
587 wrd->length = n->u.data.len;
591 /* we have to fetch the whole path to the data tag */
592 for (nn = n; nn; nn = nn->parent) {
593 if (nn->which == DATA1N_tag) {
594 size_t tlen = strlen(nn->u.tag.tag);
/* give up silently if the path would not fit (2 bytes kept for '/' and NUL) */
595 if (tlen + flen > (sizeof(tag_path_full)-2)) return;
596 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
598 tag_path_full[flen++] = '/';
600 else if (nn->which == DATA1N_root) break;
603 tag_path_full[flen] = 0;
605 /* If we have a matching termlist... */
606 if (n->root->u.root.absyn &&
607 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
609 for (; tl; tl = tl->next)
611 /* need to copy recword because it may be changed */
613 wrd->reg_type = *tl->structure;
614 /* this is the ! case, so structure is for the xpath index */
615 memcpy (&wrd_tl, wrd, sizeof(*wrd));
617 sp_parse(n, &wrd_tl, tl->source);
619 wrd_tl.attrSet = VAL_IDXPATH;
620 wrd_tl.attrUse = use;
621 if (p->flagShowRecords)
624 printf("%*sXPath index", (level + 1) * 4, "");
625 printf (" XData:\"");
/* trace output shows at most the first 40 bytes of the word */
626 for (i = 0; i<wrd_tl.length && i < 40; i++)
627 fputc (wrd_tl.string[i], stdout);
629 if (wrd_tl.length > 40)
631 fputc ('\n', stdout);
634 (*p->tokenAdd)(&wrd_tl);
638 /* this is just the old fashioned attribute based index */
639 wrd_tl.attrSet = (int) (tl->att->parent->reference);
640 wrd_tl.attrUse = tl->att->locals->local;
641 if (p->flagShowRecords)
644 printf("%*sIdx: [%s]", (level + 1) * 4, "",
646 printf("%s:%s [%d] %s",
647 tl->att->parent->name,
648 tl->att->name, tl->att->value,
650 printf (" XData:\"");
651 for (i = 0; i<wrd_tl.length && i < 40; i++)
652 fputc (wrd_tl.string[i], stdout);
654 if (wrd_tl.length > 40)
656 fputc ('\n', stdout);
659 (*p->tokenAdd)(&wrd_tl);
663 /* xpath indexing is done, if there was no termlist given,
664 or no ! in the termlist, and default indexing is enabled... */
665 if (!p->flagShowRecords && !xpdone && !termlist_only)
667 wrd->attrSet = VAL_IDXPATH;
/* tag node: build the tag path the same way as for data nodes */
677 for (nn = n; nn; nn = nn->parent)
679 if (nn->which == DATA1N_tag)
681 size_t tlen = strlen(nn->u.tag.tag);
682 if (tlen + flen > (sizeof(tag_path_full)-2))
684 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
686 tag_path_full[flen++] = '/';
688 else if (nn->which == DATA1N_root)
694 wrd->string = tag_path_full;
696 wrd->attrSet = VAL_IDXPATH;
698 if (p->flagShowRecords)
700 printf("%*s tag=", (level + 1) * 4, "");
701 for (i = 0; i<wrd->length && i < 40; i++)
702 fputc (wrd->string[i], stdout);
713 tag_path_full[flen] = 0;
715 /* Add tag start/end xpath index, only when there is a ! in the appropriate xelm
716 directive, or default xpath indexing is enabled */
/* the assignment inside the condition is deliberate: do_xpindex is set
   and tested in one step */
717 if (!(do_xpindex = 1 - termlist_only)) {
718 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
719 for (; tl; tl = tl->next)
727 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
730 if (use == 1) /* only for the starting tag... */
732 #define MAX_ATTR_COUNT 50
/* NOTE(review): tll is indexed by i below; no check of i against
   MAX_ATTR_COUNT is visible in this listing -- confirm */
733 data1_termlist *tll[MAX_ATTR_COUNT];
737 /* get termlists for attributes, and find out, if we have to do xpath indexing */
738 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
743 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
745 int do_xpindex = 1 - termlist_only;
747 char attr_tag_path_full[1024];
750 /* this could be cached as well */
/* NOTE(review): sprintf into a 1024-byte buffer; int_len limits only the
   path part, the attribute name is unbounded -- confirm it cannot overflow */
751 sprintf (attr_tag_path_full, "@%s/%.*s",
752 xp->name, int_len, tag_path_full);
754 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
756 /* if there is a ! in the xelm termlist, or default indexing is on,
757 proceed with xpath idx */
760 for (; tl; tl = tl->next)
769 /* attribute (no value) */
772 wrd->string = xp->name;
773 wrd->length = strlen(xp->name);
/* name and value are combined only when both fit into comb */
779 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
781 /* attribute value exact */
782 strcpy (comb, xp->name);
784 strcat (comb, xp->value);
789 wrd->length = strlen(comb);
792 if (RecWord_list_lookadd(wl, wrd))
/* second pass over the attributes: index their values */
800 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
802 char attr_tag_path_full[1024];
/* NOTE(review): same unbounded sprintf as in the first pass -- confirm */
806 sprintf (attr_tag_path_full, "@%s/%.*s",
807 xp->name, int_len, tag_path_full);
811 /* If there is a termlist given (=xelm directive) */
812 for (; tl; tl = tl->next)
815 /* add xpath index for the attribute */
816 index_xpath_attr (attr_tag_path_full, xp->name,
817 xp->value, tl->structure,
823 /* index attribute value (only path/@attr) */
827 (tl->att->parent->reference);
828 wrd->attrUse = tl->att->locals->local;
829 wrd->reg_type = *tl->structure;
830 wrd->string = xp->value;
831 wrd->length = strlen(xp->value);
832 if (RecWord_list_lookadd(wl, wrd))
838 /* if there was no termlist for the given path,
839 or the termlist didn't have a ! element, index
840 the attribute as "w" */
841 if ((!xpdone) && (!termlist_only))
843 index_xpath_attr (attr_tag_path_full, xp->name,
844 xp->value, "w", p, wrd);
/*
 * index_termlist: index node `n` according to the term lists of tag `par`
 * or, when `par` has no element definition, of the nearest ancestor tag
 * that has one.
 *
 * For every term list entry the source expression is evaluated with
 * sp_parse to fill `wrd`; register type, attribute set and use attribute
 * are then taken from the entry.  With flagShowRecords a trace line is
 * printed, indented by `level`.  The call that emits the word is not
 * visible in this listing.
 */
853 static void index_termlist (data1_node *par, data1_node *n,
854 struct recExtractCtrl *p, int level, RecWord *wrd)
856 data1_termlist *tlist = 0;
857 data1_datatype dtype = DATA1K_string;
860 * cycle up towards the root until we find a tag with an att..
861 * this has the effect of indexing locally defined tags with
862 * the attribute of their ancestor in the record.
865 while (!par->u.tag.element)
866 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
868 if (!par || !(tlist = par->u.tag.element->termlists))
/* data type defaults to string unless the element's tag specifies a kind */
870 if (par->u.tag.element->tag)
871 dtype = par->u.tag.element->tag->kind;
873 for (; tlist; tlist = tlist->next)
875 /* consider source */
877 assert(tlist->source);
878 sp_parse(n, wrd, tlist->source);
882 if (p->flagShowRecords)
885 printf("%*sIdx: [%s]", (level + 1) * 4, "",
887 printf("%s:%s [%d] %s",
888 tlist->att->parent->name,
889 tlist->att->name, tlist->att->value,
891 printf (" XData:\"");
/* trace output shows at most the first 40 bytes of the word */
892 for (i = 0; i<wrd->length && i < 40; i++)
893 fputc (wrd->string[i], stdout);
895 if (wrd->length > 40)
897 fputc ('\n', stdout);
901 wrd->reg_type = *tlist->structure;
902 wrd->attrSet = (int) (tlist->att->parent->reference);
903 wrd->attrUse = tlist->att->locals->local;
/*
 * dumpkeys: walk node `n` and its following siblings, recursing into
 * children, and produce index words for each node.
 *
 * Tag nodes: term-list indexing, then xpath indexing with use 1 before the
 * children and use 2 after them.  Data nodes: term-list indexing against
 * the parent tag, then xpath indexing with use 1016.  With flagShowRecords
 * a description of each node is printed, indented by `level`.
 * A negative result from the recursive call is tested for; the return
 * statements themselves are not visible in this listing.
 */
910 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
911 RecWord *wrd, struct RecWord_list *wl)
913 for (; n; n = n->next)
915 if (p->flagShowRecords) /* display element description to user */
917 if (n->which == DATA1N_root)
919 printf("%*s", level * 4, "");
920 printf("Record type: '%s'\n", n->u.root.type);
922 else if (n->which == DATA1N_tag)
926 printf("%*s", level * 4, "");
/* a tag without element definition is reported as a local tag */
927 if (!(e = n->u.tag.element))
928 printf("Local tag: '%s'\n", n->u.tag.tag);
931 printf("Elm: '%s' ", e->name);
934 data1_tag *t = e->tag;
936 printf("TagNam: '%s' ", t->names->name);
939 printf("%s[%d],", t->tagset->name, t->tagset->type);
942 if (t->which == DATA1T_numeric)
943 printf("%d)", t->value.numeric);
945 printf("'%s')", t->value.string);
952 if (n->which == DATA1N_tag)
954 index_termlist (n, n, p, level, wrd);
955 /* index start tag */
956 if (n->root->u.root.absyn)
957 index_xpath (n, p, level, wrd, 1, wl);
/* descend into the children one level deeper */
961 if (dumpkeys(n->child, p, level + 1, wrd, wl) < 0)
965 if (n->which == DATA1N_data)
967 data1_node *par = get_parent_tag(p->dh, n);
969 if (p->flagShowRecords)
971 printf("%*s", level * 4, "");
/* long data is abbreviated: first 170 and last 70 bytes */
973 if (n->u.data.len > 256)
974 printf("'%.170s ... %.70s'\n", n->u.data.data,
975 n->u.data.data + n->u.data.len-70);
976 else if (n->u.data.len > 0)
977 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
983 index_termlist (par, n, p, level, wrd);
985 index_xpath (n, p, level, wrd, 1016, wl);
988 if (n->which == DATA1N_tag)
/* end-tag event, emitted after the children were processed */
991 index_xpath (n, p, level, wrd, 2, wl);
994 if (p->flagShowRecords && n->which == DATA1N_root)
996 printf("%*s-------------\n\n", level * 4, "");
/*
 * grs_extract_tree: index an already built data1 tree rooted at `n`.
 *
 * When the root has an abstract syntax, its schema reference is converted
 * to an OID and reported through p->schemaAdd.  The extraction control is
 * then initialised and the tree is walked with dumpkeys, using a temporary
 * word list that is destroyed afterwards.  The return statement is not
 * visible in this listing.
 */
1002 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
1005 int oidtmp[OID_SIZE];
1008 struct RecWord_list *wl = 0;
1010 oe.proto = PROTO_Z3950;
1011 oe.oclass = CLASS_SCHEMA;
1012 if (n->u.root.absyn)
1014 oe.value = n->u.root.absyn->reference;
/* only report the schema if the reference maps to an OID */
1016 if ((oid_ent_to_oid (&oe, oidtmp)))
1017 (*p->schemaAdd)(p, oidtmp);
1019 (*p->init)(p, &wrd);
1021 wl = RecWord_list_create("grs_extract_tree");
1022 r = dumpkeys(n, p, 0, &wrd, wl);
1023 RecWord_list_destroy(wl);
/*
 * grs_extract_sub: read one record through the handler selected by
 * p->subType and index it.
 *
 * Result codes visible below: RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER when
 * read_grs_type reports failure, RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR
 * when the record has no abstract syntax, RECCTRL_EXTRACT_ERROR_GENERIC
 * when dumpkeys fails, and RECCTRL_EXTRACT_OK otherwise.
 */
1027 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
1031 struct grs_read_info gri;
1033 int oidtmp[OID_SIZE];
1035 struct RecWord_list *wl = 0;
/* pass the caller's I/O callbacks and position on to the reader */
1038 gri.readf = p->readf;
1039 gri.seekf = p->seekf;
1040 gri.tellf = p->tellf;
1043 gri.offset = p->offset;
1047 if (read_grs_type (h, &gri, p->subType, &n))
1048 return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
1050 return RECCTRL_EXTRACT_EOF;
1051 oe.proto = PROTO_Z3950;
1052 oe.oclass = CLASS_SCHEMA;
/* NOTE(review): the two absyn tests below look like alternatives selected
   by a preprocessor condition that is not visible here -- confirm */
1054 if (!n->u.root.absyn)
1055 return RECCTRL_EXTRACT_ERROR;
1057 if (n->u.root.absyn)
1059 oe.value = n->u.root.absyn->reference;
1060 if ((oid_ent_to_oid (&oe, oidtmp)))
1061 (*p->schemaAdd)(p, oidtmp);
1063 data1_concat_text(p->dh, mem, n);
1065 /* ensure our data1 tree is UTF-8 */
1066 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1069 data1_pr_tree (p->dh, n, stdout);
1072 wl = RecWord_list_create("grs.sgml");
1074 (*p->init)(p, &wrd);
1075 if (dumpkeys(n, p, 0, &wrd, wl) < 0)
1076 ret_val = RECCTRL_EXTRACT_ERROR_GENERIC;
1078 ret_val = RECCTRL_EXTRACT_OK;
/* release the tree and the duplicate-detection list before returning */
1079 data1_free_tree(p->dh, n);
1080 RecWord_list_destroy(wl);
/* grs_extract: extraction entry point.  Creates an NMEM handle for the
   duration of the call and delegates to grs_extract_sub with the handler
   container passed as `clientData`.  The release of `mem` and the return
   statement are not visible in this listing -- confirm mem is destroyed. */
1085 static int grs_extract(void *clientData, struct recExtractCtrl *p)
1088 NMEM mem = nmem_create ();
1089 struct grs_handlers *h = (struct grs_handlers *) clientData;
1091 ret = grs_extract_sub(h, p, mem);
1097 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/*
 * process_comp: apply record composition `c` to the tree `n`.
 *
 * Simple composition: only the generic element-set-name form is accepted
 * (otherwise 26); an unknown element set name yields 25.
 * Complex composition: the generic element spec may name an element set
 * (25 if unknown) or carry an external Espec-1 (25 for any other external
 * type).
 * When an Espec-1 was obtained, the result of data1_doespec1 is returned.
 * The remaining return statements are not visible in this listing.
 */
1099 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
1101 data1_esetname *eset;
1102 Z_Espec1 *espec = 0;
1107 case Z_RecordComp_simple:
1108 if (c->u.simple->which != Z_ElementSetNames_generic)
1109 return 26; /* only generic form supported. Fix this later */
1110 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1111 c->u.simple->u.generic)))
1113 yaz_log(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1114 return 25; /* invalid esetname */
1116 yaz_log(LOG_DEBUG, "Esetname '%s' in simple compspec",
1117 c->u.simple->u.generic);
1120 case Z_RecordComp_complex:
1121 if (c->u.complex->generic)
1123 /* insert check for schema */
1124 if ((p = c->u.complex->generic->elementSpec))
1128 case Z_ElementSpec_elementSetName:
1130 data1_getesetbyname(dh, n->u.root.absyn,
1131 p->u.elementSetName)))
1133 yaz_log(LOG_LOG, "Unknown esetname '%s'",
1134 p->u.elementSetName);
1135 return 25; /* invalid esetname */
1137 yaz_log(LOG_DEBUG, "Esetname '%s' in complex compspec",
1138 p->u.elementSetName);
1141 case Z_ElementSpec_externalSpec:
1142 if (p->u.externalSpec->which == Z_External_espec1)
1144 yaz_log(LOG_DEBUG, "Got Espec-1");
1145 espec = p->u.externalSpec-> u.espec1;
1149 yaz_log(LOG_LOG, "Unknown external espec.");
1150 return 25; /* bad. what is proper diagnostic? */
1157 return 26; /* fix */
/* an element specification was found: let data1 apply it to the tree */
1161 yaz_log(LOG_DEBUG, "Element: Espec-1 match");
1162 return data1_doespec1(dh, n, espec);
1166 yaz_log(LOG_DEBUG, "Element: all match");
1171 /* Add Zebra info in separate namespace ...
1174 <metadata xmlns="http://www.indexdata.dk/zebra/">
1176 <localnumber>447</localnumber>
1177 <filename>records/genera.xml</filename>
/*
 * zebra_xml_metadata: append an <idzebra> element to `top`, in the
 * namespace http://www.indexdata.dk/zebra/, holding the record's size,
 * score, localnumber and filename taken from `p`.
 * i2 and i4 are whitespace text nodes inserted between the elements
 * (presumably for indentation of the output -- confirm).  All nodes are
 * created with `mem`.  Conditions guarding the individual children are not
 * visible in this listing.
 */
1182 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1185 const char *idzebra_ns[3];
1186 const char *i2 = "\n ";
1187 const char *i4 = "\n ";
/* attribute list for the new element: name/value pair for xmlns */
1190 idzebra_ns[0] = "xmlns";
1191 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1194 data1_mk_text (p->dh, mem, i2, top);
1196 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1198 data1_mk_text (p->dh, mem, "\n", top);
1200 data1_mk_text (p->dh, mem, i4, n);
1202 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1206 data1_mk_text (p->dh, mem, i4, n);
1207 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1209 data1_mk_text (p->dh, mem, i4, n);
1210 data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
1213 data1_mk_text (p->dh, mem, i4, n);
1214 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1216 data1_mk_text (p->dh, mem, i2, n);
/*
 * grs_retrieve: read a stored record and render it in the requested
 * syntax.
 *
 * Visible steps, in order:
 *   1. read the record through the handler selected by p->subType;
 *   2. concatenate text nodes and convert the tree to UTF-8;
 *   3. add system tags (size, rank, sysno) and, for XML output, the
 *      Zebra metadata element;
 *   4. map to the requested schema, then to the requested syntax, using
 *      the abstract syntax's map tables;
 *   5. for GRS-1 output add a schemaIdentifier tag holding the schema OID;
 *   6. apply the element specification (process_comp);
 *   7. convert the tree to the output format and store it in p->rec_buf /
 *      p->rec_len, or set p->diagnostic to 238 when not possible;
 *   8. free the tree(s).
 * Return statements and several branch headers are not visible in this
 * listing.
 */
1219 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
1221 data1_node *node = 0, *onode = 0, *top;
1224 int res, selected = 0;
1226 struct grs_read_info gri;
1227 const char *tagname;
1228 struct grs_handlers *h = (struct grs_handlers *) clientData;
1229 int requested_schema = VAL_NONE;
1230 data1_marctab *marctab;
1233 mem = nmem_create();
/* pass the caller's I/O callbacks on to the reader */
1234 gri.readf = p->readf;
1235 gri.seekf = p->seekf;
1236 gri.tellf = p->tellf;
1243 yaz_log(LOG_DEBUG, "grs_retrieve");
1244 if (read_grs_type (h, &gri, p->subType, &node))
1256 data1_concat_text(p->dh, mem, node);
1258 /* ensure our data1 tree is UTF-8 */
1259 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1262 data1_pr_tree (p->dh, node, stdout);
1264 top = data1_get_root_tag (p->dh, node);
/* system tag "size": the tag name comes from the abstract syntax's systag
   table, with "size" as the fallback name */
1266 yaz_log(LOG_DEBUG, "grs_retrieve: size");
1267 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1269 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1271 dnew->u.data.what = DATA1I_text;
/* the number is formatted into the node's own lbuf buffer.
   NOTE(review): the size of lbuf is not visible here -- confirm it holds
   any "%d" output */
1272 dnew->u.data.data = dnew->lbuf;
1273 sprintf(dnew->u.data.data, "%d", p->recordSize);
1274 dnew->u.data.len = strlen(dnew->u.data.data);
1277 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
/* rank is added only when a non-negative score is available */
1279 if (tagname && p->score >= 0 &&
1280 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1282 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1283 dnew->u.data.what = DATA1I_num;
1284 dnew->u.data.data = dnew->lbuf;
1285 sprintf(dnew->u.data.data, "%d", p->score);
1286 dnew->u.data.len = strlen(dnew->u.data.data);
1289 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1290 "localControlNumber");
/* local control number is added only when localno is positive */
1291 if (tagname && p->localno > 0 &&
1292 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1294 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1295 dnew->u.data.what = DATA1I_text;
1296 dnew->u.data.data = dnew->lbuf;
1298 sprintf(dnew->u.data.data, "%d", p->localno);
1299 dnew->u.data.len = strlen(dnew->u.data.data);
1302 if (p->input_format == VAL_TEXT_XML)
1303 zebra_xml_metadata (p, top, mem);
1306 data1_pr_tree (p->dh, node, stdout);
/* the way the requested schema OID is reached depends on the YAZ version */
1308 #if YAZ_VERSIONL >= 0x010903L
1309 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1310 p->comp->u.complex->generic &&
1311 p->comp->u.complex->generic->which == Z_Schema_oid &&
1312 p->comp->u.complex->generic->schema.oid)
1314 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1316 requested_schema = oe->value;
1319 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1320 p->comp->u.complex->generic && p->comp->u.complex->generic->schema)
1322 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
1324 requested_schema = oe->value;
1328 /* If schema has been specified, map if possible, then check that
1329 * we got the right one
1331 if (requested_schema != VAL_NONE)
1333 yaz_log(LOG_DEBUG, "grs_retrieve: schema mapping");
1334 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1336 if (map->target_absyn_ref == requested_schema)
1339 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1348 if (node->u.root.absyn &&
1349 requested_schema != node->u.root.absyn->reference)
1351 p->diagnostic = 238;
1357 * Does the requested format match a known syntax-mapping? (this reflects
1358 * the overlap of schema and formatting which is inherent in the MARC
1361 yaz_log(LOG_DEBUG, "grs_retrieve: syntax mapping");
1362 if (node->u.root.absyn)
1363 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1365 if (map->target_absyn_ref == p->input_format)
1368 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1377 yaz_log(LOG_DEBUG, "grs_retrieve: schemaIdentifier");
1378 if (node->u.root.absyn &&
1379 node->u.root.absyn->reference != VAL_NONE &&
1380 p->input_format == VAL_GRS1)
1384 int oidtmp[OID_SIZE];
1386 oe.proto = PROTO_Z3950;
1387 oe.oclass = CLASS_SCHEMA;
1388 oe.value = node->u.root.absyn->reference;
1390 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1393 data1_handle dh = p->dh;
1397 for (ii = oid; *ii >= 0; ii++)
1401 sprintf(p, "%d", *ii);
1404 if ((dnew = data1_mk_tag_data_wd(dh, top,
1405 "schemaIdentifier", mem)))
1407 dnew->u.data.what = DATA1I_oid;
1408 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1409 memcpy(dnew->u.data.data, tmp, p - tmp);
1410 dnew->u.data.len = p - tmp;
1415 yaz_log(LOG_DEBUG, "grs_retrieve: element spec");
1416 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
1418 p->diagnostic = res;
1420 data1_free_tree(p->dh, onode);
1421 data1_free_tree(p->dh, node);
1425 else if (p->comp && !res)
1429 data1_pr_tree (p->dh, node, stdout);
1431 yaz_log(LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1432 switch (p->output_format = (p->input_format != VAL_NONE ?
1433 p->input_format : VAL_SUTRS))
1438 data1_pr_tree (p->dh, node, stdout);
1442 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1444 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1446 p->diagnostic = 238;
1449 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1450 memcpy (new_buf, p->rec_buf, p->rec_len);
1451 p->rec_buf = new_buf;
1456 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1458 p->diagnostic = 238; /* not available in requested syntax */
/* rec_len of (size_t)-1 is stored for this result instead of a byte count
   (presumably marking rec_buf as a structure rather than a buffer --
   confirm against the consumer of rec_len) */
1460 p->rec_len = (size_t) (-1);
1463 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1465 p->diagnostic = 238;
1467 p->rec_len = (size_t) (-1);
1470 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1472 p->diagnostic = 238;
1474 p->rec_len = (size_t) (-1);
/* text-like outputs: convert from UTF-8 to the requested encoding first */
1478 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1479 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1481 p->diagnostic = 238;
/* the rendered bytes are copied into memory obtained from p->odr */
1484 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1485 memcpy (new_buf, p->rec_buf, p->rec_len);
1486 p->rec_buf = new_buf;
1490 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1492 p->diagnostic = 238;
1495 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1496 memcpy (new_buf, p->rec_buf, p->rec_len);
1497 p->rec_buf = new_buf;
/* remaining formats need a MARC table from the abstract syntax */
1501 if (!node->u.root.absyn)
1503 p->diagnostic = 238;
/* look for the MARC table whose reference equals the requested format */
1506 for (marctab = node->u.root.absyn->marc; marctab;
1507 marctab = marctab->next)
1508 if (marctab->reference == p->input_format)
1512 p->diagnostic = 238;
1516 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1517 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1518 selected, &p->rec_len)))
1519 p->diagnostic = 238;
1522 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1523 memcpy (new_buf, p->rec_buf, p->rec_len);
1524 p->rec_buf = new_buf;
/* release the working tree and, where a mapping replaced it, the original */
1528 data1_free_tree(p->dh, node);
1530 data1_free_tree(p->dh, onode);
/* Record type descriptor for GRS records; its initialiser is not visible
   in this listing.  recTypeGrs is the non-static pointer to it. */
1535 static struct recType grs_type =
1544 RecType recTypeGrs = &grs_type;