/*
- * Copyright (C) 1994-1999, Index Data
+ * Copyright (C) 1994-2001, Index Data
* All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
*
* $Log: recgrs.c,v $
- * Revision 1.30 1999-07-06 12:26:41 adam
+ * Revision 1.47 2002-05-03 13:50:25 adam
+ * data1 cleanup
+ *
+ * Revision 1.46 2002/04/13 18:16:43 adam
+ * More XPATH work; common sequence numbers for extract keys
+ *
+ * Revision 1.45 2002/04/12 14:40:42 adam
+ * Work on XPATH
+ *
+ * Revision 1.44 2002/04/11 20:09:47 adam
+ * work on string tag indexing
+ *
+ * Revision 1.43 2002/03/21 23:06:36 adam
+ * Source 'tag' in abs-file
+ *
+ * Revision 1.42 2002/02/20 17:30:01 adam
+ * Work on new API. Locking system re-implemented
+ *
+ * Revision 1.41 2001/05/22 21:01:47 adam
+ * Removed print of data1 tree on stdout so that inetd works again.
+ *
+ * Revision 1.40 2001/03/29 21:31:31 adam
+ * Fixed "record begin" for Tcl filter.
+ *
+ * Revision 1.39 2000/12/05 19:09:15 adam
+ * Fixed problem where indexer could crash if abstract syntax was undefined.
+ *
+ * Revision 1.38 2000/12/05 14:44:58 adam
+ * Fixed minor bug that could cause zmbol to break if data were emitted
+ * with no parent tags.
+ *
+ * Revision 1.37 2000/12/05 12:22:53 adam
+ * Termlist source implemented (so that we can index values of XML/SGML
+ * attributes).
+ *
+ * Revision 1.36 2000/12/05 10:01:44 adam
+ * Fixed bug regarding user-defined attribute sets.
+ *
+ * Revision 1.35 2000/11/29 15:21:31 adam
+ * Fixed problem with passwd db.
+ *
+ * Revision 1.34 2000/02/25 13:24:49 adam
+ * Fixed bug regarding pointer conversion that showed up on OSF V5.
+ *
+ * Revision 1.33 1999/11/30 13:48:04 adam
+ * Improved installation. Updated for inclusion of YAZ header files.
+ *
+ * Revision 1.32 1999/09/07 07:19:21 adam
+ * Work on character mapping. Implemented replace rules.
+ *
+ * Revision 1.31 1999/07/14 10:56:43 adam
+ * Fixed potential memory leak.
+ *
+ * Revision 1.30 1999/07/06 12:26:41 adam
* Retrieval handler obeys schema and handles XML transfer syntax.
*
* Revision 1.29 1999/05/26 07:49:14 adam
#include <unistd.h>
#endif
-#include <log.h>
-#include <oid.h>
+#include <yaz/log.h>
+#include <yaz/oid.h>
#include <recctrl.h>
#include "grsread.h"
free (h);
}
-static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
+/*
+ * index_xpath: emit one index entry in the VAL_IDXPATH attribute set
+ * for node n.
+ *
+ *   n      node to index; only DATA1N_data and DATA1N_tag are handled,
+ *          every other node type is ignored.
+ *   p      extract control; p->flagShowRecords selects between printing
+ *          a preview on stdout and calling p->tokenAdd.
+ *   level  nesting depth, used only to indent the preview output.
+ *   wrd    word record that is filled in and handed to p->tokenAdd.
+ *   use    value stored in wrd->attrUse.
+ *
+ * For a data node the node's data is emitted with register type 'w'.
+ * For a tag node the emitted string is the chain of names from n up
+ * through its ancestors, each followed by '/', ending with the root
+ * node's type; it is emitted with register type '0'.
+ */
+static void index_xpath (data1_node *n, struct recExtractCtrl *p,
+                         int level, RecWord *wrd, int use)
+{
+    int i;
+    char tag_path_full[1024];
+    size_t flen = 0;
+    data1_node *nn;
+
+    switch (n->which)
+    {
+    case DATA1N_data:
+        wrd->reg_type = 'w';
+        wrd->string = n->u.data.data;
+        wrd->length = n->u.data.len;
+        wrd->attrSet = VAL_IDXPATH;
+        wrd->attrUse = use;
+        if (p->flagShowRecords)
+        {
+            /* preview only: show at most the first 8 bytes of the data */
+            printf("%*s data=", (level + 1) * 4, "");
+            for (i = 0; i<wrd->length && i < 8; i++)
+                fputc (wrd->string[i], stdout);
+            printf("\n");
+        }
+        else
+        {
+            (*p->tokenAdd)(wrd);
+        }
+        break;
+    case DATA1N_tag:
+        /*
+         * Walk from n towards the root. The test must be made on the
+         * node being visited (nn), not on n: n->which is always
+         * DATA1N_tag in this case, which made the root branch
+         * unreachable and read every ancestor through u.tag.
+         */
+        for (nn = n; nn; nn = nn->parent)
+        {
+            if (nn->which == DATA1N_tag)
+            {
+                size_t tlen = strlen(nn->u.tag.tag);
+                /* path does not fit in the buffer: index nothing */
+                if (tlen + flen > (sizeof(tag_path_full)-2))
+                    return;
+                memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
+                flen += tlen;
+                tag_path_full[flen++] = '/';
+            }
+            else if (nn->which == DATA1N_root)
+            {
+                size_t tlen = strlen(nn->u.root.type);
+                if (tlen + flen > (sizeof(tag_path_full)-2))
+                    return;
+                memcpy (tag_path_full + flen, nn->u.root.type, tlen);
+                flen += tlen;
+                tag_path_full[flen++] = '/';
+                break;
+            }
+        }
+        /*
+         * tag_path_full is not NUL terminated; consumers must rely on
+         * wrd->length. The buffer is local, so wrd->string is only
+         * valid until this function returns.
+         */
+        wrd->reg_type = '0';
+        wrd->string = tag_path_full;
+        wrd->length = flen;
+        wrd->attrSet = VAL_IDXPATH;
+        wrd->attrUse = use;
+        if (p->flagShowRecords)
+        {
+            /* preview only: show at most the first 40 bytes of the path */
+            printf("%*s tag=", (level + 1) * 4, "");
+            for (i = 0; i<wrd->length && i < 40; i++)
+                fputc (wrd->string[i], stdout);
+            if (i == 40)
+                printf (" ..");
+            printf("\n");
+        }
+        else
+        {
+            (*p->tokenAdd)(wrd);
+        }
+        break;
+    }
+}
+
+/*
+ * index_termlist: index node n according to the termlists of the
+ * nearest tag at or above par that has an element definition.
+ *
+ *   par    tag node where the search for an element definition starts.
+ *   n      node supplying the text to index (data, tag name or
+ *          attribute value, depending on each termlist's source).
+ *   p      extract control; p->flagShowRecords selects between printing
+ *          a preview on stdout and calling p->tokenAdd.
+ *   level  nesting depth, used only to indent the preview output.
+ *   wrd    word record that is filled in and handed to p->tokenAdd.
+ *
+ * Returns without indexing anything when no element definition with
+ * termlists is found.
+ */
+static void index_termlist (data1_node *par, data1_node *n,
+                            struct recExtractCtrl *p, int level, RecWord *wrd)
+{
+    data1_termlist *tlist = 0;
+    /*
+     * cycle up towards the root until we find a tag with an att..
+     * this has the effect of indexing locally defined tags with
+     * the attribute of their ancestor in the record.
+     */
+
+    while (!par->u.tag.element)
+        if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
+            break;
+    /*
+     * The loop can also end with par still set but without an element
+     * (when par has no parent), so the element must be checked before
+     * it is dereferenced.
+     */
+    if (!par || !par->u.tag.element ||
+        !(tlist = par->u.tag.element->termlists))
+        return;
+
+    for (; tlist; tlist = tlist->next)
+    {
+        char xattr[512];
+        /* consider source */
+        wrd->string = 0;
+
+        if (!strcmp (tlist->source, "data") && n->which == DATA1N_data)
+        {
+            wrd->string = n->u.data.data;
+            wrd->length = n->u.data.len;
+        }
+        else if (!strcmp (tlist->source, "tag") && n->which == DATA1N_tag)
+        {
+            wrd->string = n->u.tag.tag;
+            wrd->length = strlen(n->u.tag.tag);
+        }
+        else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 &&
+                 n->which == DATA1N_tag)
+        {
+            /* source is attr(NAME): look up attribute NAME on the tag */
+            data1_xattr *xa = n->u.tag.attributes;
+            while (xa && strcmp (xa->name, xattr))
+                xa = xa->next;
+            if (xa)
+            {
+                wrd->string = xa->value;
+                wrd->length = strlen(xa->value);
+            }
+        }
+        /* wrd->string stays 0 when the source does not apply to n */
+        if (wrd->string)
+        {
+            if (p->flagShowRecords)
+            {
+                int i;
+                printf("%*sIdx: [%s]", (level + 1) * 4, "",
+                       tlist->structure);
+                printf("%s:%s [%d] %s",
+                       tlist->att->parent->name,
+                       tlist->att->name, tlist->att->value,
+                       tlist->source);
+                /* preview only: show at most the first 8 bytes */
+                printf (" data=\"");
+                for (i = 0; i<wrd->length && i < 8; i++)
+                    fputc (wrd->string[i], stdout);
+                fputc ('"', stdout);
+                if (wrd->length > 8)
+                    printf (" ...");
+                fputc ('\n', stdout);
+            }
+            else
+            {
+                wrd->reg_type = *tlist->structure;
+                wrd->attrSet = (int) (tlist->att->parent->reference);
+                wrd->attrUse = tlist->att->locals->local;
+                (*p->tokenAdd)(wrd);
+            }
+        }
+    }
+}
+
+static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
+ RecWord *wrd)
{
- RecWord wrd;
- (*p->init)(p, &wrd); /* set defaults */
for (; n; n = n->next)
{
if (p->flagShowRecords) /* display element description to user */
if (n->which == DATA1N_root)
{
printf("%*s", level * 4, "");
- printf("Record type: '%s'\n", n->u.root.absyn->name);
+ printf("Record type: '%s'\n", n->u.root.type);
}
else if (n->which == DATA1N_tag)
{
}
}
+ if (n->which == DATA1N_tag)
+ {
+ index_termlist (n, n, p, level, wrd);
+ /* index start tag */
+ if (!n->root->u.root.absyn)
+ index_xpath (n, p, level, wrd, 1);
+ }
+
if (n->child)
- if (dumpkeys(n->child, p, level + 1) < 0)
+ if (dumpkeys(n->child, p, level + 1, wrd) < 0)
return -1;
+
if (n->which == DATA1N_data)
{
data1_node *par = get_parent_tag(p->dh, n);
- data1_termlist *tlist = 0;
- data1_datatype dtype = DATA1K_string;
if (p->flagShowRecords)
{
printf("NULL\n");
}
- assert(par);
+ if (par)
+ index_termlist (par, n, p, level, wrd);
+ if (!n->root->u.root.absyn)
+ index_xpath (n, p, level, wrd, 1016);
- /*
- * cycle up towards the root until we find a tag with an att..
- * this has the effect of indexing locally defined tags with
- * the attribute of their ancestor in the record.
- */
+ }
- while (!par->u.tag.element)
- if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
- break;
- if (!par || !(tlist = par->u.tag.element->termlists))
- continue;
- if (par->u.tag.element->tag)
- dtype = par->u.tag.element->tag->kind;
- for (; tlist; tlist = tlist->next)
- {
- if (p->flagShowRecords)
- {
- printf("%*sIdx: [%s]", (level + 1) * 4, "",
- tlist->structure);
- printf("%s:%s [%d]\n",
- tlist->att->parent->name,
- tlist->att->name, tlist->att->value);
- }
- else
- {
- wrd.reg_type = *tlist->structure;
- wrd.string = n->u.data.data;
- wrd.length = n->u.data.len;
- wrd.attrSet = (int) (tlist->att->parent->reference);
- wrd.attrUse = tlist->att->locals->local;
- (*p->addWord)(&wrd);
- }
- }
+ if (n->which == DATA1N_tag)
+ {
+ /* index end tag */
+ if (!n->root->u.root.absyn)
+ index_xpath (n, p, level, wrd, 2);
}
+
+
if (p->flagShowRecords && n->which == DATA1N_root)
{
printf("%*s-------------\n\n", level * 4, "");
{
oident oe;
int oidtmp[OID_SIZE];
+ RecWord wrd;
oe.proto = PROTO_Z3950;
oe.oclass = CLASS_SCHEMA;
- oe.value = n->u.root.absyn->reference;
-
- if ((oid_ent_to_oid (&oe, oidtmp)))
- (*p->addSchema)(p, oidtmp);
-
- return dumpkeys(n, p, 0);
+ if (n->u.root.absyn)
+ {
+ oe.value = n->u.root.absyn->reference;
+
+ if ((oid_ent_to_oid (&oe, oidtmp)))
+ (*p->schemaAdd)(p, oidtmp);
+ }
+ (*p->init)(p, &wrd);
+ return dumpkeys(n, p, 0, &wrd);
}
-static int grs_extract(void *clientData, struct recExtractCtrl *p)
+static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
+ NMEM mem)
{
data1_node *n;
- NMEM mem;
struct grs_read_info gri;
oident oe;
int oidtmp[OID_SIZE];
- struct grs_handlers *h = (struct grs_handlers *) clientData;
+ RecWord wrd;
- mem = nmem_create ();
gri.readf = p->readf;
gri.seekf = p->seekf;
gri.tellf = p->tellf;
return RECCTRL_EXTRACT_ERROR;
if (!n)
return RECCTRL_EXTRACT_EOF;
-
oe.proto = PROTO_Z3950;
oe.oclass = CLASS_SCHEMA;
- oe.value = n->u.root.absyn->reference;
- if ((oid_ent_to_oid (&oe, oidtmp)))
- (*p->addSchema)(p, oidtmp);
-
- if (dumpkeys(n, p, 0) < 0)
+#if 0
+ if (!n->u.root.absyn)
+ return RECCTRL_EXTRACT_ERROR;
+#endif
+ if (n->u.root.absyn)
+ {
+ oe.value = n->u.root.absyn->reference;
+ if ((oid_ent_to_oid (&oe, oidtmp)))
+ (*p->schemaAdd)(p, oidtmp);
+ }
+#if 0
+ data1_pr_tree (p->dh, n, stdout);
+#endif
+ (*p->init)(p, &wrd);
+ if (dumpkeys(n, p, 0, &wrd) < 0)
{
data1_free_tree(p->dh, n);
return RECCTRL_EXTRACT_ERROR;
}
data1_free_tree(p->dh, n);
- nmem_destroy(mem);
return RECCTRL_EXTRACT_OK;
}
+/*
+ * grs_extract: extract entry point. Creates a fresh NMEM handle, runs
+ * grs_extract_sub with it and destroys the handle again on every
+ * path before returning grs_extract_sub's result unchanged.
+ *
+ *   clientData  the struct grs_handlers for this handler, passed as
+ *               void *.
+ *   p           extract control, passed through to grs_extract_sub.
+ */
+static int grs_extract(void *clientData, struct recExtractCtrl *p)
+{
+    NMEM scratch = nmem_create ();
+    struct grs_handlers *handlers = (struct grs_handlers *) clientData;
+    int status = grs_extract_sub(handlers, p, scratch);
+
+    nmem_destroy(scratch);
+    return status;
+}
+
/*
* Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
*/
c->u.simple->u.generic)))
{
logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
- return 25; /* invalid esetname */
+ return 25; /* invalid esetname */
}
logf(LOG_DEBUG, "Esetname '%s' in simple compspec",
c->u.simple->u.generic);
nmem_destroy (mem);
return 0;
}
+#if 0
+ data1_pr_tree (p->dh, node, stdout);
+#endif
logf (LOG_DEBUG, "grs_retrieve: size");
- if ((dnew = data1_insert_taggeddata(p->dh, node, node,
- "size", mem)))
+ if ((dnew = data1_mk_tag_data_wd(p->dh, node, node,"size", mem)))
{
dnew->u.data.what = DATA1I_text;
dnew->u.data.data = dnew->lbuf;
tagname = res_get_def(p->res, "tagrank", "rank");
if (strcmp(tagname, "0") && p->score >= 0 &&
- (dnew = data1_insert_taggeddata(p->dh, node, node, tagname, mem)))
+ (dnew = data1_mk_tag_data_wd(p->dh, node, node, tagname, mem)))
{
logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
dnew->u.data.what = DATA1I_num;
tagname = res_get_def(p->res, "tagsysno", "localControlNumber");
if (strcmp(tagname, "0") && p->localno > 0 &&
- (dnew = data1_insert_taggeddata(p->dh, node, node, tagname, mem)))
+ (dnew = data1_mk_tag_data_wd(p->dh, node, node, tagname, mem)))
{
logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
dnew->u.data.what = DATA1I_text;
dnew->u.data.len = strlen(dnew->u.data.data);
}
+#if 0
+    /* debug aid only: writing the tree to stdout breaks running under inetd */
+    data1_pr_tree (p->dh, node, stdout);
+#endif
+
if (p->comp && p->comp->which == Z_RecordComp_complex &&
p->comp->u.complex->generic &&
p->comp->u.complex->generic->schema)
* family)
*/
logf (LOG_DEBUG, "grs_retrieve: syntax mapping");
- for (map = node->u.root.absyn->maptabs; map; map = map->next)
- {
- if (map->target_absyn_ref == p->input_format)
- {
- onode = node;
- if (!(node = data1_map_record(p->dh, onode, map, mem)))
- {
- p->diagnostic = 14;
- nmem_destroy (mem);
- return 0;
- }
- break;
- }
- }
+ if (node->u.root.absyn)
+ for (map = node->u.root.absyn->maptabs; map; map = map->next)
+ {
+ if (map->target_absyn_ref == p->input_format)
+ {
+ onode = node;
+ if (!(node = data1_map_record(p->dh, onode, map, mem)))
+ {
+ p->diagnostic = 14;
+ nmem_destroy (mem);
+ return 0;
+ }
+ break;
+ }
+ }
logf (LOG_DEBUG, "grs_retrieve: schemaIdentifier");
if (node->u.root.absyn &&
node->u.root.absyn->reference != VAL_NONE &&
}
*(p++) = '\0';
- if ((dnew = data1_insert_taggeddata(dh, node, node,
- "schemaIdentifier", mem)))
+ if ((dnew = data1_mk_tag_data_wd(dh, node, node,
+ "schemaIdentifier", mem)))
{
dnew->u.data.what = DATA1I_oid;
dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
else if (p->comp && !res)
selected = 1;
+#if 0
+ data1_pr_tree (p->dh, node, stdout);
+#endif
logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
switch (p->output_format = (p->input_format != VAL_NONE ?
p->input_format : VAL_SUTRS))
case VAL_TEXT_XML:
if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
- (int*)&p->rec_len)))
+ &p->rec_len)))
p->diagnostic = 238;
else
{
break;
case VAL_SUTRS:
if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
- (int*)&p->rec_len)))
+ &p->rec_len)))
p->diagnostic = 238;
else
{
break;
case VAL_SOIF:
if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
- (int*)&p->rec_len)))
+ &p->rec_len)))
p->diagnostic = 238;
else
{
break;
}
if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
- selected,
- (int*)&p->rec_len)))
+ selected, &p->rec_len)))
p->diagnostic = 238;
else
{