X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=retrieval%2Fd1_read.c;h=0af749251df77c3602e5943a91ce0708c260e2ee;hb=0804c970f7c9926167055fd77cfc8a72c7271fa6;hp=a2753415569c2eacc237b9493bc07bba2c16f907;hpb=43801085d371c69455730182f72c4e53d8c8218f;p=yaz-moved-to-github.git
diff --git a/retrieval/d1_read.c b/retrieval/d1_read.c
index a275341..0af7492 100644
--- a/retrieval/d1_read.c
+++ b/retrieval/d1_read.c
@@ -1,10 +1,25 @@
/*
- * Copyright (c) 1995-1999, Index Data.
+ * Copyright (c) 1995-2001, Index Data.
* See the file LICENSE for details.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: d1_read.c,v $
- * Revision 1.33 2000-11-29 14:22:47 adam
+ * Revision 1.38 2001-03-27 23:06:21 adam
+ * Quotes and slashes may occur within attributes.
+ *
+ * Revision 1.37 2001/02/28 09:00:06 adam
+ * Fixed problem with stack overflow for very nested records.
+ *
+ * Revision 1.36 2001/02/21 13:46:53 adam
+ * C++ fixes.
+ *
+ * Revision 1.35 2000/12/05 14:44:25 adam
+ * Readers skips <! ...> sections.
+ *
+ * Revision 1.34 2000/12/05 10:06:23 adam
+ * Added support for null-data rules like <abc/>.
+ *
+ * Revision 1.33 2000/11/29 14:22:47 adam
* Implemented XML/SGML attributes for data1 so that d1_read reads them
* and d1_write generates proper attributes for XML/SGML records. Added
* register locking for threaded version.
@@ -342,7 +357,7 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m,
c = (*get_byte)(fh);
if (!c || c == '>' || c == '/')
break;
- *pp = p = nmem_malloc (m, sizeof(*p));
+ *pp = p = (data1_xattr *) nmem_malloc (m, sizeof(*p));
p->next = 0;
pp = &p->next;
p->value = 0;
@@ -355,25 +370,36 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m,
}
wrbuf_putc (wrbuf, '\0');
len = wrbuf_len(wrbuf);
- p->name = nmem_malloc (m, len);
+ p->name = (char*) nmem_malloc (m, len);
strcpy (p->name, wrbuf_buf(wrbuf));
if (c == '=')
{
c = (*get_byte)(fh);
if (c == '"')
- c = (*get_byte)(fh);
- wrbuf_rewind(wrbuf);
- while (c && c != '"' && c != '>' && c != '/')
{
- wrbuf_putc (wrbuf, c);
- c = (*get_byte)(fh);
+ c = (*get_byte)(fh);
+ wrbuf_rewind(wrbuf);
+ while (c && c != '"')
+ {
+ wrbuf_putc (wrbuf, c);
+ c = (*get_byte)(fh);
+ }
+ if (c)
+ c = (*get_byte)(fh);
}
+ else
+ {
+ wrbuf_rewind(wrbuf);
+ while (c && c != '>' && c != '/')
+ {
+ wrbuf_putc (wrbuf, c);
+ c = (*get_byte)(fh);
+ }
+ }
wrbuf_putc (wrbuf, '\0');
len = wrbuf_len(wrbuf);
- p->value = nmem_malloc (m, len);
+ p->value = (char*) nmem_malloc (m, len);
strcpy (p->value, wrbuf_buf(wrbuf));
- if (c == '"')
- c = (*get_byte)(fh);
}
}
*ch = c;
@@ -416,10 +442,30 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m,
#endif
char tag[64];
char args[256];
- size_t i;
- for (i = 0; (c=(*get_byte)(fh)) && c != '>' && !d1_isspace(c);)
+ int null_tag = 0;
+ int end_tag = 0;
+ size_t i = 0;
+
+ c = (*get_byte)(fh);
+ if (c == '/')
+ {
+ end_tag = 1;
+ c = (*get_byte)(fh);
+ }
+ else if (c == '!') /* tags/comments that we don't deal with yet */
+ {
+ while (c && c != '>')
+ c = (*get_byte)(fh);
+ if (c)
+ c = (*get_byte)(fh);
+ continue;
+ }
+ while (c && c != '>' && c != '/' && !d1_isspace(c))
+ {
if (i < (sizeof(tag)-1))
tag[i++] = c;
+ c = (*get_byte)(fh);
+ }
tag[i] = '\0';
#if DATA1_USING_XATTR
xattr = data1_read_xattr (dh, m, get_byte, fh, wrbuf, &c);
@@ -427,11 +473,16 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m,
#else
while (d1_isspace(c))
c = (*get_byte)(fh);
- for (i = 0; c && c != '>'; c = (*get_byte)(fh))
+ for (i = 0; c && c != '>' && c != '/'; c = (*get_byte)(fh))
if (i < (sizeof(args)-1))
args[i++] = c;
args[i] = '\0';
#endif
+ if (c == '/')
+ { /* <tag attrs/> or <tag/> */
+ null_tag = 1;
+ c = (*get_byte)(fh);
+ }
if (c != '>')
{
yaz_log(LOG_WARN, "d1: %d: Malformed tag", line);
@@ -441,9 +492,9 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m,
c = (*get_byte)(fh);
/* End tag? */
- if (*tag == '/')
+ if (end_tag)
{
- if (tag[1] == '\0')
+ if (*tag == '\0')
--level; /* </> */
else
{ /* </tag> */
@@ -452,9 +503,9 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m,
{
parent = d1_stack[--i];
if ((parent->which == DATA1N_root &&
- !strcmp(tag+1, parent->u.root.type)) ||
+ !strcmp(tag, parent->u.root.type)) ||
(parent->which == DATA1N_tag &&
- !strcmp(tag+1, parent->u.tag.tag)))
+ !strcmp(tag, parent->u.tag.tag)))
{
level = i;
break;
@@ -561,7 +612,9 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m,
else if (parent)
parent->child = res;
d1_stack[level] = res;
- d1_stack[++level] = 0;
+ d1_stack[level+1] = 0;
+ if (level < 250 && !null_tag)
+ ++level;
}
else /* != '<'... this is a body of text */
{