Added extra presence check for tcl.h, because some systems have
[idzebra-moved-to-github.git] / recctrl / marcread.c
index f3214ff..42096f0 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: marcread.c,v 1.21 2003-08-21 10:29:00 adam Exp $
-   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+/* $Id: marcread.c,v 1.24.2.4 2006-08-14 10:39:16 adam Exp $
+   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
    Index Data Aps
 
 This file is part of the Zebra server.
@@ -15,9 +15,9 @@ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.
 
 You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra.  If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
 */
 
 #include <stdio.h>
@@ -42,6 +42,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
     int indicator_length;
     int identifier_length;
     int base_address;
+    int end_of_directory;
     int length_data_entry;
     int length_starting;
     int length_implementation;
@@ -55,6 +56,18 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
 
     if ((*p->readf)(p->fh, buf, 5) != 5)
         return NULL;
+    while (*buf < '0' || *buf > '9')
+    {
+       int i;
+
+       yaz_log(LOG_WARN, "MARC: Skipping bad byte %d (0x%02X)",
+               *buf & 0xff, *buf & 0xff);
+       for (i = 0; i<4; i++)
+           buf[i] = buf[i+1];
+
+       if ((*p->readf)(p->fh, buf+4, 1) != 1)
+           return NULL;
+    }
     record_length = atoi_n (buf, 5);
     if (record_length < 25)
     {
@@ -113,16 +126,39 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
        identifier_length = marctab->force_identifier_length;
     else
        identifier_length = atoi_n (buf+11, 1);
-    base_address = atoi_n (buf+12, 4);
+    base_address = atoi_n (buf+12, 5);
 
     length_data_entry = atoi_n (buf+20, 1);
     length_starting = atoi_n (buf+21, 1);
     length_implementation = atoi_n (buf+22, 1);
 
     for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
-        entry_p += 3+length_data_entry+length_starting;
-    base_address = entry_p+1;
-    for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
+    {
+        int l = 3 + length_data_entry + length_starting;
+        if (entry_p + l >= record_length)
+        {
+           yaz_log(LOG_WARN, "MARC: Directory offset %d: end of record.",
+                   entry_p);
+           return 0;
+        }
+        /* check for digits in length info */
+        while (--l >= 3)
+            if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
+                break;
+        if (l >= 3)
+        {
+            /* not all digits, so stop directory scan */
+           yaz_log(LOG_LOG, "MARC: Bad directory");
+            break;
+        }
+        entry_p += 3 + length_data_entry + length_starting;
+    }
+    end_of_directory = entry_p;
+    if (base_address != entry_p+1)
+    {
+       yaz_log(LOG_WARN, "MARC: Base address does not follow directory");
+    }
+    for (entry_p = 24; entry_p != end_of_directory; )
     {
         int data_length;
         int data_offset;
@@ -151,6 +187,12 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
         i = data_offset + base_address;
         end_offset = i+data_length-1;
 
+       if (data_length <= 0 || data_offset < 0 || end_offset >= record_length)
+       {
+           yaz_log(LOG_WARN, "MARC: Bad offsets in data. Skipping rest");
+           break;
+       }
+       
         if (memcmp (tag, "00", 2) && indicator_length)
         {
             /* generate indicator node */
@@ -210,37 +252,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
         i0 = i;
         while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
         {
-
-           if (!memcmp(tag, "4", 1) && (!yaz_matchstr(absynName, "UNIMARC")||
-               !yaz_matchstr(absynName, "RUSMARC")))
-           {
-               int go = 1;
-               data1_node *res =
-                   data1_mk_tag_n (p->dh, p->mem,
-                                    buf+i+1, identifier_length-1, 
-                                    0 /* attr */, parent);
-                i += identifier_length;
-                i0 = i;
-               do {
-                   while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
-                        buf[i] != ISO2709_FS && i < end_offset)
-                   {
-                       i++;
-                   }
-                   if (!memcmp(buf+i+1, "1", 1) && i<end_offset)
-                   {
-                       go = 0;
-                   }
-                   else
-                   {
-                       buf[i] = '$';
-                   }               
-               } while (go && i < end_offset);
-               
-               data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
-               i0 = i;
-           }
-            else if (memcmp (tag, "00", 2) && identifier_length)
+           if (memcmp (tag, "00", 2) && identifier_length)
             {
                data1_node *res;
                if (marc_xml)
@@ -305,6 +317,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
     }
     return res_root;
 }
+
 /*
  * Locate some data under this node. This routine should handle variants
  * prettily.
@@ -317,9 +330,12 @@ static char *get_data(data1_node *n, int *len)
     {
         if (n->which == DATA1N_data)
         {
-            int i;
             *len = n->u.data.len;
-
+           
+           /** Fixme: do not delete leading/trailing whitespace
+            ** in MARC field/subfield. It is fixed in
+            ** data1/d1_marc.c too.
+            
             for (i = 0; i<*len; i++)
                 if (!d1_isspace(n->u.data.data[i]))
                     break;
@@ -328,6 +344,9 @@ static char *get_data(data1_node *n, int *len)
             *len = *len - i;
             if (*len > 0)
                 return n->u.data.data + i;
+           **/
+           if (*len > 0)
+               return n->u.data.data;
         }
         if (n->which == DATA1N_tag)
             n = n->child;
@@ -352,7 +371,9 @@ static data1_node *lookup_subfield(data1_node *node, const char *name)
     }
     return 0;
 }
-static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char *name)
+
+static inline_subfield *lookup_inline_subfield(inline_subfield *pisf,
+                                              const char *name)
 {
     inline_subfield *p;
     
@@ -363,7 +384,9 @@ static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char
     }
     return 0;
 }
-static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_subfield *pisf)
+
+static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf,
+                                           inline_subfield *pisf)
 {
     mc_subfield *p;
     
@@ -377,20 +400,23 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_
            {
                if (strcmp(p->prefix, "_"))
                {
-                   strcat(strcat(buf, " "), p->prefix);
+                   wrbuf_puts(buf, " ");
+                   wrbuf_puts(buf, p->prefix);
                }
                if (p->interval.start == -1)
                {
-                   strcat(buf, found->data);
+                   wrbuf_puts(buf, found->data);
                }
                else
                {
-                   strncat(buf, found->data+p->interval.start,
-                       p->interval.end-p->interval.start+1);
+                   wrbuf_write(buf, found->data+p->interval.start,
+                               p->interval.end-p->interval.start+1);
+                   wrbuf_puts(buf, "");
                }
                if (strcmp(p->suffix, "_"))
                {
-                   strcat(strcat(buf, p->suffix), " ");
+                   wrbuf_puts(buf, p->suffix);
+                   wrbuf_puts(buf, " ");
                }
 #if MARCOMP_DEBUG
                logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name);
@@ -424,30 +450,54 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_
            }
            if (found)
            {
-               strcat(buf, " (");
+               wrbuf_puts(buf, " (");
                pisf = cat_inline_subfield(p->u.child, buf, pisf);
-               strcat(buf, ") ");
+               wrbuf_puts(buf, ") ");
            }
        }
     }
     return pisf; 
 }
-static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield)
-{
-    
+
+static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield)
+{    
     if (!pf || !subfield)
        return;
 
-    for (;subfield; subfield = subfield->next)
+    for (;subfield;)
     {
        int len;
-       inline_field *pif = inline_parse(get_data(subfield,&len));
+       inline_field *pif=NULL;
+       data1_node *psubf;
+       
+       if (yaz_matchstr(subfield->u.tag.tag, "1"))
+       {
+           subfield = subfield->next;
+           continue;
+       }
+       
+       psubf = subfield;
+       pif = inline_mk_field();
+       do
+       {
+           int i;
+           if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0)
+           {
+               logf(LOG_WARN, "inline subfield ($%s): parse error",
+                   psubf->u.tag.tag);
+               inline_destroy_field(pif);
+               return; 
+           }
+           psubf = psubf->next;
+       } while (psubf && yaz_matchstr(psubf->u.tag.tag, "1"));
+       
+       subfield = psubf;
        
        if (pif && !yaz_matchstr(pif->name, pf->name))
        {
            if (!pf->list && pif->list)
            {
-               strcat(buf, pif->list->data);
+               wrbuf_puts(buf, pif->list->data);
            }
            else
            {
@@ -468,24 +518,26 @@ static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield)
                    /*
                        add separator for inline fields
                    */
-                   if (strlen(buf))
+                   if (wrbuf_len(buf))
                    {
-                       strcat(buf, "\n");
+                       wrbuf_puts(buf, "\n");
                    }
                }
                else
                {
-                   logf(LOG_WARN, "In-line field %s missed -- indicators does not match", pif->name);
+                   logf(LOG_WARN, "In-line field %s missed -- indicators do not match", pif->name);
                }
            }
        }
        inline_destroy_field(pif);
     }
 #if MARCOMP_DEBUG    
-    logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf);
+    logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf->buf);
 #endif
 }
-static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfield)
+
+static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf,
+                               data1_node *subfield)
 {
     mc_subfield *p;
     
@@ -501,7 +553,8 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel
                
                if (strcmp(p->prefix, "_"))
                {
-                   strcat(strcat(buf, " "), p->prefix);
+                   wrbuf_puts(buf, " ");
+                   wrbuf_puts(buf, p->prefix);
                }
                
                if (p->u.in_line)
@@ -510,16 +563,18 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel
                }
                else if (p->interval.start == -1)
                {
-                   strcat(buf, get_data(found, &len));
+                   wrbuf_puts(buf, get_data(found, &len));
                }
                else
                {
-                   strncat(buf, get_data(found, &len)+p->interval.start,
+                   wrbuf_write(buf, get_data(found, &len)+p->interval.start,
                        p->interval.end-p->interval.start+1);
+                   wrbuf_puts(buf, "");
                }
                if (strcmp(p->suffix, "_"))
                {
-                   strcat(strcat(buf, p->suffix), " ");
+                   wrbuf_puts(buf, p->suffix);
+                   wrbuf_puts(buf, " ");
                }
 #if MARCOMP_DEBUG              
                logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag);
@@ -552,15 +607,17 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel
            }
            if (found)
            {
-               strcat(buf, " (");
+               wrbuf_puts(buf, " (");
                subfield = cat_subfield(p->u.child, buf, subfield);
-               strcat(buf, ") ");
+               wrbuf_puts(buf, ") ");
            }
        }
     }
     return subfield;
 }
-static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, data1_node *field)
+
+static data1_node *cat_field(struct grs_read_info *p, mc_field *pf,
+                            WRBUF buf, data1_node *field)
 {
     data1_node *subfield;
     int ind1, ind2;
@@ -587,15 +644,16 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d
        
        if (pf->interval.start == -1)
        {
-           strcat(buf, get_data(field, &len));
+           wrbuf_puts(buf, get_data(field, &len));
        }
        else
        {
-           strncat(buf, get_data(field, &len)+pf->interval.start,
-               pf->interval.end-pf->interval.start+1);
+           wrbuf_write(buf, get_data(field, &len)+pf->interval.start,
+                       pf->interval.end-pf->interval.start+1);
+           wrbuf_puts(buf, "");
        }
 #if MARCOMP_DEBUG
-        logf(LOG_LOG, "cat_field(): got buffer {%s}", buf);
+        logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf);
 #endif
        return field->next;
     }
@@ -626,11 +684,12 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d
     cat_subfield(pf->list, buf, subfield);
 
 #if MARCOMP_DEBUG    
-    logf(LOG_LOG, "cat_field(): got buffer {%s}", buf);
+    logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf);
 #endif
     
     return field->next;    
 }
+
 static int is_empty(char *s)
 {
     char *p = s;
@@ -642,14 +701,16 @@ static int is_empty(char *s)
     }
     return 1;
 }
-static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *root)
+
+static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt,
+                            data1_node *root)
 {
     data1_marctab *marctab = root->u.root.absyn->marc;
     data1_node *top = root->child;
     data1_node *field;
     mc_context *c;
     mc_field *pf;
-    char buf[1000000];
+    WRBUF buf;
     
     c = mc_mk_context(mc_stmnt+3);
     
@@ -663,6 +724,7 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data
        mc_destroy_context(c);
        return;
     }
+    buf = wrbuf_alloc();
 #if MARCOMP_DEBUG    
     logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt);
 #endif
@@ -686,13 +748,16 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data
            if (!yaz_matchstr(field->u.tag.tag, pf->name))
            {
                data1_node *new;
-               char *pb = buf;
+               char *pb;
 #if MARCOMP_DEBUG              
                logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag);
 #endif         
-               *buf = '\0';        
+               wrbuf_rewind(buf);
+               wrbuf_puts(buf, "");
+
                field = cat_field(p, pf, buf, field);
                
+               pb = wrbuf_buf(buf);
                for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n"))
                {
                        if (!is_empty(pb))
@@ -710,6 +775,7 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data
     }
     mc_destroy_field(pf);
     mc_destroy_context(c);
+    wrbuf_free(buf, 1);
 }
 
 data1_node *grs_read_marcxml(struct grs_read_info *p)
@@ -731,7 +797,6 @@ data1_node *grs_read_marcxml(struct grs_read_info *p)
     return root;
 }
 
-
 data1_node *grs_read_marc(struct grs_read_info *p)
 {
     data1_node *root = grs_read_iso2709(p, 0);
@@ -750,6 +815,7 @@ data1_node *grs_read_marc(struct grs_read_info *p)
     }
     return root;
 }
+
 static void *grs_init_marc(void)
 {
     return 0;