ISO2709 decoding: skip control characters from indicator data
author Adam Dickmeiss <adam@indexdata.dk>
Thu, 15 Jan 2009 12:06:06 +0000 (13:06 +0100)
committer Adam Dickmeiss <adam@indexdata.dk>
Thu, 15 Jan 2009 12:06:06 +0000 (13:06 +0100)
ISO2709 indicators that hold control characters are marked
invalid and the control characters are now skipped.

src/marc_read_iso2709.c

index 89839ea..249ba42 100644 (file)
@@ -149,8 +149,23 @@ int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
         {
             /* datafield */
             i += identifier_flag-1;
-            yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
-            i += indicator_length;
+            if (indicator_length)
+            {
+                /* skip RS/FS bytes in indicator. They are not allowed there */
+                int j;
+                for (j = indicator_length; --j >= 0; )
+                    if (buf[j+i] < ' ')
+                    {
+                        j++;
+                        i += j;
+                        end_offset += j;
+                        yaz_marc_cprintf(mt, "Bad indicator data. "
+                                         "Skipping %d bytes", j);
+                        break;
+                    }
+                yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
+                i += indicator_length;
+            }
 
             while (i < end_offset &&
                     buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
@@ -182,7 +197,7 @@ int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
         if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
         {
             yaz_marc_cprintf(mt, "No separator at end of field length=%d",
-                    data_length);
+                             data_length);
         }
     }
     return record_length;