From 676ae79af06721621b1f66bdaec06164b3ba7b1f Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 27 Jun 2007 22:17:20 +0000 Subject: [PATCH] For data-1, do not chop text data in ISO2709 creation. The problem is that in some cases the chop operation will remove essential content. However, chop is needed in cases where input is XML/SGML. Therefore, this operation is performed in the data-1 map code instead, and chop is enabled by default. The chop can be disabled with the 'nochop' parameter in a map rule, e.g. map title /(3,245)/(3,a) nochop --- data1/d1_map.c | 37 ++++++++++++++++++----------------- data1/d1_marc.c | 49 +++++++++++++---------------------------------- include/idzebra/data1.h | 11 +++-------- 3 files changed, 35 insertions(+), 62 deletions(-) diff --git a/data1/d1_map.c b/data1/d1_map.c index 997b9cb..724ceb1 100644 --- a/data1/d1_map.c +++ b/data1/d1_map.c @@ -1,4 +1,4 @@ -/* $Id: d1_map.c,v 1.16 2007-04-16 08:44:31 adam Exp $ +/* $Id: d1_map.c,v 1.17 2007-06-27 22:17:20 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -30,6 +30,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +struct data1_mapunit +{ + int no_data; + int no_chop; + char *source_element_name; + data1_maptag *target_path; + struct data1_mapunit *next; +}; + data1_maptab *data1_read_maptab (data1_handle dh, const char *file) { NMEM mem = data1_nmem_get (dh); @@ -109,6 +118,10 @@ data1_maptab *data1_read_maptab (data1_handle dh, const char *file) (*mapp)->no_data = 1; else (*mapp)->no_data = 0; + if (argc > 3 && !data1_matchstr(argv[3], "nochop")) + (*mapp)->no_chop = 1; + else + (*mapp)->no_chop = 0; (*mapp)->source_element_name = (char *)nmem_malloc(mem, strlen(argv[1])+1); strcpy((*mapp)->source_element_name, argv[1]); @@ -163,23 +176,6 @@ data1_maptab *data1_read_maptab (data1_handle dh, const char *file) } /* - * Locate node with given elementname. - * NOTE: This is stupid - we don't find repeats this way. 
- */ -static data1_node *find_node(data1_node *p, char *elementname) -{ - data1_node *c, *r; - - for (c = p->child; c; c = c->next) - if (c->which == DATA1N_tag && c->u.tag.element && - !data1_matchstr(c->u.tag.element->name, elementname)) - return c; - else if ((r = find_node(c, elementname))) - return r; - return 0; -} - -/* * See if the node n is equivalent to the tag t. */ static int tagmatch(data1_node *n, data1_maptag *t) @@ -305,6 +301,11 @@ static int map_children(data1_handle dh, data1_node *n, data1_maptab *map, cur->child = dup_child (dh, c->child, &cur->last_child, mem, cur); + if (!m->no_chop) + { + data1_concat_text(dh, mem, cur->child); + data1_chop_text(dh, mem, cur->child); + } } } } diff --git a/data1/d1_marc.c b/data1/d1_marc.c index 05fbb76..a091cc6 100644 --- a/data1/d1_marc.c +++ b/data1/d1_marc.c @@ -1,4 +1,4 @@ -/* $Id: d1_marc.c,v 1.18 2007-04-16 08:44:31 adam Exp $ +/* $Id: d1_marc.c,v 1.19 2007-06-27 22:17:20 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -160,35 +160,22 @@ data1_marctab *data1_read_marctab (data1_handle dh, const char *file) } -/* - * Locate some data under this node. This routine should handle variants - * prettily. 
- */ -static char *get_data(data1_node *n, int *len, int chop) +static void get_data2(data1_node *n, int *len, char *dst, size_t max) { - char *r; + *len = 0; while (n) { if (n->which == DATA1N_data) { - int i; - *len = n->u.data.len; - - if (chop) + if (dst && *len < max) { - for (i = 0; i<*len; i++) - if (!d1_isspace(n->u.data.data[i])) - break; - while (*len && d1_isspace(n->u.data.data[*len - 1])) - (*len)--; - *len = *len - i; - if (*len > 0) - return n->u.data.data + i; - } - else - if (*len > 0) - return n->u.data.data; + size_t copy_len = max - *len; + if (copy_len > n->u.data.len) + copy_len = n->u.data.len; + memcpy(dst + *len, n->u.data.data, copy_len); + } + *len += n->u.data.len; } if (n->which == DATA1N_tag) n = n->child; @@ -197,9 +184,6 @@ static char *get_data(data1_node *n, int *len, int chop) else break; } - r = ""; - *len = strlen(r); - return r; } static void memint (char *p, int val, int len) @@ -271,11 +255,7 @@ static int nodetomarc(data1_handle dh, else if (!strcmp(field->u.tag.tag, "leader")) { int dlen = 0; - char *dbuf = get_data(subf, &dlen, 0); - if (dlen > 24) - dlen = 24; - if (dbuf && dlen > 0) - memcpy (leader, dbuf, dlen); + get_data2(subf, &dlen, leader, 24); continue; } else if (!strcmp(field->u.tag.tag, "controlfield")) @@ -320,7 +300,7 @@ static int nodetomarc(data1_handle dh, continue; /* we skip comments, cdata .. */ len += p->identifier_length; } - get_data(subf, &dlen, control_field ? 0 : 1); + get_data2(subf, &dlen, 0, 0); len += dlen; } } @@ -413,8 +393,6 @@ static int nodetomarc(data1_handle dh, } for (; subf; subf = subf->next) { - char *data; - if (!control_field) { const char *identifier = "a"; @@ -439,8 +417,7 @@ static int nodetomarc(data1_handle dh, memcpy (op + data_p+1, identifier, p->identifier_length-1); data_p += p->identifier_length; } - data = get_data(subf, &dlen, control_field ? 
0 : 1); - memcpy (op + data_p, data, dlen); + get_data2(subf, &dlen, op + data_p, 100000); data_p += dlen; } op[data_p++] = ISO2709_FS; diff --git a/include/idzebra/data1.h b/include/idzebra/data1.h index 5d2ffdc..ab533a5 100644 --- a/include/idzebra/data1.h +++ b/include/idzebra/data1.h @@ -1,4 +1,4 @@ -/* $Id: data1.h,v 1.24 2007-06-27 22:04:45 adam Exp $ +/* $Id: data1.h,v 1.25 2007-06-27 22:17:20 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -100,13 +100,7 @@ typedef struct data1_maptag struct data1_maptag *next; } data1_maptag; -typedef struct data1_mapunit -{ - int no_data; - char *source_element_name; - data1_maptag *target_path; - struct data1_mapunit *next; -} data1_mapunit; +typedef struct data1_mapunit data1_mapunit; typedef struct data1_maptab { @@ -117,6 +111,7 @@ typedef struct data1_maptab struct data1_maptab *next; } data1_maptab; + typedef struct data1_name { char *name; -- 1.7.10.4