-/* $Id: extract.c,v 1.249 2007-02-06 09:34:56 adam Exp $
+/* $Id: extract.c,v 1.250 2007-03-01 10:35:46 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
extract_set_store_data_prepare(&extractCtrl);
r = (*recType->extract)(recTypeClientData, &extractCtrl);
-
- if (r == RECCTRL_EXTRACT_EOF)
- return ZEBRA_FAIL;
- else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
+
+ switch (r)
{
+ case RECCTRL_EXTRACT_EOF:
+ return ZEBRA_FAIL;
+ case RECCTRL_EXTRACT_ERROR_GENERIC:
/* error occured during extraction ... */
yaz_log (YLOG_WARN, "extract error: generic");
return ZEBRA_FAIL;
- }
- else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
- {
+ case RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER:
/* error occured during extraction ... */
yaz_log (YLOG_WARN, "extract error: no such filter");
return ZEBRA_FAIL;
+ case RECCTRL_EXTRACT_SKIP:
+ if (show_progress)
+ yaz_log (YLOG_LOG, "skip %s %s " ZINT_FORMAT,
+ recordType, pr_fname, (zint) start_offset);
+ *more = 1;
+
+ end_offset = stream->endf(stream, 0);
+ if (end_offset)
+ stream->seekf(stream, end_offset);
+
+ return ZEBRA_OK;
+ case RECCTRL_EXTRACT_OK:
+ break;
+ default:
+ yaz_log (YLOG_WARN, "extract error: unknown error: %d", r);
+ return ZEBRA_FAIL;
}
-
+ end_offset = stream->endf(stream, 0);
+ if (end_offset)
+ stream->seekf(stream, end_offset);
+ else
+ end_offset = stream->tellf(stream);
+
all_matches_add(&extractCtrl);
if (extractCtrl.match_criteria[0])
match_criteria = extractCtrl.match_criteria;
-
-
- end_offset = stream->endf(stream, 0);
-
- if (!end_offset)
- end_offset = stream->tellf(stream);
- else
- stream->seekf(stream, end_offset);
-
}
-
*more = 1;
if (!sysno)
{
-/* $Id: mod_dom.c,v 1.24 2007-02-28 16:46:19 marc Exp $
+/* $Id: mod_dom.c,v 1.25 2007-03-01 10:35:46 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
struct filter_retrieve *retrieve_list;
struct filter_input *input_list;
struct filter_store *store;
+ int record_info_invoked;
};
tinfo->input_list = 0;
tinfo->store = 0;
tinfo->doc_config = 0;
+ tinfo->record_info_invoked = 0;
#if YAZ_HAVE_EXSLT
exsltRegisterAll();
xmlNodePtr node,
xmlChar * index_p)
{
- xmlChar *text = xmlNodeGetContent(node);
- size_t text_len = strlen((const char *)text);
-
- /* if there is no text, we do not need to proceed */
- if (text_len)
- {
- xmlChar *look = index_p;
- xmlChar *bval;
- xmlChar *eval;
-
- xmlChar index[256];
- xmlChar type[256];
+ if (tinfo->record_info_invoked == 1)
+ {
+ xmlChar *text = xmlNodeGetContent(node);
+ size_t text_len = strlen((const char *)text);
+
+ yaz_log(YLOG_LOG, "Indexing :%.*s:", text_len, text);
+
+ /* if there is no text, we do not need to proceed */
+ if (text_len)
+ {
+ xmlChar *look = index_p;
+ xmlChar *bval;
+ xmlChar *eval;
+
+ xmlChar index[256];
+ xmlChar type[256];
- /* assingning text to be indexed */
- recword->term_buf = (const char *)text;
- recword->term_len = text_len;
+ /* assigning text to be indexed */
+ recword->term_buf = (const char *)text;
+ recword->term_len = text_len;
- /* parsing all index name/type pairs */
- /* may not start with ' ' or ':' */
- while (*look && ' ' != *look && ':' != *look)
- {
- /* setting name and type to zero */
- *index = '\0';
- *type = '\0';
-
- /* parsing one index name */
- bval = look;
- while (*look && ':' != *look && ' ' != *look)
+ /* parsing all index name/type pairs */
+ /* may not start with ' ' or ':' */
+ while (*look && ' ' != *look && ':' != *look)
{
- look++;
- }
- eval = look;
- strncpy((char *)index, (const char *)bval, eval - bval);
- index[eval - bval] = '\0';
+ /* setting name and type to zero */
+ *index = '\0';
+ *type = '\0';
-
- /* parsing one index type, if existing */
- if (':' == *look)
- {
- look++;
-
+ /* parsing one index name */
bval = look;
- while (*look && ' ' != *look)
+ while (*look && ':' != *look && ' ' != *look)
{
look++;
}
eval = look;
- strncpy((char *)type, (const char *)bval, eval - bval);
- type[eval - bval] = '\0';
- }
+ strncpy((char *)index, (const char *)bval, eval - bval);
+ index[eval - bval] = '\0';
+
+
+ /* parsing one index type, if existing */
+ if (':' == *look)
+ {
+ look++;
+
+ bval = look;
+ while (*look && ' ' != *look)
+ {
+ look++;
+ }
+ eval = look;
+ strncpy((char *)type, (const char *)bval, eval - bval);
+ type[eval - bval] = '\0';
+ }
- /* writing debug out */
- if (extctr->flagShowRecords)
- dom_log(YLOG_LOG, tinfo, 0,
+ /* log the parsed index name/type pair and the text at debug level */
+ dom_log(YLOG_DEBUG, tinfo, 0,
"INDEX '%s:%s' '%s'",
index ? (const char *) index : "null",
type ? (const char *) type : "null",
text ? (const char *) text : "null");
- /* actually indexing the text given */
- recword->index_name = (const char *)index;
- if (type && *type)
- recword->index_type = *type;
- (extctr->tokenAdd)(recword);
-
- /* eat whitespaces */
- if (*look && ' ' == *look && *(look+1))
- {
- look++;
- }
+ recword->index_name = (const char *)index;
+ if (type && *type)
+ recword->index_type = *type;
+
+ /* writing debug out */
+ if (extctr->flagShowRecords)
+ dom_log(YLOG_LOG, tinfo, 0,
+ "INDEX '%s:%s' '%s'",
+ index ? (const char *) index : "null",
+ type ? (const char *) type : "null",
+ text ? (const char *) text : "null");
+
+ /* actually indexing the text given */
+ recword->index_name = (const char *)index;
+ if (type && *type)
+ recword->index_type = *type;
+ (extctr->tokenAdd)(recword);
+
+ /* skip one separating space, but only when more input follows it */
+ if (*look && ' ' == *look && *(look+1))
+ {
+ look++;
+ }
+ }
}
+ xmlFree(text);
}
-
- xmlFree(text);
}
/* DOM filter style indexing */
static void set_record_info(struct filter_info *tinfo,
struct recExtractCtrl *extctr,
+ xmlNodePtr node,
xmlChar * id_p,
xmlChar * rank_p,
xmlChar * type_p)
/* else */
/* dom_log(YLOG_WARN, tinfo, ptr, "dom filter: unknown record type '%s'", */
/* type_str); */
+ if (tinfo->record_info_invoked == 1)
+ {
+ /* warn about multiple record elements only once: when the counter is exactly 1 on entry */
+ dom_log(YLOG_WARN, tinfo, node, "multiple record elements");
+ }
+ tinfo->record_info_invoked++;
}
attr->name);
}
}
- set_record_info(tinfo, extctr, id_p, rank_p, type_p);
+ set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
}
else
{
pi_p, look);
}
else
- set_record_info(tinfo, extctr, id, rank, 0);
+ set_record_info(tinfo, extctr, node, id, rank, 0);
}
/* parsing index instruction */
struct recExtractCtrl *extctr,
xmlDocPtr doc)
{
- xmlChar *buf_out;
- int len_out;
-
/* only need to do the initialization once, reuse recword for all terms */
RecWord recword;
(*extctr->init)(extctr, &recword);
xmlFree(buf_out);
}
*/
-
+ tinfo->record_info_invoked = 0;
process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
}
if (doc)
xmlFreeDoc(doc);
+ if (tinfo->record_info_invoked == 0)
+ return RECCTRL_EXTRACT_SKIP;
return RECCTRL_EXTRACT_OK;
}