X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=index%2Fmod_dom.c;h=5137efdddd977463cf03b5282dd193ecac5ffb68;hb=298a7903c3915135806074286f98e8b3f336e1d3;hp=f4f56436d354cb71b395d0587e440fa6cc0b9019;hpb=5e2a64f624de4bd6fae0a69a5db5fb0fdb588ee0;p=idzebra-moved-to-github.git diff --git a/index/mod_dom.c b/index/mod_dom.c index f4f5643..5137efd 100644 --- a/index/mod_dom.c +++ b/index/mod_dom.c @@ -1,4 +1,4 @@ -/* $Id: mod_dom.c,v 1.13 2007-02-15 14:33:41 marc Exp $ +/* $Id: mod_dom.c,v 1.15 2007-02-15 15:08:41 marc Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -42,12 +42,6 @@ #include #include - - -/* Alvis style indexing */ -#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1" -static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS; - /* DOM filter style indexing */ #define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0" static const char *zebra_dom_ns = ZEBRA_DOM_NS; @@ -664,144 +658,6 @@ static int ioclose_ex(void *context) } - -/* Alvis style indexing */ -static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl, - xmlNodePtr ptr, RecWord *recWord) -{ - for(; ptr; ptr = ptr->next) - { - index_cdata(tinfo, ctrl, ptr->children, recWord); - if (ptr->type != XML_TEXT_NODE) - continue; - recWord->term_buf = (const char *)ptr->content; - recWord->term_len = XML_STRLEN(ptr->content); - (*ctrl->tokenAdd)(recWord); - } -} - -/* Alvis style indexing */ -static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl, - xmlNodePtr ptr, RecWord *recWord) -{ - for(; ptr; ptr = ptr->next) - { - index_node(tinfo, ctrl, ptr->children, recWord); - if (ptr->type != XML_ELEMENT_NODE || !ptr->ns || - XML_STRCMP(ptr->ns->href, zebra_xslt_ns)) - continue; - if (!XML_STRCMP(ptr->name, "index")) - { - const char *name_str = 0; - const char *type_str = 0; - const char *xpath_str = 0; - struct _xmlAttr *attr; - for (attr = ptr->properties; attr; attr = attr->next) - { - if (attr_content(attr, "name", &name_str)) - ; - else if (attr_content(attr, "xpath", &xpath_str)) - ; - else if (attr_content(attr, "type", &type_str)) - ; - else - yaz_log(YLOG_WARN, "%s: dom filter: " - "bad attribute %s for ", - tinfo->fname, attr->name); - } - if (name_str) - { - /* save default type */ - int prev_type = recWord->index_type; - - /* type was given */ - if (type_str && *type_str) - recWord->index_type = *type_str; - - recWord->index_name = name_str; - index_cdata(tinfo, ctrl, ptr->children, recWord); - - /* restore it again */ - recWord->index_type = prev_type; - } - } - } -} - -/* Alvis style indexing */ -static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl, - xmlNodePtr ptr, RecWord *recWord) -{ - const char *type_str = "update"; - - if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns && - !XML_STRCMP(ptr->ns->href, zebra_xslt_ns) - && !XML_STRCMP(ptr->name, "record")) - { - const char *id_str = 0; - const char *rank_str = 0; - struct _xmlAttr *attr; - for (attr = ptr->properties; attr; attr = attr->next) - { - if (attr_content(attr, "type", &type_str)) - ; - else if (attr_content(attr, "id", &id_str)) - ; - else if (attr_content(attr, "rank", &rank_str)) - ; - else - yaz_log(YLOG_WARN, "%s: dom filter: " - "bad attribute %s for ", - tinfo->fname, attr->name); - } - if (id_str) - sscanf(id_str, "%255s", ctrl->match_criteria); - - if (rank_str) - ctrl->staticrank = atozint(rank_str); - ptr = ptr->children; - } - - if (!strcmp("update", type_str)) - index_node(tinfo, ctrl, ptr, recWord); - else if (!strcmp("delete", type_str)) - yaz_log(YLOG_WARN, "%s dom filter: " - "delete: to be implemented"); - else - yaz_log(YLOG_WARN, "dom filter: " - "unknown record type '%s'", - type_str); -} - - -/* Alvis style indexing */ -static void extract_doc_alvis(struct filter_info *tinfo, - struct recExtractCtrl *extctr, - xmlDocPtr doc) -{ - if (doc){ - RecWord recWord; - xmlChar *buf_out; - int len_out; - xmlNodePtr root_ptr; - - (*extctr->init)(extctr, &recWord); - - if (extctr->flagShowRecords){ - xmlDocDumpMemory(doc, &buf_out, &len_out); - fwrite(buf_out, len_out, 1, stdout); - xmlFree(buf_out); - } - root_ptr = xmlDocGetRootElement(doc); - if (root_ptr) - index_record(tinfo, extctr, root_ptr, &recWord); - else - yaz_log(YLOG_WARN, "%s dom filter: " - "No root for index XML record"); - } -} - - /* DOM filter style indexing */ static int attr_content_xml(struct _xmlAttr *attr, const char *name, xmlChar **dst_content) @@ -818,7 +674,8 @@ static int attr_content_xml(struct _xmlAttr *attr, const char *name, /* DOM filter style indexing */ static void index_value_of(struct filter_info *tinfo, - struct recExtractCtrl *extctr, + struct recExtractCtrl *extctr, + RecWord* recword, xmlNodePtr node, xmlChar * index_p) { @@ -837,10 +694,8 @@ static void index_value_of(struct filter_info *tinfo, xmlChar type[256]; /* assingning text to be indexed */ - RecWord recWord; - (*extctr->init)(extctr, &recWord); - recWord.term_buf = (const char *)text; - recWord.term_len = text_len; + recword->term_buf = (const char *)text; + recword->term_len = text_len; /* parsing all index name/type pairs */ /* may not start with ' ' or ':' */ @@ -878,10 +733,10 @@ static void index_value_of(struct filter_info *tinfo, "INDEX '%s:%s' '%s'", tinfo->fname, index, type, text); - recWord.index_name = (const char *)index; + recword->index_name = (const char *)index; if (type && *type) - recWord.index_type = *type; - (extctr->tokenAdd)(&recWord); + recword->index_type = *type; + (extctr->tokenAdd)(recword); /* eat whitespaces */ if (*look && ' ' == *look && *(look+1)){ @@ -912,7 +767,7 @@ static void set_record_info(struct filter_info *tinfo, extctr->staticrank = atozint((const char *)rank_p); /* if (!strcmp("update", type_str)) */ - /* index_node(tinfo, ctrl, ptr, recWord); */ + /* index_node(tinfo, ctrl, ptr, recword); */ /* else if (!strcmp("delete", type_str)) */ /* yaz_log(YLOG_WARN, "dom filter delete: to be implemented"); */ /* else */ @@ -925,6 +780,7 @@ static void set_record_info(struct filter_info *tinfo, /* DOM filter style indexing */ static void process_xml_element_zebra_node(struct filter_info *tinfo, struct recExtractCtrl *extctr, + RecWord* recword, xmlNodePtr node) { if (node->type == XML_ELEMENT_NODE @@ -936,7 +792,7 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo, struct _xmlAttr *attr; for (attr = node->properties; attr; attr = attr->next){ if (attr_content_xml(attr, "name", &index_p)){ - index_value_of(tinfo, extctr, node, index_p); + index_value_of(tinfo, extctr, recword,node, index_p); } else yaz_log(YLOG_WARN,"%s dom filter: " @@ -1077,16 +933,15 @@ static void process_xml_pi_node(struct filter_info *tinfo, /* DOM filter style indexing */ static void process_xml_element_node(struct filter_info *tinfo, struct recExtractCtrl *extctr, + RecWord* recword, xmlNodePtr node) { /* remember indexing instruction from PI to next element node */ xmlChar *index_p = 0; - /* yaz_log(YLOG_DEBUG,"ELEM %s\n", xmlGetNodePath(node)); */ - /* check if we are an element node in the special zebra namespace and either set record data or index value-of node content*/ - process_xml_element_zebra_node(tinfo, extctr, node); + process_xml_element_zebra_node(tinfo, extctr, recword, node); /* loop through kid nodes */ for (node = node->children; node; node = node->next) @@ -1098,10 +953,10 @@ static void process_xml_element_node(struct filter_info *tinfo, else if (node->type == XML_ELEMENT_NODE){ /* if there was a PI index instruction before this element */ if (index_p){ - index_value_of(tinfo, extctr, node, index_p); + index_value_of(tinfo, extctr, recword, node, index_p); index_p = 0; } - process_xml_element_node(tinfo, extctr, node); + process_xml_element_node(tinfo, extctr, recword,node); } else continue; @@ -1114,17 +969,20 @@ static void extract_dom_doc_node(struct filter_info *tinfo, struct recExtractCtrl *extctr, xmlDocPtr doc) { - /* yaz_log(YLOG_DEBUG,"DOC %s\n", xmlGetNodePath((xmlNodePtr)doc)); */ - xmlChar *buf_out; int len_out; + + /* only need to do the initialization once, reuse recword for all terms */ + RecWord recword; + (*extctr->init)(extctr, &recword); + if (extctr->flagShowRecords){ xmlDocDumpMemory(doc, &buf_out, &len_out); fwrite(buf_out, len_out, 1, stdout); xmlFree(buf_out); } - process_xml_element_node(tinfo, extctr, (xmlNodePtr)doc); + process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc); }