-/* $Id: mod_dom.c,v 1.26 2007-03-03 21:39:10 adam Exp $
+
+/* $Id: mod_dom.c,v 1.27 2007-03-05 13:02:11 marc Exp $
Copyright (C) 1995-2007
Index Data ApS
RecWord recword;
(*extctr->init)(extctr, &recword);
- tinfo->record_info_invoked = 0;
process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
}
xsltStylesheetPtr last_xsp = 0;
xmlDocPtr store_doc = 0;
+ /* by default, do not ingest the record */
+ tinfo->record_info_invoked = 0;
+
+ /* exit if empty document given */
+ if (!doc)
+ return RECCTRL_EXTRACT_SKIP;
+
+ /* we actually have a document which needs to be processed further */
params[0] = 0;
set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
params, &store_doc, &last_xsp);
}
+ /* saving either store doc or original doc in case no store doc exists */
if (last_xsp)
xsltSaveResultToString(&buf_out, &len_out,
store_doc ? store_doc : doc, last_xsp);
else
xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
-
- /* if (p->flagShowRecords)
- fwrite(buf_out, len_out, 1, stdout); */
(*p->setStoreData)(p, buf_out, len_out);
xmlFree(buf_out);
/* extract conversion */
perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0);
+
/* finally, do the indexing */
- if (doc)
+ if (doc){
extract_dom_doc_node(tinfo, p, doc);
-
- if (doc)
xmlFreeDoc(doc);
-
+ }
+
+ /* there was nothing to index, so there is no inserted/updated record */
if (tinfo->record_info_invoked == 0)
return RECCTRL_EXTRACT_SKIP;
+
return RECCTRL_EXTRACT_OK;
}
{
int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
+
if (type == XML_READER_TYPE_ELEMENT &&
input->u.xmlreader.split_level == depth)
{
- xmlNodePtr ptr
+ /* by default, do not ingest the record */
+ tinfo->record_info_invoked = 0;
+
+ xmlNodePtr ptr
= xmlTextReaderExpand(input->u.xmlreader.reader);
+
if (ptr)
- {
+ {
+ /* we have a new document */
+
xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
-# $Id: Makefile.am,v 1.12 2007-02-18 21:53:22 adam Exp $
+# $Id: Makefile.am,v 1.13 2007-03-05 13:02:11 marc Exp $
check_PROGRAMS = xslt1 xslt2 xslt3 xslt4 xslt5 dom1
TESTS = $(check_PROGRAMS)
-EXTRA_DIST=zebra.cfg zebrastaticrank.cfg \
+EXTRA_DIST= \
+ zebra.cfg zebrastaticrank.cfg zebra-dom.cfg \
marc-col.xml marc-one.xml marc-col.mrc \
marc-missing-ns.xml \
index.xsl id.xsl \
dom-index-element.xsl dom-index-pi.xsl \
marcschema-col.xml marcschema-one.xml snippet.xsl \
- dom-config-col.xml dom-config-one.xml dom-config-marc.xml
+ dom-config-col.xml dom-config-one.xml dom-config-marc.xml \
+ dom-config-skipped.xml
+
xslt1_SOURCES = xslt1.c
xslt2_SOURCES = xslt2.c
--- /dev/null
+<dom>
+ <!-- $Id: dom-config-skipped.xml,v 1.1 2007-03-05 13:02:11 marc Exp $ -->
+ <extract name="index">
+ <xslt stylesheet="dom-index-skipped.xsl"/>
+ </extract>
+ <retrieve name="F">
+ <xslt stylesheet="id.xsl"/>
+ </retrieve>
+ <input syntax="xml">
+ <xmlreader level="1"/>
+ </input>
+</dom>
--- /dev/null
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:m="http://www.loc.gov/MARC21/slim"
+ xmlns:z="http://indexdata.com/zebra-2.0"
+ exclude-result-prefixes="m z"
+ version="1.0">
+ <!-- $Id: dom-index-skipped.xsl,v 1.1 2007-03-05 13:02:11 marc Exp $ -->
+ <xsl:output indent="yes" method="xml" version="1.0" encoding="UTF-8"/>
+
+
+ <xsl:template match="text()"/>
+
+ <!-- skipping exactly the first record with controlfield '11224466' -->
+ <xsl:template match="/m:record[normalize-space(m:controlfield[@tag='001']) =
+ '11224466']"/>
+
+ <!-- skipping exactly the last record with controlfield '73090924' -->
+ <xsl:template match="/m:record[normalize-space(m:controlfield[@tag='001']) =
+ '73090924']"/>
+
+ <!-- and allowing one specific middle record -->
+ <xsl:template match="/m:record[normalize-space(m:controlfield[@tag='001']) =
+ '11224467']">
+ <z:record z:id="{normalize-space(m:controlfield[@tag='001'])}"
+ z:rank="{normalize-space(m:rank)}">
+ <xsl:apply-templates/>
+ </z:record>
+ </xsl:template>
+
+ <xsl:template match="m:controlfield[@tag='001']">
+ <z:index name="control">
+ <xsl:value-of select="normalize-space(.)"/>
+ </z:index>
+ </xsl:template>
+
+ <xsl:template match="m:datafield[@tag='245']/m:subfield[@code='a']">
+ <z:index name="title:w title:p title:s any:w">
+ <xsl:value-of select="."/>
+ </z:index>
+ </xsl:template>
+
+</xsl:stylesheet>
-/* $Id: dom1.c,v 1.1 2007-02-07 12:08:54 adam Exp $
+/* $Id: dom1.c,v 1.2 2007-03-05 13:02:11 marc Exp $
Copyright (C) 1995-2007
Index Data ApS
zh = index_some(zs, "dom.bad.xml", "marc-col.xml");
zebra_close(zh);
+ /* testing XMLREADER input with PI stylesheet */
zh = index_some(zs, "dom.dom-config-col.xml", "marc-col.xml");
YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 3));
YAZ_CHECK(tl_query(zh, "@attr 1=control 11224466", 1));
YAZ_CHECK(tl_query_x(zh, "@attr 1=4 computer", 0, 121));
zebra_close(zh);
+ /* testing XMLREADER input with ELEMENT stylesheet */
zh = index_some(zs, "dom.dom-config-one.xml", "marc-one.xml");
YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 1));
YAZ_CHECK(tl_query(zh, "@attr 1=control 11224466", 1));
YAZ_CHECK(tl_query_x(zh, "@attr 1=4 computer", 0, 121));
zebra_close(zh);
+ /* testing MARC input with ELEMENT stylesheet */
zh = index_some(zs, "dom.dom-config-marc.xml", "marc-col.mrc");
YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 3));
YAZ_CHECK(tl_query(zh, "@attr 1=control 11224466", 1));
YAZ_CHECK(tl_query_x(zh, "@attr 1=4 computer", 0, 121));
zebra_close(zh);
+ /* testing XMLREADER input with ELEMENT stylesheet and skipped records */
+ zh = index_some(zs, "dom.dom-config-skipped.xml", "marc-col.xml");
+ YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 1));
+ YAZ_CHECK(tl_query(zh, "@attr 1=control 11224466", 0));
+ YAZ_CHECK(tl_query(zh, "@attr 1=control 11224467", 1));
+ YAZ_CHECK(tl_query(zh, "@attr 1=control 73090924", 0));
+
+ zebra_close(zh);
+
+
YAZ_CHECK(tl_close_down(0, zs));
}
<record xmlns="http://www.loc.gov/MARC21/slim">
<rank>3</rank>
<leader>01369cam 2200265 i 4500</leader>
- <controlfield tag="001"> 73090924 //r82 </controlfield>
+ <controlfield tag="001"> 73090924 </controlfield>
<controlfield tag="003">DLC </controlfield>
<controlfield tag="005">19820524000000.0 </controlfield>
<controlfield tag="008">760609s1974 nyua b 10110 eng </controlfield>
<datafield tag="010" ind1=" " ind2=" ">
- <subfield code="a"> 73090924 //r82</subfield>
+ <subfield code="a"> 73090924 </subfield>
</datafield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">DLC</subfield>
--- /dev/null
+profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab
+
+modulePath: ../../index/.libs
+
+recordType: dom.dom-config-skipped.xml
+#recordType: dom.dom-config-col.xml
+
+perm.anonymous: rw
+
+