From 3ad4016fb41388c1e70327924f781c5eb8ec0c51 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 20 Mar 2012 10:23:38 +0100 Subject: [PATCH] Sort module documentation, schema, fixes --- doc/Makefile.am | 2 + doc/sort.xml | 125 +++++++++++++++++++++++++++++++++++++++ etc/config-record-transform.xml | 9 +++ src/filter_sort.cpp | 64 +++++++++++--------- xml/schema/Makefile.am | 1 + xml/schema/filter_sort.rnc | 19 ++++++ xml/schema/metaproxy.rnc | 2 + 7 files changed, 195 insertions(+), 27 deletions(-) create mode 100644 doc/sort.xml create mode 100644 xml/schema/filter_sort.rnc diff --git a/doc/Makefile.am b/doc/Makefile.am index cdaadc6..838f7e1 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -23,6 +23,7 @@ XMLMAN = metaproxy.xml \ query_rewrite.xml \ record_transform.xml\ session_shared.xml \ + sort.xml \ sru_z3950.xml\ template.xml \ virt_db.xml \ @@ -44,6 +45,7 @@ MANFILES = metaproxy.1 \ multi.3mp query_rewrite.3mp \ record_transform.3mp\ session_shared.3mp \ + sort.3mp \ sru_z3950.3mp \ template.3mp \ virt_db.3mp \ diff --git a/doc/sort.xml b/doc/sort.xml new file mode 100644 index 0000000..d4a0431 --- /dev/null +++ b/doc/sort.xml @@ -0,0 +1,125 @@ + + + %idcommon; +]> + + + Metaproxy + Index Data + + + + sort + 3mp + Metaproxy Module + + + + sort + Metaproxy Z39.50 Sort Module + + + DESCRIPTION + + This filter performs sorting of Z39.50 result sets. + The sorting criterion is selected via an X-Path expression. Only + XML records are supported. The sorting is done only for the first + present request following a search. The number of records to prefetch + is configurable. For example, if a client asks initially for 10 + records this module may extend that and fetch more records and only + return the results in the 10 record window - after sorting. + + + The configuration is given as attributes inside element + sort. This element must occur exactly once. Future + versions of the sort module may include multiple sort elements. 
+ The attributes within sort are: + + xpath + + + Specifies the X-Path expression that picks the sorting data from + the record. + + + + namespaces + + + Allows one or more namespaces to be declared with a user-defined + prefix. Each prefix may be referred to within the xpath expression. + + + + prefetch + + + Number of records to prefetch. + + + + ascending + + + Is a boolean value (false, true). If true, the sort module will + sort ascending. If false, the sort module will sort descending. + If omitted, the sort order will be ascending. + + + + + + + + SCHEMA + + + + + EXAMPLES + + For example, to sort MARCXML records on title, one could use: + + + +]]> + + + + + SEE ALSO + + + metaproxy + 1 + + + + + record_transform + 3mp + + + + + &copyright; + + + diff --git a/etc/config-record-transform.xml b/etc/config-record-transform.xml index 8e8dbc3..bce4097 100644 --- a/etc/config-record-transform.xml +++ b/etc/config-record-transform.xml @@ -16,6 +16,15 @@ Front + + + diff --git a/src/filter_sort.cpp b/src/filter_sort.cpp index 5f09492..784c6f9 100644 --- a/src/filter_sort.cpp +++ b/src/filter_sort.cpp @@ -51,6 +51,7 @@ namespace metaproxy_1 { std::string m_xpath_expr; std::string m_namespaces; bool m_ascending; + bool m_debug; boost::mutex m_mutex; boost::condition m_cond_session_ready; std::map m_clients; @@ -62,12 +63,12 @@ namespace metaproxy_1 { Z_NamePlusRecord *npr; std::string score; void get_xpath(xmlDoc *doc, const char *namespaces, - const char *expr); + const char *expr, bool debug); bool register_namespaces(xmlXPathContextPtr xpathCtx, const char *nsList); public: Record(Z_NamePlusRecord *n, const char *namespaces, - const char *expr); + const char *expr, bool debug); ~Record(); bool operator < (const Record &rhs); }; @@ -77,6 +78,7 @@ namespace metaproxy_1 { mp::odr m_odr; std::string namespaces; std::string xpath_expr; + bool debug; public: bool cmp(Odr_oid *syntax); void add(Z_NamePlusRecord *s); @@ -84,7 +86,7 @@ namespace metaproxy_1 { Z_NamePlusRecord 
*get(int i, bool ascending); void sort(); RecordList(Odr_oid *, std::string namespaces, - std::string xpath_expr); + std::string xpath_expr, bool debug); ~RecordList(); }; class Sort::ResultSet : boost::noncopyable { @@ -213,7 +215,7 @@ bool yf::Sort::Record::register_namespaces(xmlXPathContextPtr xpathCtx, void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces, - const char *expr) + const char *expr, bool debug) { xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); if (xpathCtx) @@ -224,6 +226,8 @@ void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces, if (xpathObj) { xmlNodeSetPtr nodes = xpathObj->nodesetval; + if (debug) + print_xpath_nodes(nodes, yaz_log_file()); if (nodes) { int i; @@ -240,7 +244,7 @@ void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces, { content = mp::xml::get_text(ptr); } - if (content.c_str()) + if (content.length()) { score = content; break; @@ -255,7 +259,8 @@ void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces, yf::Sort::Record::Record(Z_NamePlusRecord *n, const char *namespaces, - const char *expr) : npr(n) + const char *expr, + bool debug) : npr(n) { if (npr->which == Z_NamePlusRecord_databaseRecord) { @@ -269,7 +274,7 @@ yf::Sort::Record::Record(Z_NamePlusRecord *n, ext->u.octet_aligned->len); if (doc) { - get_xpath(doc, namespaces, expr); + get_xpath(doc, namespaces, expr, debug); xmlFreeDoc(doc); } } @@ -289,8 +294,9 @@ bool yf::Sort::Record::operator < (const Record &rhs) yf::Sort::RecordList::RecordList(Odr_oid *syntax, std::string a_namespaces, - std::string a_xpath_expr) - : namespaces(a_namespaces), xpath_expr(a_xpath_expr) + std::string a_xpath_expr, + bool a_debug) + : namespaces(a_namespaces), xpath_expr(a_xpath_expr), debug(a_debug) { if (syntax) @@ -322,7 +328,7 @@ void yf::Sort::RecordList::add(Z_NamePlusRecord *s) { ODR oi = m_odr; Z_NamePlusRecord *npr = yaz_clone_z_NamePlusRecord(s, oi->mem); - Record record(npr, namespaces.c_str(), xpath_expr.c_str()); 
+ Record record(npr, namespaces.c_str(), xpath_expr.c_str(), debug); npr_list.push_back(record); } @@ -375,7 +381,7 @@ yf::Sort::Frontend::~Frontend() } -yf::Sort::Impl::Impl() : m_prefetch(20), m_ascending(true) +yf::Sort::Impl::Impl() : m_prefetch(20), m_ascending(true), m_debug(false) { } @@ -435,7 +441,7 @@ void yf::Sort::Impl::configure(const xmlNode *ptr, bool test_only, { if (ptr->type != XML_ELEMENT_NODE) continue; - if (!strcmp((const char *) ptr->name, "config")) + if (!strcmp((const char *) ptr->name, "sort")) { const struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) @@ -458,18 +464,13 @@ void yf::Sort::Impl::configure(const xmlNode *ptr, bool test_only, { m_namespaces = mp::xml::get_text(attr->children); } - else if (!strcmp((const char *) attr->name, "sortorder")) + else if (!strcmp((const char *) attr->name, "ascending")) { - std::string t = mp::xml::get_text(attr->children); - if (t == "ascending") - m_ascending = true; - else if (t == "descending") - m_ascending = false; - else - throw mp::filter::FilterException( - "Bad attribute value " + t + " for attribute " + - std::string((const char *) attr->name)); - + m_ascending = mp::xml::get_bool(attr->children, true); + } + else if (!strcmp((const char *) attr->name, "debug")) + { + m_debug = mp::xml::get_bool(attr->children, false); } else throw mp::filter::FilterException( @@ -515,7 +516,8 @@ void yf::Sort::Frontend::handle_records(mp::Package &package, int pos = 1; RecordListPtr rlp(new RecordList(syntax, m_p->m_namespaces.c_str(), - m_p->m_xpath_expr.c_str())); + m_p->m_xpath_expr.c_str(), + m_p->m_debug)); for (i = 0; i < nprl->num_records; i++, pos++) rlp->add(nprl->records[i]); @@ -570,6 +572,10 @@ void yf::Sort::Frontend::handle_search(mp::Package &package, Z_APDU *apdu_req) std::string resultSetId = req->resultSetName; Package b_package(package.session(), package.origin()); mp::odr odr; + Odr_oid *syntax = 0; + + if (req->preferredRecordSyntax) + syntax = 
odr_oiddup(odr, req->preferredRecordSyntax); b_package.copy_filter(package); Sets_it sets_it = m_sets.find(req->resultSetName); @@ -600,7 +606,7 @@ void yf::Sort::Frontend::handle_search(mp::Package &package, Z_APDU *apdu_req) Z_SearchResponse *res = gdu_res->u.z3950->u.searchResponse; s->hit_count = *res->resultCount; handle_records(b_package, apdu_req, res->records, 1, s, - req->preferredRecordSyntax, resultSetId.c_str()); + syntax, resultSetId.c_str()); package.response() = gdu_res; } } @@ -611,6 +617,11 @@ void yf::Sort::Frontend::handle_present(mp::Package &package, Z_APDU *apdu_req) std::string resultSetId = req->resultSetId; Package b_package(package.session(), package.origin()); mp::odr odr; + Odr_oid *syntax = 0; + Odr_int start = *req->resultSetStartPoint; + + if (req->preferredRecordSyntax) + syntax = odr_oiddup(odr, req->preferredRecordSyntax); b_package.copy_filter(package); Sets_it sets_it = m_sets.find(resultSetId); @@ -666,8 +677,7 @@ void yf::Sort::Frontend::handle_present(mp::Package &package, Z_APDU *apdu_req) { Z_PresentResponse *res = gdu_res->u.z3950->u.presentResponse; handle_records(b_package, apdu_req, res->records, - *req->resultSetStartPoint, rset, - req->preferredRecordSyntax, resultSetId.c_str()); + start, rset, syntax, resultSetId.c_str()); package.response() = gdu_res; } } diff --git a/xml/schema/Makefile.am b/xml/schema/Makefile.am index 348feec..8bfdc74 100644 --- a/xml/schema/Makefile.am +++ b/xml/schema/Makefile.am @@ -18,6 +18,7 @@ filter_multi.rnc \ filter_query_rewrite.rnc \ filter_record_transform.rnc \ filter_session_shared.rnc \ +filter_sort.rnc \ filter_sru_z3950.rnc \ filter_virt_db.rnc \ filter_z3950_client.rnc \ diff --git a/xml/schema/filter_sort.rnc b/xml/schema/filter_sort.rnc new file mode 100644 index 0000000..d62b844 --- /dev/null +++ b/xml/schema/filter_sort.rnc @@ -0,0 +1,19 @@ +# Metaproxy XML config file schema + +namespace mp = "http://indexdata.com/metaproxy" + +filter_sort = + attribute type { "sort" }, + 
attribute id { xsd:NCName }?, + attribute name { xsd:NCName }?, + element mp:sort { + attribute prefetch { xsd:integer }?, + attribute xpath { xsd:string }, + attribute namespaces { xsd:string }?, + attribute ascending { xsd:boolean }?, + attribute debug { xsd:boolean }? + } + + + + diff --git a/xml/schema/metaproxy.rnc b/xml/schema/metaproxy.rnc index 3442c35..b82b7ae 100644 --- a/xml/schema/metaproxy.rnc +++ b/xml/schema/metaproxy.rnc @@ -36,6 +36,7 @@ include "filter_multi.rnc" include "filter_query_rewrite.rnc" include "filter_record_transform.rnc" include "filter_session_shared.rnc" +include "filter_sort.rnc" include "filter_sru_z3950.rnc" include "filter_virt_db.rnc" include "filter_z3950_client.rnc" @@ -83,6 +84,7 @@ filter = | filter_query_rewrite | filter_record_transform | filter_session_shared + | filter_sort | filter_sru_z3950 | filter_virt_db | filter_z3950_client -- 1.7.10.4