From 90d0517a535806d6b076f7c9812f8292a037cd06 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 16 Dec 2014 14:41:45 +0100 Subject: [PATCH] Moving SPARQL to separate Git repo --- Makefile | 12 ++ bibframe/config-sparql.xml | 36 ++++ src/Makefile | 36 ++++ src/filter_sparql.cpp | 422 +++++++++++++++++++++++++++++++++++++++++ src/filter_sparql.hpp | 38 ++++ src/sparql.c | 414 ++++++++++++++++++++++++++++++++++++++++ src/sparql.h | 46 +++++ src/test_sparql.c | 451 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 1455 insertions(+) create mode 100644 Makefile create mode 100644 bibframe/config-sparql.xml create mode 100644 src/Makefile create mode 100644 src/filter_sparql.cpp create mode 100644 src/filter_sparql.hpp create mode 100644 src/sparql.c create mode 100644 src/sparql.h create mode 100644 src/test_sparql.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a966b17 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +SUBDIRS = src +.PHONY: $(SUBDIRS) all + +all: $(SUBDIRS) + +$(SUBDIRS): + $(MAKE) -C $@ + +clean install: + for d in $(SUBDIRS); do \ + $(MAKE) -C $$d $@; \ + done diff --git a/bibframe/config-sparql.xml b/bibframe/config-sparql.xml new file mode 100644 index 0000000..feadbab --- /dev/null +++ b/bibframe/config-sparql.xml @@ -0,0 +1,36 @@ + + + + + + + 10 + @:9000 + FN + + + log + + + + rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns + bf: http://bibframe.org/vocab/ + ?work ?wtitle + ?work a bf:Work + ?work bf:workTitle ?wt + ?wt bf:titleValue ?wtitle + ?wt bf:titleValue %v FILTER(contains(%v, %s)) + + + + http + + + + true + + + + + + diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..0d6941d --- /dev/null +++ b/src/Makefile @@ -0,0 +1,36 @@ +ROOT=.. 
+prefix=/usr
+libdir = ${prefix}/lib
+
+ifeq "${MP_CONFIG}" ""
+MP_CONFIG := $(shell if test -x $(ROOT)/../metaproxy/metaproxy-config; then echo $(ROOT)/../metaproxy/metaproxy-config; else echo metaproxy-config; fi)
+endif
+
+MP_CFLAGS := $(shell $(MP_CONFIG) --cflags)
+MP_LIBS := $(shell $(MP_CONFIG) --libs)
+MP_SO := metaproxy_filter_sparql.so
+
+O := filter_sparql.o sparql.o
+
+CXXFLAGS := $(MP_CFLAGS) -fPIC
+CFLAGS := $(MP_CFLAGS) -fPIC
+
+all: $(MP_SO)
+
+$(MP_SO): $(O)
+	$(CXX) -shared $(CXXFLAGS) $(LDFLAGS) $^ -o $@ $(MP_LIBS)
+
+install: $(MP_SO)
+	mkdir -p $(DESTDIR)$(libdir)/mp-sparql
+	cp $(MP_SO) $(DESTDIR)$(libdir)/mp-sparql
+
+test_sparql: test_sparql.o sparql.o
+	$(CC) $(CFLAGS) $^ -o $@ $(MP_LIBS)
+
+$(O): sparql.h
+
+check: test_sparql
+	./test_sparql
+
+clean:
+	rm -f *.o $(MP_SO) test_sparql
diff --git a/src/filter_sparql.cpp b/src/filter_sparql.cpp
new file mode 100644
index 0000000..8900033
--- /dev/null
+++ b/src/filter_sparql.cpp
@@ -0,0 +1,445 @@
+/* This file is part of Metaproxy.
+   Copyright (C) Index Data
+
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <metaproxy/package.hpp>
+#include <metaproxy/util.hpp>
+#include <yaz/log.h>
+#include <yaz/srw.h>
+#include <yaz/diagbib1.h>
+#include <yaz/match_glob.h>
+#include <boost/scoped_ptr.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/thread/condition.hpp>
+#include "sparql.h"
+
+#include <yaz/zgdu.h>
+
+namespace mp = metaproxy_1;
+namespace yf = mp::filter;
+
+namespace metaproxy_1 {
+    namespace filter {
+        // Answers Z39.50 searches by translating the RPN query to SPARQL
+        class SPARQL : public Base {
+            class Session;
+            class Rep;
+            class Conf;
+
+            typedef boost::shared_ptr<Session> SessionPtr;
+            typedef boost::shared_ptr<Conf> ConfPtr;
+        public:
+            SPARQL();
+            ~SPARQL();
+            void process(metaproxy_1::Package & package) const;
+            void configure(const xmlNode * ptr, bool test_only,
+                           const char *path);
+            SessionPtr get_session(Package &package, Z_APDU **apdu) const;
+            void release_session(Package &package) const;
+            boost::scoped_ptr<Rep> m_p;
+            std::list<ConfPtr> db_conf;
+        };
+        // One configured <db>: database glob, endpoint uri and patterns
+        class SPARQL::Conf {
+        public:
+            std::string db;
+            std::string uri;
+            yaz_sparql_t s;
+            ~Conf();
+        };
+        // Session table shared by all threads; accessed under m_mutex
+        class SPARQL::Rep {
+            friend class SPARQL;
+            boost::condition m_cond_session_ready;
+            boost::mutex m_mutex;
+            std::map<mp::Session, SessionPtr> m_clients;
+        };
+        // State kept for one Z39.50 client session
+        class SPARQL::Session {
+        public:
+            Session(const SPARQL *);
+            ~Session();
+            void handle_z(Package &package, Z_APDU *apdu);
+            Z_APDU *run_sparql(mp::Package &package,
+                               Z_APDU *apdu_req,
+                               mp::odr &odr,
+                               const char *sparql_query,
+                               const char *uri);
+            bool m_in_use;
+        private:
+            bool m_support_named_result_sets;
+            const SPARQL *m_sparql;
+        };
+    }
+}
+
+yf::SPARQL::SPARQL() : m_p(new Rep)
+{
+}
+
+yf::SPARQL::~SPARQL()
+{
+}
+
+// Reads the <db> children of the filter element. Each must carry "path" and
+// "uri" attributes; each child element is registered as a pattern named
+// after the element (plus "." and its type attribute value, if any).
+// Throws FilterException on unknown elements/attributes or missing values.
+void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only,
+                           const char *path)
+{
+    const xmlNode *ptr = xmlnode->children;
+
+    for (; ptr; ptr = ptr->next)
+    {
+        if (ptr->type != XML_ELEMENT_NODE)
+            continue;
+        if (!strcmp((const char *) ptr->name, "db"))
+        {
+            ConfPtr conf(new Conf);
+            yaz_sparql_t s = yaz_sparql_create();
+            conf->s = s;
+
+            const struct _xmlAttr *attr;
+            for (attr = ptr->properties; attr; attr = attr->next)
+            {
+                if (!strcmp((const char *) attr->name, "path"))
+                    conf->db = mp::xml::get_text(attr->children);
+                else if (!strcmp((const char *) attr->name, "uri"))
+                    conf->uri = mp::xml::get_text(attr->children);
+                else
+                    throw mp::filter::FilterException(
+                        "Bad attribute " + std::string((const char *)
+                                                       attr->name));
+            }
+            xmlNode *p = ptr->children;
+            for (; p; p = p->next)
+            {
+                if (p->type != XML_ELEMENT_NODE)
+                    continue;
+                std::string name = (const char *) p->name;
+                const struct _xmlAttr *attr;
+                for (attr = p->properties; attr; attr = attr->next)
+                {
+                    if (!strcmp((const char *) attr->name, "type"))
+                    {
+                        name.append(".");
+                        name.append(mp::xml::get_text(attr->children));
+                    }
+                    else
+                        throw mp::filter::FilterException(
+                            "Bad attribute " + std::string((const char *)
+                                                           attr->name));
+                }
+                std::string value = mp::xml::get_text(p);
+                if (yaz_sparql_add_pattern(s, name.c_str(), value.c_str()))
+                {
+                    throw mp::filter::FilterException(
+                        "Bad SPARQL config " + name);
+                }
+            }
+            if (!conf->uri.length())
+            {
+                throw mp::filter::FilterException("Missing uri");
+            }
+            if (!conf->db.length())
+            {
+                throw mp::filter::FilterException("Missing path");
+            }
+            db_conf.push_back(conf);
+        }
+        else
+        {
+            throw mp::filter::FilterException
+                ("Bad element "
+                 + std::string((const char *) ptr->name)
+                 + " in sparql filter");
+        }
+    }
+}
+
+yf::SPARQL::Conf::~Conf()
+{
+    yaz_sparql_destroy(s);
+}
+
+yf::SPARQL::Session::Session(const SPARQL *sparql) :
+    m_in_use(true),
+    m_support_named_result_sets(false),
+    m_sparql(sparql)
+{
+}
+
+yf::SPARQL::Session::~Session()
+{
+}
+
+// Returns the session of this package, waiting while it is in use. If none
+// exists, one is created for a Z39.50 package; otherwise a null pointer is
+// returned. *apdu is set to the Z39.50 APDU of the request, or 0.
+yf::SPARQL::SessionPtr yf::SPARQL::get_session(Package & package,
+                                               Z_APDU **apdu) const
+{
+    SessionPtr ptr0;
+
+    Z_GDU *gdu = package.request().get();
+
+    boost::mutex::scoped_lock lock(m_p->m_mutex);
+
+    std::map<mp::Session, SessionPtr>::iterator it;
+
+    if (gdu && gdu->which == Z_GDU_Z3950)
+        *apdu = gdu->u.z3950;
+    else
+        *apdu = 0;
+
+    while (true)
+    {
+        it = m_p->m_clients.find(package.session());
+        if (it == m_p->m_clients.end())
+            break;
+        if (!it->second->m_in_use)
+        {
+            it->second->m_in_use = true;
+            return it->second;
+        }
+        m_p->m_cond_session_ready.wait(lock);
+    }
+    if (!*apdu)
+        return ptr0;
+
+    // new Z39.50 session ..
+    SessionPtr p(new Session(this));
+    m_p->m_clients[package.session()] = p;
+    return p;
+}
+
+// Marks the package's session idle, removes it if the session is closed,
+// and wakes up threads waiting in get_session().
+void yf::SPARQL::release_session(Package &package) const
+{
+    boost::mutex::scoped_lock lock(m_p->m_mutex);
+    std::map<mp::Session, SessionPtr>::iterator it;
+
+    it = m_p->m_clients.find(package.session());
+    if (it != m_p->m_clients.end())
+    {
+        it->second->m_in_use = false;
+
+        if (package.session().is_closed())
+            m_p->m_clients.erase(it);
+        m_p->m_cond_session_ready.notify_all();
+    }
+}
+
+// Sends sparql_query as the url-encoded form field "query" in the body of
+// an HTTP request for uri and builds the search response. A missing HTTP
+// response yields a diagnostic; the response body is not interpreted yet.
+Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
+                                        Z_APDU *apdu_req,
+                                        mp::odr &odr,
+                                        const char *sparql_query,
+                                        const char *uri)
+{
+    Package http_package(package.session(), package.origin());
+
+    http_package.copy_filter(package);
+    Z_GDU *gdu = z_get_HTTP_Request_uri(odr, uri, 0, 1);
+
+    z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
+                      "Content-Type", "application/x-www-form-urlencoded");
+    const char *names[2];
+    names[0] = "query";
+    names[1] = 0;
+    const char *values[1];
+    values[0] = sparql_query;
+    char *path = 0;
+    yaz_array_to_uri(&path, odr, (char **) names, (char **) values);
+
+    gdu->u.HTTP_Request->content_buf = path;
+    gdu->u.HTTP_Request->content_len = strlen(path);
+
+
+    yaz_log(YLOG_LOG, "sparql: HTTP request\n%s", sparql_query);
+
+    http_package.request() = gdu;
+    http_package.move();
+
+    Z_GDU *gdu_resp = http_package.response().get();
+    if (!gdu_resp || gdu_resp->which != Z_GDU_HTTP_Response)
+    {
+        // Report the backend failure instead of faking an empty hit list
+        yaz_log(YLOG_LOG, "sparql: no HTTP response");
+        return odr.create_searchResponse(
+            apdu_req, YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
+            "sparql: no HTTP response");
+    }
+    // TODO: the response body is not parsed yet; no hits are reported
+    return odr.create_searchResponse(apdu_req, 0, 0);
+}
+
+// Answers one Z39.50 APDU: init, close and search requests are handled;
+// any other APDU closes the session with a protocol error.
+void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
+{
+    mp::odr odr;
+    Z_APDU *apdu_res = 0;
+    if (apdu_req->which == Z_APDU_initRequest)
+    {
+        apdu_res = odr.create_initResponse(apdu_req, 0, 0);
+        Z_InitRequest *req = apdu_req->u.initRequest;
+        Z_InitResponse *resp = apdu_res->u.initResponse;
+
+        resp->implementationName = odr_strdup(odr, "sparql");
+        if (ODR_MASK_GET(req->options, Z_Options_namedResultSets))
+            m_support_named_result_sets = true;
+        int i;
+        static const int masks[] = {
+            Z_Options_search, Z_Options_present,
+            Z_Options_namedResultSets, -1
+        };
+        for (i = 0; masks[i] != -1; i++)
+            if (ODR_MASK_GET(req->options, masks[i]))
+                ODR_MASK_SET(resp->options, masks[i]);
+        static const int versions[] = {
+            Z_ProtocolVersion_1,
+            Z_ProtocolVersion_2,
+            Z_ProtocolVersion_3,
+            -1
+        };
+        for (i = 0; versions[i] != -1; i++)
+            if (ODR_MASK_GET(req->protocolVersion, versions[i]))
+                ODR_MASK_SET(resp->protocolVersion, versions[i]);
+            else
+                break;
+        *resp->preferredMessageSize = *req->preferredMessageSize;
+        *resp->maximumRecordSize = *req->maximumRecordSize;
+    }
+    else if (apdu_req->which == Z_APDU_close)
+    {
+        apdu_res = odr.create_close(apdu_req,
+                                    Z_Close_finished, 0);
+        package.session().close();
+    }
+    else if (apdu_req->which == Z_APDU_searchRequest)
+    {
+        Z_SearchRequest *req = apdu_req->u.searchRequest;
+
+        if (req->query->which != Z_Query_type_1)
+        {
+            apdu_res = odr.create_searchResponse(
+                apdu_req, YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
+        }
+        else if (req->num_databaseNames != 1)
+        {
+            apdu_res = odr.create_searchResponse(
+                apdu_req,
+                YAZ_BIB1_ACCESS_TO_SPECIFIED_DATABASE_DENIED, 0);
+        }
+        else
+        {
+            std::string db = req->databaseNames[0];
+            std::list<ConfPtr>::const_iterator it;
+
+            it = m_sparql->db_conf.begin();
+            for (; it != m_sparql->db_conf.end(); ++it)
+                if (yaz_match_glob((*it)->db.c_str(), db.c_str()))
+                    break;
+            if (it == m_sparql->db_conf.end())
+            {
+                apdu_res = odr.create_searchResponse(
+                    apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str());
+            }
+            else
+            {
+                WRBUF addinfo_wr = wrbuf_alloc();
+                WRBUF sparql_wr = wrbuf_alloc();
+                int error =
+                    yaz_sparql_from_rpn_wrbuf((*it)->s,
+                                              addinfo_wr, sparql_wr,
+                                              req->query->u.type_1);
+                if (error)
+                {
+                    // -1 (bad filter configuration) is not a Bib-1 code
+                    if (error < 0)
+                        error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+                    apdu_res = odr.create_searchResponse(
+                        apdu_req, error,
+                        wrbuf_len(addinfo_wr) ?
+                        wrbuf_cstr(addinfo_wr) : 0);
+                }
+                else
+                {
+                    apdu_res = run_sparql(package, apdu_req, odr,
+                                          wrbuf_cstr(sparql_wr),
+                                          (*it)->uri.c_str());
+                }
+                wrbuf_destroy(addinfo_wr);
+                wrbuf_destroy(sparql_wr);
+            }
+        }
+    }
+    else
+    {
+        apdu_res = odr.create_close(apdu_req,
+                                    Z_Close_protocolError,
+                                    "sparql: unhandled APDU");
+        package.session().close();
+    }
+
+    assert(apdu_res);
+    package.response() = apdu_res;
+}
+
+// Filter entry point: a package with a session and a Z39.50 APDU is
+// answered here; anything else is passed on with package.move().
+void yf::SPARQL::process(mp::Package &package) const
+{
+    Z_APDU *apdu;
+    SessionPtr p = get_session(package, &apdu);
+    if (p && apdu)
+    {
+        p->handle_z(package, apdu);
+    }
+    else
+        package.move();
+    release_session(package);
+}
+
+static mp::filter::Base* filter_creator()
+{
+    return new mp::filter::SPARQL;
+}
+
+extern "C" {
+    struct metaproxy_1_filter_struct metaproxy_1_filter_sparql = {
+        0,
+        "sparql",
+        filter_creator
+    };
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/src/filter_sparql.hpp b/src/filter_sparql.hpp
new file mode 100644
index 0000000..7cc16dc
--- /dev/null
+++ b/src/filter_sparql.hpp
@@ -0,0 +1,38 @@
+/* This file is part of Metaproxy.
+   Copyright (C) Index Data
+
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+// Declares the Metaproxy filter that translates Z39.50 searches to SPARQL
+#ifndef FILTER_SPARQL_HPP
+#define FILTER_SPARQL_HPP
+
+#include <metaproxy/filter.hpp>
+
+extern "C" {
+    extern struct metaproxy_1_filter_struct metaproxy_1_filter_sparql;
+}
+
+#endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/src/sparql.c b/src/sparql.c
new file mode 100644
index 0000000..e551ead
--- /dev/null
+++ b/src/sparql.c
@@ -0,0 +1,439 @@
+/**
+ * \file sparql.c
+ * \brief SPARQL
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <yaz/diagbib1.h>
+#include <yaz/tokenizer.h>
+#include "sparql.h"
+
+/** \brief one configured pattern/value pair, kept in insertion order */
+struct sparql_entry {
+    char *pattern;
+    char *value;
+    struct sparql_entry *next;
+};
+
+/** \brief translator handle; the handle and its entries live in nmem */
+struct yaz_sparql_s {
+    NMEM nmem;
+    struct sparql_entry *conf;
+    struct sparql_entry **last;
+};
+
+/** \brief creates an empty translator; free it with yaz_sparql_destroy */
+yaz_sparql_t yaz_sparql_create(void)
+{
+    NMEM nmem = nmem_create();
+    yaz_sparql_t s = (yaz_sparql_t) nmem_malloc(nmem, sizeof *s);
+
+    s->nmem = nmem;
+    s->conf = 0;
+    s->last = &s->conf;
+    return s;
+}
+
+/** \brief frees the translator with all its patterns; NULL is ignored */
+void yaz_sparql_destroy(yaz_sparql_t s)
+{
+    if (s)
+        nmem_destroy(s->nmem);
+}
+
+/** \brief appends a pattern to the configuration (strings are copied)
+    \param pattern "prefix", "criteria", "criteria.optional", "index.X" or
+                   a name starting with "field"; others fail at query time
+    \returns 0 (no validation is done here) */
+int yaz_sparql_add_pattern(yaz_sparql_t s, const char *pattern,
+                           const char *value)
+{
+    struct sparql_entry *e;
+    assert(s);
+
+    e = (struct sparql_entry *) nmem_malloc(s->nmem, sizeof(*e));
+    e->pattern = nmem_strdup(s->nmem, pattern);
+    e->value = nmem_strdup(s->nmem, value);
+    e->next = 0;
+    *s->last = e;
+    s->last = &e->next;
+    return 0;
+}
+
+/** \brief as yaz_sparql_from_rpn_stream, with the query written to w */
+int yaz_sparql_from_rpn_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
+                              Z_RPNQuery *q)
+{
+    return yaz_sparql_from_rpn_stream(s, addinfo, wrbuf_vp_puts, w, q);
+}
+
+/** \brief value of first numeric attribute of given type; 0 if none */
+static Odr_int lookup_attr_numeric(Z_AttributeList *attributes, int type)
+{
+    int j;
+    for (j = 0; j < attributes->num_attributes; j++)
+    {
+        Z_AttributeElement *ae = attributes->attributes[j];
+        if (*ae->attributeType == type)
+        {
+            if (ae->which == Z_AttributeValue_numeric)
+                return *ae->value.numeric;
+        }
+    }
+    return 0;
+}
+
+/** \brief first string of a complex attribute of given type; 0 if none */
+static const char *lookup_attr_string(Z_AttributeList *attributes, int type)
+{
+    int j;
+    for (j = 0; j < attributes->num_attributes; j++)
+    {
+        Z_AttributeElement *ae = attributes->attributes[j];
+        if (*ae->attributeType == type)
+        {
+            if (ae->which == Z_AttributeValue_complex)
+            {
+                Z_ComplexAttribute *ca = ae->value.complex;
+                int i;
+                for (i = 0; i < ca->num_list; i++)
+                {
+                    Z_StringOrNumeric *son = ca->list[i];
+                    if (son->which == Z_StringOrNumeric_string)
+                        return son->u.string;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/** \brief translates one attributes+term operand and appends it to res
+    The "index." pattern is selected by use attribute (type 1, numeric or
+    string, default "any"). %s expands to the quoted term, %d to the raw
+    term, %v to ?vN and %% to %. On lookup failure addinfo names the index. */
+static int apt(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
+               Z_AttributesPlusTerm *q, int indent, int *var_no)
+{
+    Z_Term *term = q->term;
+    Odr_int v = lookup_attr_numeric(q->attributes, 1);
+    struct sparql_entry *e = 0;
+    const char *cp;
+    const char *use_var = 0;
+    int i;
+
+    wrbuf_puts(res, " ");
+    for (i = 0; i < indent; i++)
+        wrbuf_puts(res, " ");
+    if (v)
+    {
+        for (e = s->conf; e; e = e->next)
+        {
+            if (!strncmp(e->pattern, "index.", 6))
+            {
+                char *end = 0;
+                Odr_int w = odr_strtol(e->pattern + 6, &end, 10);
+
+                if (end && *end == '\0' && v == w)
+                    break;
+            }
+        }
+        if (!e)
+        {
+            wrbuf_printf(addinfo, ODR_INT_PRINTF, v);
+            return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
+        }
+    }
+    else
+    {
+        const char *index_name = lookup_attr_string(q->attributes, 1);
+        if (!index_name)
+            index_name = "any";
+        for (e = s->conf; e; e = e->next)
+        {
+            if (!strncmp(e->pattern, "index.", 6))
+            {
+                if (!strcmp(e->pattern + 6, index_name))
+                    break;
+            }
+        }
+        if (!e)
+        {
+            wrbuf_puts(addinfo, index_name);
+            return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
+        }
+    }
+    assert(e);
+    wrbuf_rewind(addinfo);
+
+    for (cp = e->value; *cp; cp++)
+    {
+        if (strchr(" \t\r\n\f", *cp) && !use_var)
+        {
+            use_var = e->value;
+            if (strchr("$?", e->value[0]))
+            {
+                wrbuf_puts(vars, " ");
+                wrbuf_write(vars, e->value + 1, cp - e->value - 1);
+                wrbuf_puts(vars, " ");
+            }
+        }
+        /* a '%' ending the pattern is copied as-is (no read past the NUL) */
+        if (*cp == '%' && cp[1])
+        {
+            switch (*++cp)
+            {
+            case 's':
+                wrbuf_puts(addinfo, "\"");
+                switch (term->which)
+                {
+                case Z_Term_general:
+                    wrbuf_json_write(addinfo,
+                                     term->u.general->buf, term->u.general->len);
+                    break;
+                case Z_Term_numeric:
+                    wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
+                    break;
+                case Z_Term_characterString:
+                    wrbuf_json_puts(addinfo, term->u.characterString);
+                    break;
+                }
+                wrbuf_puts(addinfo, "\"");
+                break;
+            case 'd':
+                switch (term->which)
+                {
+                case Z_Term_general:
+                    wrbuf_write(addinfo,
+                                term->u.general->buf, term->u.general->len);
+                    break;
+                case Z_Term_numeric:
+                    wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
+                    break;
+                case Z_Term_characterString:
+                    wrbuf_puts(addinfo, term->u.characterString);
+                    break;
+                }
+                break;
+            case 'v':
+                wrbuf_printf(addinfo, "?v%d", *var_no);
+                break;
+            case '%':
+                wrbuf_putc(addinfo, '%');
+                break;
+            }
+        }
+        else
+            wrbuf_putc(addinfo, *cp);
+    }
+    wrbuf_puts(res, wrbuf_cstr(addinfo));
+    (*var_no)++;
+    return 0;
+}
+
+
+/** \brief translates an RPN tree: AND joins operands with " .", OR gives
+    { .. } UNION { .. }; other operators and result set operands fail */
+static int rpn_structure(yaz_sparql_t s, WRBUF addinfo,
+                         WRBUF res, WRBUF vars, Z_RPNStructure *q, int indent,
+                         int *var_no)
+{
+    int i;
+    if (q->which == Z_RPNStructure_complex)
+    {
+        int r;
+        Z_Complex *c = q->u.complex;
+        Z_Operator *op = c->roperator;
+        if (op->which == Z_Operator_and)
+        {
+            r = rpn_structure(s, addinfo, res, vars, c->s1, indent, var_no);
+            if (r)
+                return r;
+            wrbuf_puts(res, " .\n");
+            return rpn_structure(s, addinfo, res, vars, c->s2, indent, var_no);
+        }
+        else if (op->which == Z_Operator_or)
+        {
+            for (i = 0; i < indent; i++)
+                wrbuf_puts(res, " ");
+            wrbuf_puts(res, " {\n");
+            r = rpn_structure(s, addinfo, res, vars, c->s1, indent + 1, var_no);
+            if (r)
+                return r;
+            wrbuf_puts(res, "\n");
+            for (i = 0; i < indent; i++)
+                wrbuf_puts(res, " ");
+            wrbuf_puts(res, " } UNION {\n");
+            r = rpn_structure(s, addinfo, res, vars, c->s2, indent + 1, var_no);
+            wrbuf_puts(res, "\n");
+            for (i = 0; i < indent; i++)
+                wrbuf_puts(res, " ");
+            wrbuf_puts(res, " }");
+            return r;
+        }
+        else
+        {
+            return YAZ_BIB1_OPERATOR_UNSUPP;
+        }
+    }
+    else
+    {
+        Z_Operand *op = q->u.simple;
+        if (op->which == Z_Operand_APT)
+            return apt(s, addinfo, res, vars, op->u.attributesPlusTerm, indent,
+                       var_no);
+        else
+            return YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM;
+    }
+    return 0;
+}
+
+/** \brief emits PREFIX lines, SELECT fields and a WHERE clause for q via pr
+    \returns 0 on success, -1 if an unknown pattern was configured,
+             otherwise a Bib-1 diagnostic code with details in addinfo */
+int yaz_sparql_from_rpn_stream(yaz_sparql_t s,
+                               WRBUF addinfo,
+                               void (*pr)(const char *buf,
+                                          void *client_data),
+                               void *client_data,
+                               Z_RPNQuery *q)
+{
+    struct sparql_entry *e;
+    yaz_tok_cfg_t cfg = yaz_tok_cfg_create();
+    int r = 0, errors = 0;
+
+    for (e = s->conf; e; e = e->next)
+    {
+        if (!strcmp(e->pattern, "prefix"))
+        {
+            yaz_tok_parse_t p = yaz_tok_parse_buf(cfg, e->value);
+            int no = 0;
+
+            pr("PREFIX", client_data);
+            while (1)
+            {
+                const char *tok_str;
+                int token = yaz_tok_move(p);
+                if (token != YAZ_TOK_STRING)
+                    break;
+                pr(" ", client_data);
+
+                tok_str = yaz_tok_parse_string(p);
+                if (tok_str[0])
+                {
+                    if (no > 0 && tok_str[0] != '<')
+                        pr("<", client_data);
+                    pr(tok_str, client_data);
+                    if (no > 0 && tok_str[strlen(tok_str)-1] != '>')
+                        pr(">", client_data);
+                }
+                no++;
+            }
+            pr("\n", client_data);
+            yaz_tok_parse_destroy(p);
+        }
+        else if (!strcmp(e->pattern, "criteria"))
+        {
+            ;
+        }
+        else if (!strcmp(e->pattern, "criteria.optional"))
+        {
+            ;
+        }
+        else if (!strncmp(e->pattern, "index.", 6))
+        {
+            ;
+        }
+        else if (!strncmp(e->pattern, "field", 5))
+        {
+            ;
+        }
+        else
+        {
+            errors++;
+        }
+    }
+    pr("\n", client_data);
+    pr("SELECT", client_data);
+    for (e = s->conf; e; e = e->next)
+    {
+        if (!strncmp(e->pattern, "field", 5))
+        {
+            pr(" ", client_data);
+            pr(e->value, client_data);
+        }
+    }
+    pr("\n", client_data);
+    pr("WHERE {\n", client_data);
+    for (e = s->conf; e; e = e->next)
+    {
+        if (!strcmp(e->pattern, "criteria"))
+        {
+            pr(" ", client_data);
+            pr(e->value, client_data);
+            pr(" .\n", client_data);
+        }
+    }
+    if (!errors)
+    {
+        WRBUF res = wrbuf_alloc();
+        WRBUF vars = wrbuf_alloc();
+        int var_no = 0;
+        r = rpn_structure(s, addinfo, res, vars, q->RPNStructure, 0, &var_no);
+        if (r == 0)
+        {
+            WRBUF t_var = wrbuf_alloc();
+            for (e = s->conf; e; e = e->next)
+            {
+                if (!strcmp(e->pattern, "criteria.optional"))
+                {
+                    int optional = 1;
+                    size_t i = strlen(e->value), j;
+
+                    while (i > 0 && strchr(" \t\r\n\f", e->value[i-1]))
+                        --i;
+                    j = i;
+                    while (i > 0 && !strchr("$?", e->value[i-1]))
+                        --i;
+                    if (i > 0 && j > i)
+                    {
+                        /* vars entries are " name "; match whole names */
+                        wrbuf_rewind(t_var);
+                        wrbuf_puts(t_var, " ");
+                        wrbuf_write(t_var, e->value + i, j - i);
+                        wrbuf_puts(t_var, " ");
+                        if (strstr(wrbuf_cstr(vars), wrbuf_cstr(t_var)))
+                            optional = 0;
+                    }
+
+                    pr(" ", client_data);
+                    if (optional)
+                        pr("OPTIONAL { ", client_data);
+                    pr(e->value, client_data);
+                    if (optional)
+                        pr(" }", client_data);
+                    pr(" .\n", client_data);
+                }
+            }
+            pr(wrbuf_cstr(res), client_data);
+            wrbuf_destroy(t_var);
+        }
+        wrbuf_destroy(res);
+        wrbuf_destroy(vars);
+    }
+    pr("\n}\n", client_data);
+    yaz_tok_cfg_destroy(cfg);
+
+    return errors ? -1 : r;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/src/sparql.h b/src/sparql.h
new file mode 100644
index 0000000..257b3e3
--- /dev/null
+++ b/src/sparql.h
@@ -0,0 +1,46 @@
+#ifndef SPARQL_H_INCLUDED
+#define SPARQL_H_INCLUDED
+
+#include <yaz/wrbuf.h>
+#include <yaz/z-core.h>
+#include <yaz/nmem.h>
+
+YAZ_BEGIN_CDECL
+
+typedef struct yaz_sparql_s *yaz_sparql_t;
+
+YAZ_EXPORT
+yaz_sparql_t yaz_sparql_create(void);
+
+YAZ_EXPORT
+void yaz_sparql_destroy(yaz_sparql_t s);
+
+YAZ_EXPORT
+int yaz_sparql_add_pattern(yaz_sparql_t s, const char *pattern,
+                           const char *value);
+
+YAZ_EXPORT
+int yaz_sparql_from_rpn_stream(yaz_sparql_t s,
+                               WRBUF addinfo,
+                               void (*pr)(const char *buf,
+                                          void *client_data),
+                               void *client_data,
+                               Z_RPNQuery *q);
+
+YAZ_EXPORT
+int yaz_sparql_from_rpn_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
+                              Z_RPNQuery *q);
+
+YAZ_END_CDECL
+
+#endif
+/* SPARQL_H_INCLUDED */
+/*
+ * Local variables:
+ *
c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/test_sparql.c b/src/test_sparql.c new file mode 100644 index 0000000..3590f9b --- /dev/null +++ b/src/test_sparql.c @@ -0,0 +1,451 @@ +/* This file is part of the YAZ toolkit. + * Copyright (C) Index Data + * See the file LICENSE for details. + */ +#if HAVE_CONFIG_H +#include +#endif + +#include +#include "sparql.h" +#include +#include +#include + +static int test_query(yaz_sparql_t s, const char *pqf, const char *expect) +{ + YAZ_PQF_Parser parser = yaz_pqf_create(); + ODR odr = odr_createmem(ODR_ENCODE); + Z_RPNQuery *rpn = yaz_pqf_parse(parser, odr, pqf); + int ret = 0; + WRBUF addinfo = wrbuf_alloc(); + WRBUF w = wrbuf_alloc(); + + if (rpn) + { + int r = yaz_sparql_from_rpn_wrbuf(s, addinfo, w, rpn); + if (expect) + { + if (!r) + { + if (!strcmp(expect, wrbuf_cstr(w))) + ret = 1; + else + { + yaz_log(YLOG_WARN, "test_sparql: pqf=%s", pqf); + yaz_log(YLOG_WARN, " expect: %s", expect); + yaz_log(YLOG_WARN, " got: %s", wrbuf_cstr(w)); + } + } + else + { + yaz_log(YLOG_WARN, "test_sparql: pqf=%s", pqf); + yaz_log(YLOG_WARN, " expect: %s", expect); + yaz_log(YLOG_WARN, " got error: %d:%s", r, wrbuf_cstr(addinfo)); + } + } + else + { + if (r) + ret = 1; + else + { + yaz_log(YLOG_WARN, "test_sparql: pqf=%s", pqf); + yaz_log(YLOG_WARN, " expect error"); + yaz_log(YLOG_WARN, " got: %s", wrbuf_cstr(w)); + } + } + } + wrbuf_destroy(w); + wrbuf_destroy(addinfo); + odr_destroy(odr); + yaz_pqf_destroy(parser); + return ret; +} + +static void tst1(void) +{ + yaz_sparql_t s = yaz_sparql_create(); + + yaz_sparql_add_pattern(s, "prefix", + "rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns"); + yaz_sparql_add_pattern(s, "prefix", + "bf: "); + yaz_sparql_add_pattern(s, "prefix", + "gs: http://gs.com/panorama/domain-model"); + yaz_sparql_add_pattern(s, "field.title", "?title"); + yaz_sparql_add_pattern(s, "field.author", "?author"); + 
yaz_sparql_add_pattern(s, "field.description", "?description"); + yaz_sparql_add_pattern(s, "field.instanceTitle", "?ititle"); + yaz_sparql_add_pattern(s, "criteria", "?work a bf:Work"); + yaz_sparql_add_pattern(s, "criteria", "?work bf:workTitle/bf:titleValue ?title"); + yaz_sparql_add_pattern(s, "criteria", "?work bf:creator/bf:label ?author"); + yaz_sparql_add_pattern(s, "criteria", "?work bf:note ?description"); + yaz_sparql_add_pattern(s, "criteria", "?inst bf:instanceOf ?work"); + yaz_sparql_add_pattern(s, "criteria", "?inst bf:instanceTitle/bf:titleValue ?ititle"); + yaz_sparql_add_pattern(s, "criteria.optional", "?inst bf:heldBy ?lib"); + + yaz_sparql_add_pattern(s, "index.bf.title", + "?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, %s))"); + yaz_sparql_add_pattern(s, "index.bf.creator", + "?work bf:creator/bf:label ?o2 " + "FILTER(contains(?o2, %s))"); + yaz_sparql_add_pattern(s, "index.bf.authorityCreator", + "?work bf:author %s"); + yaz_sparql_add_pattern(s, "index.bf.type", + "?inst rdf:type %s"); + yaz_sparql_add_pattern(s, "index.bf.format", + "?inst bf:format ?o5 FILTER(contains(?o5, %s))"); + yaz_sparql_add_pattern(s, "index.bf.nearby", "?lib gs:nearby (%d)"); + yaz_sparql_add_pattern(s, "index.bf.baseTitle", + "?work bf:derivativeOf/bf:workTitle/bf:titleValue " + "?o6 FILTER(contains(?o6, %s))"); + yaz_sparql_add_pattern(s, "index.bf.baseCreator", + "?work bf:derivativeOf/bf:creator/bf:label " + "?o7 FILTER(contains(?o7, %s))"); + yaz_sparql_add_pattern(s, "index.bf.targetAudience", + "?work bf:targetAudience %s"); + yaz_sparql_add_pattern(s, "index.bf.isbn", "?inst bf:ISBN %s"); + YAZ_CHECK(test_query( + s, "@attr 1=bf.title computer", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf 
?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"computer\"))\n" + "}\n" + )); + + YAZ_CHECK(test_query( + s, "@attr 1=bf.creator london", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " ?work bf:creator/bf:label ?o2 " + "FILTER(contains(?o2, \"london\"))\n" + "}\n")); + + + YAZ_CHECK(test_query( + s, "@and @attr 1=bf.creator london @attr 1=bf.title computer", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " ?work bf:creator/bf:label ?o2 " + "FILTER(contains(?o2, \"london\")) .\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"computer\"))\n" + "}\n")); + + YAZ_CHECK(test_query( + s, "@or @attr 1=bf.creator london @attr 1=bf.title computer", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " {\n" + " ?work bf:creator/bf:label ?o2 " + 
"FILTER(contains(?o2, \"london\"))\n" + " } UNION {\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"computer\"))\n" + " }\n" + "}\n" + )); + + YAZ_CHECK(test_query( + s, "@or @or @attr 1=bf.creator a @attr 1=bf.title b @attr 1=bf.title c", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " {\n" + " {\n" + " ?work bf:creator/bf:label ?o2 " + "FILTER(contains(?o2, \"a\"))\n" + " } UNION {\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"b\"))\n" + " }\n" + " } UNION {\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"c\"))\n" + " }\n" + "}\n" + )); + + YAZ_CHECK(test_query( + s, "@or @and @attr 1=bf.creator a @attr 1=bf.title b @attr 1=bf.title c", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " {\n" + " ?work bf:creator/bf:label ?o2 " + "FILTER(contains(?o2, \"a\")) .\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"b\"))\n" + " } UNION {\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"c\"))\n" + " }\n" + "}\n" + )); + + YAZ_CHECK(test_query( + s, "@and @and @attr 1=bf.creator a @attr 1=bf.title b @attr 1=bf.title c", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE 
{\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " ?work bf:creator/bf:label ?o2 " + "FILTER(contains(?o2, \"a\")) .\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"b\")) .\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"c\"))\n" + "}\n" + )); + + YAZ_CHECK(test_query( + s, "@and @attr 1=bf.title \"Phantom Tollbooth\" " + "@attr 1=bf.nearby \"40.1583 83.0742 30\"", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " ?inst bf:heldBy ?lib .\n" + " ?work bf:workTitle/bf:titleValue ?o1 " + "FILTER(contains(?o1, \"Phantom Tollbooth\")) .\n" + " ?lib gs:nearby (40.1583 83.0742 30)\n" + "}\n" + )); + + YAZ_CHECK(test_query( + s, "@attr 1=bf.isbn 9780316154697", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " ?inst bf:ISBN \"9780316154697\"\n" + "}\n" + )); + + + yaz_sparql_destroy(s); +} + /* tst2: creates a fresh yaz_sparql_t, registers index patterns that use the %v placeholder, and checks three queries through test_query(); the expected output names the generated variables ?v0, ?v1 and ?v2. The handle is released with yaz_sparql_destroy() at the end. */ +static void tst2(void) +{ + yaz_sparql_t s = yaz_sparql_create(); + /* Register prefixes, result fields and criteria patterns. NOTE(review): the bf prefix value below is empty and the expected PREFIX lines in the checks carry no IRI -- possibly <...> text lost when this patch was extracted; TODO confirm against the upstream file. */ + yaz_sparql_add_pattern(s, "prefix", + "rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns"); + yaz_sparql_add_pattern(s, "prefix", + "bf: "); +
yaz_sparql_add_pattern(s, "prefix", + "gs: http://gs.com/panorama/domain-model"); + yaz_sparql_add_pattern(s, "field.title", "?title"); + yaz_sparql_add_pattern(s, "field.author", "?author"); + yaz_sparql_add_pattern(s, "field.description", "?description"); + yaz_sparql_add_pattern(s, "field.instanceTitle", "?ititle"); + yaz_sparql_add_pattern(s, "criteria", "?work a bf:Work"); + yaz_sparql_add_pattern(s, "criteria", "?work bf:workTitle/bf:titleValue ?title"); + yaz_sparql_add_pattern(s, "criteria", "?work bf:creator/bf:label ?author"); + yaz_sparql_add_pattern(s, "criteria", "?work bf:note ?description"); + yaz_sparql_add_pattern(s, "criteria", "?inst bf:instanceOf ?work"); + yaz_sparql_add_pattern(s, "criteria", "?inst bf:instanceTitle/bf:titleValue ?ititle"); + yaz_sparql_add_pattern(s, "criteria.optional", "?inst bf:heldBy ?lib"); + /* Index patterns: in the expectations further down, each %v shows up as a numbered variable (?v0, ?v1, ...) and each %s as the quoted query term. */ + yaz_sparql_add_pattern(s, "index.bf.title", + "?work bf:workTitle/bf:titleValue %v " + "FILTER(contains(%v, %s))"); + yaz_sparql_add_pattern(s, "index.bf.creator", + "?work bf:creator/bf:label %v " + "FILTER(contains(%v, %s))"); + yaz_sparql_add_pattern(s, "index.bf.authorityCreator", + "?work bf:author %s"); + yaz_sparql_add_pattern(s, "index.bf.type", "?inst rdf:type %s"); + yaz_sparql_add_pattern(s, "index.bf.format", + "?inst bf:format %v FILTER(contains(%v, %s))"); + yaz_sparql_add_pattern(s, "index.bf.nearby", "?lib gs:nearby (%d)"); + yaz_sparql_add_pattern(s, "index.bf.baseTitle", + "?work bf:derivativeOf/bf:workTitle/bf:titleValue " + "%v FILTER(contains(%v, %s))"); + yaz_sparql_add_pattern(s, "index.bf.baseCreator", + "?work bf:derivativeOf/bf:creator/bf:label " + "%v FILTER(contains(%v, %s))"); + yaz_sparql_add_pattern(s, "index.bf.targetAudience", + "?work bf:targetAudience %s"); + yaz_sparql_add_pattern(s, "index.bf.isbn", "?inst bf:ISBN %s"); + /* Check 1: a single term on the bf.title index. */ + YAZ_CHECK(test_query( + s, "@attr 1=bf.title computer", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" +
"WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " ?work bf:workTitle/bf:titleValue ?v0 " + "FILTER(contains(?v0, \"computer\"))\n" + "}\n" + )); + /* Check 2: a single term on the bf.creator index. */ + YAZ_CHECK(test_query( + s, "@attr 1=bf.creator london", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " ?work bf:creator/bf:label ?v0 " + "FILTER(contains(?v0, \"london\"))\n" + "}\n")); + /* Check 3: an @or over an @and pair and a third term; the expectation is a { ... } UNION { ... } group with a distinct ?vN variable per term. */ + YAZ_CHECK(test_query( + s, "@or @and @attr 1=bf.creator a @attr 1=bf.title b @attr 1=bf.title c", + "PREFIX rdf: \n" + "PREFIX bf: \n" + "PREFIX gs: \n" + "\n" + "SELECT ?title ?author ?description ?ititle\n" + "WHERE {\n" + " ?work a bf:Work .\n" + " ?work bf:workTitle/bf:titleValue ?title .\n" + " ?work bf:creator/bf:label ?author .\n" + " ?work bf:note ?description .\n" + " ?inst bf:instanceOf ?work .\n" + " ?inst bf:instanceTitle/bf:titleValue ?ititle .\n" + " OPTIONAL { ?inst bf:heldBy ?lib } .\n" + " {\n" + " ?work bf:creator/bf:label ?v0 " + "FILTER(contains(?v0, \"a\")) .\n" + " ?work bf:workTitle/bf:titleValue ?v1 " + "FILTER(contains(?v1, \"b\"))\n" + " } UNION {\n" + " ?work bf:workTitle/bf:titleValue ?v2 " + "FILTER(contains(?v2, \"c\"))\n" + " }\n" + "}\n" + )); + + yaz_sparql_destroy(s); +} + /* Test entry point: invokes YAZ_CHECK_INIT and YAZ_CHECK_LOG, runs tst1() and tst2(), then finishes with YAZ_CHECK_TERM. */ +int main(int argc, char **argv) +{ + YAZ_CHECK_INIT(argc, argv); + YAZ_CHECK_LOG(); + tst1(); + tst2(); + YAZ_CHECK_TERM; +} +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + *
indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + -- 1.7.10.4