+++ /dev/null
-/* $Id: csvread.c,v 1.5 2006-05-10 08:13:28 adam Exp $
- Copyright (C) 1995-2005
- Index Data ApS
-
-This file is part of the Zebra server.
-
-Zebra is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
-*/
-
-
-
-#include <yaz/log.h>
-#include <yaz/nmem.h>
-#include <yaz/yaz-util.h>
-
-/* #include <d1_absyn.h> */
-#include <idzebra/data1.h>
-#include <idzebra/recgrs.h>
-
-/* #include <assert.h> */
-#include <ctype.h>
-
-/*
-struct csv_getc_info {
- char *buf;
- int buf_size;
- int size;
- int off;
- off_t moffset;
- void *fh;
- int (*readf)(void *, char *, size_t);
- WRBUF wrbuf;
-};
-*/
-
-struct csv_t {
- NMEM nmem;
- int buf_size;
- char *buf;
- int name_size;
- int value_size;
- char *value;
- char field_char;
- char record_char;
- char string_char;
- char *root_element;
- int field_line;
- int lower_case;
- int max_nr_fields;
- int nr_fields;
- /* char *field_names; */
- char **field_name;
-};
-
-
-static ZEBRA_RES grs_config_csv(void *clientData, Res res, const char *args)
-{
- int i;
- struct csv_t *csvp = (struct csv_t*) clientData;
-
- yaz_log (YLOG_LOG, "Called CSV filter grs_config_csv");
- yaz_log (YLOG_LOG, "'%s'", args);
-
- csvp->buf_size = 64;
- csvp->buf = nmem_malloc(csvp->nmem, csvp->buf_size);
- csvp->name_size = 256;
- csvp->value_size = 4096;
- csvp->value = nmem_malloc(csvp->nmem, csvp->value_size);
-
- csvp->field_char = '|';
- csvp->record_char = '\n';
- csvp->string_char = 0;
- csvp->root_element = nmem_strdup(csvp->nmem, "csv");
- csvp->field_line = 1;
- csvp->lower_case = 1;
- csvp->max_nr_fields = 512;
- csvp->nr_fields = 0;
- /* csvp->field_names = 0; */ /*nmem_strdup(csvp->nmem, "a|b|c|d|e");*/
-
- csvp->field_name
- = nmem_malloc(csvp->nmem,
- sizeof(*(csvp->field_name)) * csvp->max_nr_fields);
- for (i = 0; i < csvp->max_nr_fields; i++){
- csvp->field_name[i] = 0;
- }
-
- /* know field names from config file */
- /*if (strlen(csvp->field_names))
- yaz_log (YLOG_LOG, "CSV filter grs_config_csv field names");
- */
-
- yaz_log (YLOG_LOG, "Ended CSV filter grs_config_csv");
- return ZEBRA_OK;
-}
-
-
-static data1_node *grs_read_csv (struct grs_read_info *gri)
-{
- data1_node *root_node = 0;
- data1_node *node = 0;
- struct csv_t *csvp = (struct csv_t *)gri->clientData;
- int field_nr = 0;
- int end_of_record = 0;
- int read_header = 0;
- int read_bytes = 0;
- char *cb = csvp->buf;
- char *cv = csvp->value;
-
- yaz_log (YLOG_LOG, "Called CSV filter grs_read_csv");
-
- /* if on start of first line, read header line for dynamic configure */
- if(csvp->field_line && gri->offset == 0)
- read_header = 1;
-
- while (!end_of_record){
-
-#if 0
- /* configure grs.csv filter with first line in file containing field
- name information */
- if (read_header){
- yaz_log (YLOG_LOG, "CSV filter grs_read_csv reading header line");
-
- /* create new memory for fieldname and value */
- if (old_nr_fields < csvp->nr_fields){
- yaz_log(YLOG_LOG,
- "CSV filter grs_read_csv name:'%d' ", csvp->nr_fields);
- old_nr_fields = csvp->nr_fields;
- csvp->field_name[csvp->nr_fields]
- = nmem_malloc(csvp->nmem, csvp->name_size);
- csvp->field_value[csvp->nr_fields]
- = nmem_malloc(csvp->nmem, csvp->value_size);
-
- /* read buf and copy values to field_name[] */
- read_bytes = (*gri->readf)(gri->fh, csvp->buf, csvp->buf_size);
- gri-> offset = (*gri->tellf)(gri->fh);
- /* yaz_log(YLOG_LOG, "CSV filter grs_read_csv offset:'%d' ", offset); */
- read_header = 0;
- }
- } else {
- /* read buf and copy values to field_value[] */
- read_bytes = (*gri->readf)(gri->fh, csvp->buf, csvp->buf_size);
- gri->offset = (*gri->tellf)(gri->fh);
- yaz_log(YLOG_LOG, "CSV filter grs_read_csv offset:'%d' ", offset);
- }
-
-#endif
-
-
- /* read new buffer from file */
- read_bytes = (*gri->readf)(gri->fh, csvp->buf, csvp->buf_size);
-
- yaz_log (YLOG_LOG, "CSV filter grs_read_csv read_bytes %d", read_bytes);
- yaz_log (YLOG_LOG, "CSV filter grs_read_csv csvp->buf %s", csvp->buf);
-
- gri->offset = (*gri->tellf)(gri->fh);
- yaz_log(YLOG_LOG, "CSV filter grs_read_csv gri->offset:'%d' ",
- (int)gri->offset);
-
- /* work on buffer */
- cb = csvp->buf;
- while ((cb - csvp->buf < read_bytes)
- && (cv - csvp->value < csvp->value_size)
- && !end_of_record){
-
- if (*cb == csvp->field_char){
- /* if field finished */
- *cv = '\0';
- if (read_header){
- /* read field names from header line */
- if (csvp->nr_fields < csvp->max_nr_fields){
- csvp->field_name[csvp->nr_fields]
- = nmem_strdup(csvp->nmem, csvp->value);
-
- csvp->nr_fields++;
- yaz_log (YLOG_LOG, "CSV filter grs_read_csv field %d name '%s'",
- field_nr, csvp->value);
- } else {
- yaz_log (YLOG_WARN, "CSV filter grs_read_csv field %d name '%s' "
- "exceeds configured max number of fields %d",
- field_nr, csvp->value, csvp->max_nr_fields);
- }
- } else {
- /* process following value line fields */
- if (field_nr < csvp->nr_fields){
- /* less or qual fields number */
- yaz_log (YLOG_LOG, "CSV filter grs_read_csv field %d %s: '%s'",
- field_nr, csvp->field_name[field_nr], csvp->value);
- } else {
- /* too many fields */
- yaz_log (YLOG_WARN, "CSV filter grs_read_csv field value %d %s "
- "exceeds dynamic configured number of fields %d",
- field_nr, csvp->value, csvp->nr_fields);
- }
-
- }
- /* advance buffer and proceed to next field */
- cb++;
- cv = csvp->value;
- field_nr++;
- } else if (*cb == csvp->record_char){
- /* if record finished */
- /* advance buffer and proceed to record */
- *cv = '\0';
- cb++;
- cv = csvp->value;
- field_nr = 0;
- if (read_header){
- read_header = 0;
- yaz_log (YLOG_LOG, "CSV filter grs_read_csv header end");
- } else {
- end_of_record = 1;
- yaz_log (YLOG_LOG, "CSV filter grs_read_csv record end");
- }
- } else {
- /* just plain char to be stored in value, no special action at all */
- if (csvp->lower_case && read_header){
- *cv = tolower(*cb);
- } else {
- *cv = *cb;
- }
- cb++;
- cv++;
- }
- }
-
-
- /* if (gri->endf)
- (*gri->endf)(gri->fh, offset - 1); */
- }
-
- /* try to build GRS node and document */
-
- root_node = data1_mk_root(gri->dh, gri->mem, csvp->root_element);
- node = data1_mk_node2(gri->dh, gri->mem, DATA1N_data, root_node);
- node = data1_mk_tag(gri->dh, gri->mem, "pr_name_gn", 0, node);
- data1_mk_text_n(gri->dh, gri->mem, csvp->buf, read_bytes, node);
-
- if (!root_node){
- yaz_log (YLOG_WARN, "empty CSV record of type '%s' "
- "near file offset %d "
- "or missing abstract syntax file '%s.abs'",
- csvp->root_element, (int)gri->offset, csvp->root_element);
- return 0;
- }
-
- yaz_log (YLOG_LOG, "Ended CSV filter grs_read_csv");
- return root_node;
-}
-
-static void *grs_init_csv(Res res, RecType recType)
-{
- NMEM m = nmem_create();
- struct csv_t *csvp = (struct csv_t *) nmem_malloc(m, sizeof(*csvp));
- yaz_log (YLOG_LOG, "Called CSV filter grs_init_csv");
- csvp->nmem = m;
- yaz_log (YLOG_LOG, "Ended CSV filter grs_init_csv");
- return csvp;
-}
-
-static void grs_destroy_csv(void *clientData)
-{
- struct csv_t *csvp = (struct csv_t*) clientData;
-
- yaz_log (YLOG_LOG, "Called CSV filter grs_destroy_csv");
-
- nmem_destroy(csvp->nmem);
- clientData = 0;
-
- yaz_log (YLOG_LOG, "Ended CSV filter grs_destroy_csv");
-}
-
-static int grs_extract_csv(void *clientData, struct recExtractCtrl *ctrl)
-{
- int res;
- /* struct csv_t *csvp = (struct csv_t*) clientData; */
-
- yaz_log (YLOG_LOG, "Called CSV filter grs_extract_csv");
- yaz_log (YLOG_LOG, "recExtractCtr fh %d", (int)ctrl->fh);
- yaz_log (YLOG_LOG, "recExtractCtr offset %d", (int)ctrl->offset);
-
- res = zebra_grs_extract(clientData, ctrl, grs_read_csv);
-
- yaz_log (YLOG_LOG, "recExtractCtr fh %d", (int)ctrl->fh);
- yaz_log (YLOG_LOG, "recExtractCtr offset %d", (int)ctrl->offset);
- yaz_log (YLOG_LOG, "Ended CSV filter grs_extract_csv");
-
- return res;
-}
-
-static int grs_retrieve_csv(void *clientData, struct recRetrieveCtrl *ctrl)
-{
- int res;
- /* struct csv_t *csvp = (struct csv_t*) clientData; */
-
- yaz_log (YLOG_LOG, "Called CSV filter grs_retrieve_csv");
- res = zebra_grs_retrieve(clientData, ctrl, grs_read_csv);
- yaz_log (YLOG_LOG, "Ended CSV filter grs_retrieve_csv");
-
- return res;
-}
-
-static struct recType grs_type_csv =
-{
- 0,
- "grs.csv",
- grs_init_csv,
- grs_config_csv,
- grs_destroy_csv,
- grs_extract_csv,
- grs_retrieve_csv
-};
-
-RecType
-#ifdef IDZEBRA_STATIC_GRS_CSV
-idzebra_filter_grs_csv
-#else
-idzebra_filter
-#endif
-
-[] = {
- &grs_type_csv,
- 0,
-};
-/*
- * Local variables:
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- * vim: shiftwidth=4 tabstop=8 expandtab
- */
-
+++ /dev/null
-/* $Id: danbibr.c,v 1.11 2006-05-10 08:13:28 adam Exp $
- Copyright (C) 1995-2005
- Index Data ApS
-
-This file is part of the Zebra server.
-
-Zebra is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
-*/
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <yaz/log.h>
-
-#include <idzebra/recgrs.h>
-
-#include <yaz/xmalloc.h>
-
-#define READ_CHUNK 200
-
-struct danbibr_info {
- WRBUF rec_buf;
- char read_buf[READ_CHUNK+1]; /* space for \0 */
-};
-
-static void *init_danbib(Res res, RecType rt)
-{
- struct danbibr_info *p = (struct danbibr_info *) xmalloc (sizeof(*p));
-
- p->rec_buf = wrbuf_alloc();
- wrbuf_puts(p->rec_buf, "");
- return p;
-}
-
-static int read_rec(struct grs_read_info *p)
-{
- struct danbibr_info *info = p->clientData;
-
- wrbuf_rewind(info->rec_buf);
- while(1)
- {
- char *cp_split = 0;
- int r = (*p->readf)(p->fh, info->read_buf, READ_CHUNK);
- if (r <= 0)
- {
- if (wrbuf_len(info->rec_buf) > 0)
- return 1;
- else
- return 0;
- }
- info->read_buf[r] = '\0';
- wrbuf_puts(info->rec_buf, info->read_buf);
-
- cp_split = strstr(wrbuf_buf(info->rec_buf), "\n$");
- if (cp_split)
- {
- cp_split++; /* now at $ */
- if (p->endf)
- (*p->endf)(p->fh, p->offset +
- (cp_split - wrbuf_buf(info->rec_buf)));
-
- cp_split[0] = '\0';
- return 1;
- }
- }
-}
-
-static data1_node *mk_tree(struct grs_read_info *p, const char *rec_buf)
-{
- data1_node *root = data1_mk_root(p->dh, p->mem, "danbib");
- data1_node *root_tag = data1_mk_tag(p->dh, p->mem, "danbib", 0, root);
- const char *cp = rec_buf;
-
- if (1) /* <text> all </text> */
- {
- data1_node *text_node = data1_mk_tag(p->dh, p->mem, "text", 0, root_tag);
- data1_mk_text_n(p->dh, p->mem, rec_buf, strlen(rec_buf), text_node);
- }
- while (*cp)
- {
- const char *start_tag = cp;
- const char *start_text;
- if (*cp == '\n')
- {
- cp++;
- continue;
- }
- else if (*cp == ' ') /* bad continuation */
- {
- while (*cp && *cp != '\n')
- cp++;
- }
- else if (*cp == '$') /* header */
- {
- int no = 1;
- cp++;
- start_text = cp;
- for(start_text = cp; *cp && *cp != '\n'; cp++)
- if (*cp == ':')
- {
- if (start_text != cp)
- {
- char elemstr[20];
- data1_node *hnode;
- sprintf(elemstr, "head%d", no);
-
- hnode = data1_mk_tag(p->dh, p->mem, elemstr, 0, root_tag);
- data1_mk_text_n(p->dh, p->mem, start_text,
- cp - start_text, hnode);
- start_text = cp+1;
- }
- no++;
- }
- }
- else /* other */
- {
- while (*cp != ' ' && *cp && *cp != '\n')
- cp++;
- if (*cp == ' ')
- {
- data1_node *tag_node =
- data1_mk_tag_n(p->dh, p->mem,
- start_tag, cp - start_tag, 0, root_tag);
- cp++;
- start_text = cp;
- while (*cp != '\n' && *cp)
- {
- if (*cp == '*' && cp[1]) /* subfield */
- {
- data1_node *sub_tag_node;
- if (start_text != cp)
- data1_mk_text_n(p->dh, p->mem, start_text,
- cp-start_text, tag_node);
- cp++;
- sub_tag_node =
- data1_mk_tag_n(p->dh, p->mem, cp, 1, 0, tag_node);
- cp++;
- start_text = cp;
- while (*cp)
- {
- if (*cp == '\n' && cp[1] == ' ')
- {
- cp++;
- if (start_text != cp)
- data1_mk_text_n(p->dh, p->mem, start_text,
- cp-start_text, sub_tag_node);
- while (*cp == ' ')
- cp++;
- start_text = cp;
- }
- else if (*cp == '\n')
- break;
- else if (*cp == '*')
- break;
- else
- cp++;
- }
- if (start_text != cp)
- data1_mk_text_n(p->dh, p->mem, start_text,
- cp-start_text, sub_tag_node);
- start_text = cp;
- }
- else
- cp++;
- }
- if (start_text != cp)
- data1_mk_text_n(p->dh, p->mem, start_text,
- cp-start_text, tag_node);
- }
- }
- }
- return root;
-}
-
-static data1_node *read_danbib (struct grs_read_info *p)
-{
- struct danbibr_info *info = p->clientData;
-
- if (read_rec(p))
- return mk_tree(p, wrbuf_buf(info->rec_buf));
- return 0;
-}
-
-static void destroy_danbib(void *clientData)
-{
- struct danbibr_info *p = (struct danbibr_info *) clientData;
-
- wrbuf_free(p->rec_buf, 1);
- xfree (p);
-}
-
-
-static int extract_danbib(void *clientData, struct recExtractCtrl *ctrl)
-{
- return zebra_grs_extract(clientData, ctrl, read_danbib);
-}
-
-static int retrieve_danbib(void *clientData, struct recRetrieveCtrl *ctrl)
-{
- return zebra_grs_retrieve(clientData, ctrl, read_danbib);
-}
-
-static struct recType danbib_type = {
- 0,
- "grs.danbib",
- init_danbib,
- 0,
- destroy_danbib,
- extract_danbib,
- retrieve_danbib,
-};
-
-RecType
-#ifdef IDZEBRA_STATIC_GRS_DANBIB
-idzebra_filter_grs_danbib
-#else
-idzebra_filter
-#endif
-
-[] = {
- &danbib_type,
- 0,
-};
-
-
-
-/*
- * Local variables:
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- * vim: shiftwidth=4 tabstop=8 expandtab
- */
-
+++ /dev/null
-/* $Id: xslt.c,v 1.27 2006-05-31 16:11:58 marc Exp $
- Copyright (C) 1995-2005
- Index Data ApS
-
-This file is part of the Zebra server.
-
-Zebra is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
-*/
-
-#include <stdio.h>
-#include <assert.h>
-#include <ctype.h>
-
-#include <yaz/diagbib1.h>
-#include <yaz/tpath.h>
-
-#include <libxml/xmlversion.h>
-#include <libxml/parser.h>
-#include <libxml/tree.h>
-#include <libxml/xmlIO.h>
-#include <libxml/xmlreader.h>
-#include <libxslt/transform.h>
-/* #include <libxslt/xsltutils.h> */
-
-#include <idzebra/util.h>
-#include <idzebra/recctrl.h>
-
-struct filter_xslt_schema {
- const char *name;
- const char *identifier;
- const char *stylesheet;
- struct filter_xslt_schema *next;
- const char *default_schema;
- const char *include_snippet;
- xsltStylesheetPtr stylesheet_xsp;
-};
-
-struct filter_xslt_info {
- xmlDocPtr doc;
- char *fname;
- char *full_name;
- const char *profile_path;
- const char *split_level;
- const char *split_path;
- ODR odr;
- struct filter_xslt_schema *schemas;
- xmlTextReaderPtr reader;
-};
-
-
-#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
-
-#define XML_STRCMP(a,b) strcmp((char*)a, b)
-#define XML_STRLEN(a) strlen((char*)a)
-
-static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
-
-static void set_param_xml(const char **params, const char *name,
- const char *value, ODR odr)
-{
- while (*params)
- params++;
- params[0] = name;
- params[1] = value;
- params[2] = 0;
-}
-
-static void set_param_str(const char **params, const char *name,
- const char *value, ODR odr)
-{
- char *quoted = odr_malloc(odr, 3 + strlen(value));
- sprintf(quoted, "'%s'", value);
- while (*params)
- params++;
- params[0] = name;
- params[1] = quoted;
- params[2] = 0;
-}
-
-static void set_param_int(const char **params, const char *name,
- zint value, ODR odr)
-{
- char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
- while (*params)
- params++;
- sprintf(quoted, "'" ZINT_FORMAT "'", value);
- params[0] = name;
- params[1] = quoted;
- params[2] = 0;
-}
-
-#define ENABLE_INPUT_CALLBACK 0
-
-#if ENABLE_INPUT_CALLBACK
-static int zebra_xmlInputMatchCallback (char const *filename)
-{
- yaz_log(YLOG_LOG, "match %s", filename);
- return 0;
-}
-
-static void * zebra_xmlInputOpenCallback (char const *filename)
-{
- return 0;
-}
-
-static int zebra_xmlInputReadCallback (void * context, char * buffer, int len)
-{
- return 0;
-}
-
-static int zebra_xmlInputCloseCallback (void * context)
-{
- return 0;
-}
-#endif
-
-static void *filter_init(Res res, RecType recType)
-{
- struct filter_xslt_info *tinfo
- = (struct filter_xslt_info *) xmalloc(sizeof(*tinfo));
- tinfo->reader = 0;
- tinfo->fname = 0;
- tinfo->full_name = 0;
- tinfo->profile_path = 0;
- tinfo->split_level = 0;
- tinfo->split_path = 0;
- tinfo->odr = odr_createmem(ODR_ENCODE);
- tinfo->doc = 0;
- tinfo->schemas = 0;
-
-#if ENABLE_INPUT_CALLBACK
- xmlRegisterDefaultInputCallbacks();
- xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback,
- zebra_xmlInputOpenCallback,
- zebra_xmlInputReadCallback,
- zebra_xmlInputCloseCallback);
-#endif
- return tinfo;
-}
-
-static int attr_content(struct _xmlAttr *attr, const char *name,
- const char **dst_content)
-{
- if (!XML_STRCMP(attr->name, name) && attr->children &&
- attr->children->type == XML_TEXT_NODE)
- {
- *dst_content = (const char *)(attr->children->content);
- return 1;
- }
- return 0;
-}
-
-static void destroy_schemas(struct filter_xslt_info *tinfo)
-{
- struct filter_xslt_schema *schema = tinfo->schemas;
- while (schema)
- {
- struct filter_xslt_schema *schema_next = schema->next;
- if (schema->stylesheet_xsp)
- xsltFreeStylesheet(schema->stylesheet_xsp);
- xfree(schema);
- schema = schema_next;
- }
- tinfo->schemas = 0;
- xfree(tinfo->fname);
- if (tinfo->doc)
- xmlFreeDoc(tinfo->doc);
- tinfo->doc = 0;
-}
-
-static ZEBRA_RES create_schemas(struct filter_xslt_info *tinfo,
- const char *fname)
-{
- char tmp_full_name[1024];
- xmlNodePtr ptr;
- tinfo->fname = xstrdup(fname);
-
- if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path,
- NULL, tmp_full_name))
- tinfo->full_name = xstrdup(tmp_full_name);
- else
- tinfo->full_name = xstrdup(tinfo->fname);
-
- yaz_log(YLOG_LOG, "xslt filter: loading config file %s", tinfo->full_name);
-
- tinfo->doc = xmlParseFile(tinfo->full_name);
- if (!tinfo->doc) {
- yaz_log(YLOG_WARN, "xslt filter: could not parse config file %s",
- tinfo->full_name);
- return ZEBRA_FAIL;
- }
-
- ptr = xmlDocGetRootElement(tinfo->doc);
- if (!ptr || ptr->type != XML_ELEMENT_NODE ||
- XML_STRCMP(ptr->name, "schemaInfo")){
- yaz_log(YLOG_WARN,
- "xslt filter: config file %s :"
- " expected root element <schemaInfo>",
- tinfo->full_name);
- return ZEBRA_FAIL;
- }
-
- for (ptr = ptr->children; ptr; ptr = ptr->next)
- {
- if (ptr->type != XML_ELEMENT_NODE)
- continue;
- if (!XML_STRCMP(ptr->name, "schema"))
- {
- char tmp_xslt_full_name[1024];
- struct _xmlAttr *attr;
- struct filter_xslt_schema *schema = xmalloc(sizeof(*schema));
- schema->name = 0;
- schema->identifier = 0;
- schema->stylesheet = 0;
- schema->default_schema = 0;
- schema->next = tinfo->schemas;
- schema->stylesheet_xsp = 0;
- schema->include_snippet = 0;
- tinfo->schemas = schema;
- for (attr = ptr->properties; attr; attr = attr->next)
- {
- attr_content(attr, "identifier", &schema->identifier);
- attr_content(attr, "name", &schema->name);
- attr_content(attr, "stylesheet", &schema->stylesheet);
- attr_content(attr, "default", &schema->default_schema);
- attr_content(attr, "snippet", &schema->include_snippet);
- }
-
- if (schema->stylesheet){
- yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path,
- NULL, tmp_xslt_full_name);
- schema->stylesheet_xsp
- = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name);
- if (!schema->stylesheet_xsp)
- yaz_log(YLOG_WARN,
- "xslt filter: could not parse xslt stylesheet %s",
- tmp_xslt_full_name);
- }
-
- }
- else if (!XML_STRCMP(ptr->name, "split"))
- {
- struct _xmlAttr *attr;
- for (attr = ptr->properties; attr; attr = attr->next)
- {
- attr_content(attr, "level", &tinfo->split_level);
- attr_content(attr, "path", &tinfo->split_path);
- }
- }
- else
- {
- yaz_log(YLOG_WARN, "Bad element %s in %s", ptr->name, fname);
- return ZEBRA_FAIL;
- }
- }
- return ZEBRA_OK;
-}
-
-static struct filter_xslt_schema *lookup_schema(struct filter_xslt_info *tinfo,
- const char *est)
-{
- struct filter_xslt_schema *schema;
- for (schema = tinfo->schemas; schema; schema = schema->next)
- {
- /* find requested schema */
- if (est)
- {
- if (schema->identifier && !strcmp(schema->identifier, est))
- return schema;
-
- if (schema->name && !strcmp(schema->name, est))
- return schema;
- }
- /* or return default schema if defined */
- else if (schema->default_schema)
- return schema;
- }
-
- /* return first schema if no default schema defined */
- if (tinfo->schemas)
- return tinfo->schemas;
-
- return 0;
-}
-
-static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
-{
- struct filter_xslt_info *tinfo = clientData;
- if (!args || !*args){
- yaz_log(YLOG_WARN, "xslt filter: need config file");
- return ZEBRA_FAIL;
- }
-
- if (tinfo->fname && !strcmp(args, tinfo->fname))
- return ZEBRA_OK;
-
- tinfo->profile_path
- /* = res_get_def(res, "profilePath", DEFAULT_PROFILE_PATH); */
- = res_get(res, "profilePath");
- yaz_log(YLOG_LOG, "xslt filter: profilePath %s", tinfo->profile_path);
-
- destroy_schemas(tinfo);
- create_schemas(tinfo, args);
- return ZEBRA_OK;
-}
-
-static void filter_destroy(void *clientData)
-{
- struct filter_xslt_info *tinfo = clientData;
- destroy_schemas(tinfo);
- if (tinfo->reader)
- xmlFreeTextReader(tinfo->reader);
- odr_destroy(tinfo->odr);
- xfree(tinfo);
-}
-
-static int ioread_ex(void *context, char *buffer, int len)
-{
- struct recExtractCtrl *p = context;
- return (*p->readf)(p->fh, buffer, len);
-}
-
-static int ioclose_ex(void *context)
-{
- return 0;
-}
-
-static void index_cdata(struct filter_xslt_info *tinfo, struct recExtractCtrl *ctrl,
- xmlNodePtr ptr, RecWord *recWord)
-{
- for(; ptr; ptr = ptr->next)
- {
- index_cdata(tinfo, ctrl, ptr->children, recWord);
- if (ptr->type != XML_TEXT_NODE)
- continue;
- recWord->term_buf = (const char *)ptr->content;
- recWord->term_len = XML_STRLEN(ptr->content);
- (*ctrl->tokenAdd)(recWord);
- }
-}
-
-static void index_node(struct filter_xslt_info *tinfo, struct recExtractCtrl *ctrl,
- xmlNodePtr ptr, RecWord *recWord)
-{
- for(; ptr; ptr = ptr->next)
- {
- index_node(tinfo, ctrl, ptr->children, recWord);
- if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
- XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
- continue;
- if (!XML_STRCMP(ptr->name, "index"))
- {
- const char *name_str = 0;
- const char *type_str = 0;
- const char *xpath_str = 0;
- struct _xmlAttr *attr;
- for (attr = ptr->properties; attr; attr = attr->next)
- {
- attr_content(attr, "name", &name_str);
- attr_content(attr, "xpath", &xpath_str);
- attr_content(attr, "type", &type_str);
- }
- if (name_str)
- {
- int prev_type = recWord->index_type; /* save default type */
-
- if (type_str && *type_str)
- recWord->index_type = *type_str; /* type was given */
- recWord->index_name = name_str;
- index_cdata(tinfo, ctrl, ptr->children, recWord);
-
- recWord->index_type = prev_type; /* restore it again */
- }
- }
- }
-}
-
-static void index_record(struct filter_xslt_info *tinfo,struct recExtractCtrl *ctrl,
- xmlNodePtr ptr, RecWord *recWord)
-{
- if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
- !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
- && !XML_STRCMP(ptr->name, "record"))
- {
- const char *type_str = "update";
- const char *id_str = 0;
- const char *rank_str = 0;
- struct _xmlAttr *attr;
- for (attr = ptr->properties; attr; attr = attr->next)
- {
- attr_content(attr, "type", &type_str);
- attr_content(attr, "id", &id_str);
- attr_content(attr, "rank", &rank_str);
- }
- if (id_str)
- sscanf(id_str, "%255s", ctrl->match_criteria);
- if (rank_str)
- {
- ctrl->staticrank = atoi(rank_str);
- yaz_log(YLOG_LOG, "rank=%d",ctrl->staticrank);
- }
- else
- yaz_log(YLOG_LOG, "no rank");
-
- ptr = ptr->children;
- }
- index_node(tinfo, ctrl, ptr, recWord);
-}
-
-static int extract_doc(struct filter_xslt_info *tinfo, struct recExtractCtrl *p,
- xmlDocPtr doc)
-{
- RecWord recWord;
- const char *params[10];
- xmlChar *buf_out;
- int len_out;
-
- struct filter_xslt_schema *schema = lookup_schema(tinfo, zebra_xslt_ns);
-
- params[0] = 0;
- set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr);
-
- (*p->init)(p, &recWord);
-
- if (schema && schema->stylesheet_xsp)
- {
- xmlNodePtr root_ptr;
- xmlDocPtr resDoc =
- xsltApplyStylesheet(schema->stylesheet_xsp,
- doc, params);
- if (p->flagShowRecords)
- {
- xmlDocDumpMemory(resDoc, &buf_out, &len_out);
- fwrite(buf_out, len_out, 1, stdout);
- xmlFree(buf_out);
- }
- root_ptr = xmlDocGetRootElement(resDoc);
- if (root_ptr)
- index_record(tinfo, p, root_ptr, &recWord);
- else
- {
- yaz_log(YLOG_WARN, "No root for index XML record."
- " split_level=%s stylesheet=%s",
- tinfo->split_level, schema->stylesheet);
- }
- xmlFreeDoc(resDoc);
- }
- xmlDocDumpMemory(doc, &buf_out, &len_out);
- if (p->flagShowRecords)
- fwrite(buf_out, len_out, 1, stdout);
- (*p->setStoreData)(p, buf_out, len_out);
- xmlFree(buf_out);
-
- xmlFreeDoc(doc);
- return RECCTRL_EXTRACT_OK;
-}
-
-static int extract_split(struct filter_xslt_info *tinfo, struct recExtractCtrl *p)
-{
- int ret;
- int split_depth = 0;
- if (p->first_record)
- {
- if (tinfo->reader)
- xmlFreeTextReader(tinfo->reader);
- tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
- p /* I/O handler */,
- 0 /* URL */,
- 0 /* encoding */,
- XML_PARSE_XINCLUDE);
- }
- if (!tinfo->reader)
- return RECCTRL_EXTRACT_ERROR_GENERIC;
-
- if (tinfo->split_level)
- split_depth = atoi(tinfo->split_level);
- ret = xmlTextReaderRead(tinfo->reader);
- while (ret == 1) {
- int type = xmlTextReaderNodeType(tinfo->reader);
- int depth = xmlTextReaderDepth(tinfo->reader);
- if (split_depth == 0 ||
- (split_depth > 0 &&
- type == XML_READER_TYPE_ELEMENT && split_depth == depth))
- {
- xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
- xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
- xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
-
- xmlDocSetRootElement(doc, ptr2);
-
- return extract_doc(tinfo, p, doc);
- }
- ret = xmlTextReaderRead(tinfo->reader);
- }
- xmlFreeTextReader(tinfo->reader);
- tinfo->reader = 0;
- return RECCTRL_EXTRACT_EOF;
-}
-
-static int extract_full(struct filter_xslt_info *tinfo, struct recExtractCtrl *p)
-{
- if (p->first_record) /* only one record per stream */
- {
- xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
- 0 /* URL */,
- 0 /* encoding */,
- XML_PARSE_XINCLUDE);
- if (!doc)
- {
- return RECCTRL_EXTRACT_ERROR_GENERIC;
- }
- return extract_doc(tinfo, p, doc);
- }
- else
- return RECCTRL_EXTRACT_EOF;
-}
-
-static int filter_extract(void *clientData, struct recExtractCtrl *p)
-{
- struct filter_xslt_info *tinfo = clientData;
-
- odr_reset(tinfo->odr);
-
- if (tinfo->split_level == 0 && tinfo->split_path == 0)
- return extract_full(tinfo, p);
- else
- {
- return extract_split(tinfo, p);
- }
-}
-
-static int ioread_ret(void *context, char *buffer, int len)
-{
- struct recRetrieveCtrl *p = context;
- return (*p->readf)(p->fh, buffer, len);
-}
-
-static int ioclose_ret(void *context)
-{
- return 0;
-}
-
-
-static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode,
- int window_size)
-{
- const char *xml_doc_str;
- int ord = 0;
- WRBUF wrbuf = wrbuf_alloc();
- zebra_snippets *res =
- zebra_snippets_window(p->doc_snippet, p->hit_snippet, window_size);
- zebra_snippet_word *w = zebra_snippets_list(res);
-
- if (text_mode)
- wrbuf_printf(wrbuf, "\'");
- else
- wrbuf_printf(wrbuf, "<snippet xmlns='%s'>\n", zebra_xslt_ns);
- for (; w; w = w->next)
- {
- if (ord == 0)
- ord = w->ord;
- else if (ord != w->ord)
-
- break;
- if (text_mode)
- wrbuf_printf(wrbuf, "%s%s%s ",
- w->match ? "*" : "",
- w->term,
- w->match ? "*" : "");
- else
- {
- wrbuf_printf(wrbuf, " <term ord='%d' seqno='" ZINT_FORMAT "' %s>",
- w->ord, w->seqno,
- (w->match ? "match='1'" : ""));
- wrbuf_xmlputs(wrbuf, w->term);
- wrbuf_printf(wrbuf, "</term>\n");
- }
- }
- if (text_mode)
- wrbuf_printf(wrbuf, "\'");
- else
- wrbuf_printf(wrbuf, "</snippet>\n");
-
- xml_doc_str = odr_strdup(p->odr, wrbuf_buf(wrbuf));
-
- zebra_snippets_destroy(res);
- wrbuf_free(wrbuf, 1);
- return xml_doc_str;
-}
-
-static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
-{
- const char *esn = 0;
- const char *params[32];
- struct filter_xslt_info *tinfo = clientData;
- xmlDocPtr resDoc;
- xmlDocPtr doc;
- struct filter_xslt_schema *schema;
- int window_size = -1;
-
- if (p->comp)
- {
- if (p->comp->which == Z_RecordComp_simple
- && p->comp->u.simple->which == Z_ElementSetNames_generic)
- {
- esn = p->comp->u.simple->u.generic;
- }
- else if (p->comp->which == Z_RecordComp_complex
- && p->comp->u.complex->generic->elementSpec
- && p->comp->u.complex->generic->elementSpec->which ==
- Z_ElementSpec_elementSetName)
- {
- esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
- }
- }
- schema = lookup_schema(tinfo, esn);
- if (!schema)
- {
- p->diagnostic =
- YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
- return 0;
- }
-
- if (schema->include_snippet)
- window_size = atoi(schema->include_snippet);
-
- params[0] = 0;
- set_param_int(params, "id", p->localno, p->odr);
- if (p->fname)
- set_param_str(params, "filename", p->fname, p->odr);
- if (p->staticrank >= 0)
- set_param_int(params, "rank", p->staticrank, p->odr);
-
- if (esn)
- set_param_str(params, "schema", esn, p->odr);
- else
- if (schema->name)
- set_param_str(params, "schema", schema->name, p->odr);
- else if (schema->identifier)
- set_param_str(params, "schema", schema->identifier, p->odr);
- else
- set_param_str(params, "schema", "", p->odr);
-
- if (p->score >= 0)
- set_param_int(params, "score", p->score, p->odr);
- set_param_int(params, "size", p->recordSize, p->odr);
-
- if (window_size >= 0)
- set_param_xml(params, "snippet", snippet_doc(p, 1, window_size),
- p->odr);
- doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
- 0 /* URL */,
- 0 /* encoding */,
- XML_PARSE_XINCLUDE);
- if (!doc)
- {
- p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
- return 0;
- }
-
- if (window_size >= 0)
- {
- xmlNodePtr node = xmlDocGetRootElement(doc);
- const char *snippet_str = snippet_doc(p, 0, window_size);
- xmlDocPtr snippet_doc = xmlParseMemory(snippet_str, strlen(snippet_str));
- xmlAddChild(node, xmlDocGetRootElement(snippet_doc));
- }
- if (!schema->stylesheet_xsp)
- resDoc = doc;
- else
- {
- resDoc = xsltApplyStylesheet(schema->stylesheet_xsp,
- doc, params);
- xmlFreeDoc(doc);
- }
- if (!resDoc)
- {
- p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
- }
- else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
- {
- xmlChar *buf_out;
- int len_out;
-
- xsltSaveResultToString(&buf_out, &len_out, resDoc,
- schema->stylesheet_xsp);
-
- p->output_format = VAL_TEXT_XML;
- p->rec_len = len_out;
- p->rec_buf = odr_malloc(p->odr, p->rec_len);
- memcpy(p->rec_buf, buf_out, p->rec_len);
-
- xmlFree(buf_out);
- }
- else if (p->output_format == VAL_SUTRS)
- {
- xmlChar *buf_out;
- int len_out;
-
- xsltSaveResultToString(&buf_out, &len_out, resDoc,
- schema->stylesheet_xsp);
-
- p->output_format = VAL_SUTRS;
- p->rec_len = len_out;
- p->rec_buf = odr_malloc(p->odr, p->rec_len);
- memcpy(p->rec_buf, buf_out, p->rec_len);
-
- xmlFree(buf_out);
- }
- else
- {
- p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
- }
- xmlFreeDoc(resDoc);
- return 0;
-}
-
-static struct recType filter_type = {
- 0,
- "xslt",
- filter_init,
- filter_config,
- filter_destroy,
- filter_extract,
- filter_retrieve
-};
-
-RecType
-#ifdef IDZEBRA_STATIC_XSLT
-idzebra_filter_xslt
-#else
-idzebra_filter
-#endif
-
-[] = {
- &filter_type,
- 0,
-};
-/*
- * Local variables:
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- * vim: shiftwidth=4 tabstop=8 expandtab
- */
-