-/*
- * Copyright (c) 1995, the EUROPAGATE consortium (see below).
- *
- * The EUROPAGATE consortium members are:
- *
- * University College Dublin
- * Danmarks Teknologiske Videnscenter
- * An Chomhairle Leabharlanna
- * Consejo Superior de Investigaciones Cientificas
- *
- * Permission to use, copy, modify, distribute, and sell this software and
- * its documentation, in whole or in part, for any purpose, is hereby granted,
- * provided that:
- *
- * 1. This copyright and permission notice appear in all copies of the
- * software and its documentation. Notices of copyright or attribution
- * which appear at the beginning of any file must remain unchanged.
- *
- * 2. The names of EUROPAGATE or the project partners may not be used to
- * endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * 3. Users of this software (implementors and gateway operators) agree to
- * inform the EUROPAGATE consortium of their use of the software. This
- * information will be used to evaluate the EUROPAGATE project and the
- * software, and to plan further developments. The consortium may use
- * the information in later publications.
- *
- * 4. Users of this software agree to make their best efforts, when
- * documenting their use of the software, to acknowledge the EUROPAGATE
- * consortium, and the role played by the software in their work.
- *
- * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
- * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
- * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
- * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
- * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
- * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
- * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * USE OR PERFORMANCE OF THIS SOFTWARE.
- *
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2012 Index Data
+ * See the file LICENSE for details.
*/
-
/**
* \file cclfind.c
* \brief Implements parsing of a CCL FIND query.
* of lookahead in the handling of relational operations.. So
* it's not really pure.
*/
-
-
-/* CCL find (to rpn conversion)
- * Europagate, 1995
- *
- * $Id: cclfind.c,v 1.13 2007-04-30 19:55:40 adam Exp $
- *
- * Old Europagate log:
- *
- * Revision 1.16 1996/01/08 08:41:13 adam
- * Removed unused function.
- *
- * Revision 1.15 1995/07/20 08:14:34 adam
- * Qualifiers were observed too often. Instead tokens are treated as
- * qualifiers only when separated by comma.
- *
- * Revision 1.14 1995/05/16 09:39:26 adam
- * LICENSE.
- *
- * Revision 1.13 1995/04/17 09:31:42 adam
- * Improved handling of qualifiers. Aliases or reserved words.
- *
- * Revision 1.12 1995/03/20 15:27:43 adam
- * Minor changes.
- *
- * Revision 1.11 1995/02/23 08:31:59 adam
- * Changed header.
- *
- * Revision 1.9 1995/02/16 13:20:06 adam
- * Spell fix.
- *
- * Revision 1.8 1995/02/14 19:59:42 adam
- * Removed a syntax error.
- *
- * Revision 1.7 1995/02/14 19:55:10 adam
- * Header files ccl.h/cclp.h are gone! They have been merged an
- * moved to ../include/ccl.h.
- * Node kind(s) in ccl_rpn_node have changed names.
- *
- * Revision 1.6 1995/02/14 16:20:55 adam
- * Qualifiers are read from a file now.
- *
- * Revision 1.5 1995/02/14 14:12:41 adam
- * Ranges for ordered qualfiers implemented (e.g. pd=1980-1990).
- *
- * Revision 1.4 1995/02/14 13:16:29 adam
- * Left and/or right truncation implemented.
- *
- * Revision 1.3 1995/02/14 10:25:56 adam
- * The constructions 'qualifier rel term ...' implemented.
- *
- * Revision 1.2 1995/02/13 15:15:07 adam
- * Added handling of qualifiers. Not finished yet.
- *
- * Revision 1.1 1995/02/13 12:35:20 adam
- * First version of CCL. Qualifiers aren't handled yet.
- *
- */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <stdlib.h>
#include <string.h>
+#include <assert.h>
#include "cclp.h"
case CCL_RPN_TERM:
p->u.t.attr_list = 0;
p->u.t.term = 0;
+ p->u.t.qual = 0;
break;
default:
break;
break;
case CCL_RPN_TERM:
xfree(rpn->u.t.term);
+ xfree(rpn->u.t.qual);
for (attr = rpn->u.t.attr_list; attr; attr = attr1)
{
attr1 = attr->next;
n->value.str = xstrdup(value);
}
+/**
+ * cmp_operator: Test whether input begins with one of the operator aliases.
+ * aliases: NULL-terminated array of alias strings.
+ * input:   string to examine; it is read only until the alias ends or a
+ *          byte differs.
+ * return:  length of the first alias (in array order) that is a prefix of
+ *          input; 0 if no alias matches.
+ *
+ * A zero-length alias can never denote an operator, so it is skipped.
+ * Treating it as a match would return 0 immediately and hide every alias
+ * listed after it.
+ */
+static size_t cmp_operator(const char **aliases, const char *input)
+{
+    for (; *aliases; aliases++)
+    {
+        const char *cp = *aliases;
+        size_t i;
+        /* a NUL in input differs from any remaining alias byte, so the
+           scan never reads past the terminator of input */
+        for (i = 0; *cp && *cp == input[i]; i++, cp++)
+            ;
+        /* whole alias consumed and it was non-empty: report its length */
+        if (*cp == '\0' && i > 0)
+            return i;
+    }
+    return 0;
+}
+#define REGEX_CHARS "^[]{}()|.*+?!$"
+#define CCL_CHARS "#?\\"
/**
* search_term: Parse CCL search term.
* cclp: CCL Parser
struct ccl_rpn_node *p;
size_t no, i;
int no_spaces = 0;
- int left_trunc = 0;
- int right_trunc = 0;
- int mid_trunc = 0;
int relation_value = -1;
int position_value = -1;
int structure_value = -1;
int truncation_value = -1;
int completeness_value = -1;
int len = 0;
+ int left_trunc = 0;
+ int right_trunc = 0;
+ int regex_trunc = 0;
+ int z3958_trunc = 0;
size_t max = 200;
if (and_list || or_list || !multi)
max = 1;
for (i = 0; i<lookahead->len; i++)
if (lookahead->name[i] == ' ')
no_spaces++;
- else if (strchr(truncation_aliases[0], lookahead->name[i]))
- {
- if (no == 0 && i == 0 && lookahead->len >= 1)
- left_trunc = 1;
- else if (!is_term_ok(lookahead->next->kind, term_list) &&
- i == lookahead->len-1 && i >= 1)
- right_trunc = 1;
- else
- mid_trunc = 1;
- }
len += 1+lookahead->len+lookahead->ws_prefix_len;
lookahead = lookahead->next;
}
p = ccl_rpn_node_create(CCL_RPN_TERM);
p->u.t.attr_list = NULL;
p->u.t.term = NULL;
+ if (qa && qa[0])
+ {
+ const char *n = ccl_qual_get_name(qa[0]);
+ if (n)
+ p->u.t.qual = xstrdup(n);
+ }
/* go through all attributes and add them to the attribute list */
for (i=0; qa && qa[i]; i++)
if (truncation_value != -1)
continue;
truncation_value = attr->value.numeric;
- left_trunc = right_trunc = mid_trunc = 0;
break;
case CCL_BIB1_COM:
if (completeness_value != -1)
ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
}
+ if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
+ &attset))
+ {
+ regex_trunc = 1; /* regex trunc (102) allowed */
+ }
+ else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
+ &attset))
+ {
+ z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
+ }
+
/* make the RPN token */
- p->u.t.term = (char *)xmalloc(len);
+ p->u.t.term = (char *)xmalloc(len * 2 + 2);
ccl_assert(p->u.t.term);
p->u.t.term[0] = '\0';
for (i = 0; i<no; i++)
{
const char *src_str = cclp->look_token->name;
size_t src_len = cclp->look_token->len;
-
- if (i == 0 && left_trunc)
+ int j;
+ int quote_mode = 0;
+
+ if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
{
- src_len--;
- src_str++;
+ strxcat(p->u.t.term, cclp->look_token->ws_prefix_buf,
+ cclp->look_token->ws_prefix_len);
}
- if (i == no-1 && right_trunc)
- src_len--;
- if (!ccl_qual_match_stop(cclp->bibset, qa, src_str, src_len))
+ for (j = 0; j < src_len; j++)
{
-#if 0
- fprintf(stderr, "[%s %.*s]",
- ccl_qual_get_name(qa[0]), src_len, src_str);
-#endif
- if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
+ size_t op_size;
+ if (j > 0 && src_str[j-1] == '\\')
+ {
+ if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
+ {
+ regex_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j]))
+ {
+ z3958_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ strxcat(p->u.t.term, src_str + j, 1);
+ }
+ else if (src_str[j] == '"')
+ quote_mode = !quote_mode;
+ else if (!quote_mode &&
+ (op_size = cmp_operator(truncation_aliases,
+ src_str + j))
+ )
{
- size_t len = strlen(p->u.t.term);
- memcpy(p->u.t.term + len, cclp->look_token->ws_prefix_buf,
- cclp->look_token->ws_prefix_len);
- p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0';
+ j += (op_size - 1); /* j++ in for loop */
+ if (regex_trunc)
+ {
+ strcat(p->u.t.term, ".*");
+ regex_trunc = 2; /* regex trunc is really needed */
+ }
+ else if (z3958_trunc)
+ {
+ strcat(p->u.t.term, "?");
+ z3958_trunc = 2;
+ }
+ else if (i == 0 && j == 0)
+ left_trunc = 1;
+ else if (i == no - 1 && j == src_len - 1)
+ right_trunc = 1;
+ else
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_EMBED;
+ ccl_rpn_delete(p);
+ return NULL;
+ }
+ }
+ else if (!quote_mode && src_str[j] == '#')
+ {
+ if (regex_trunc)
+ {
+ strcat(p->u.t.term, ".");
+ regex_trunc = 2; /* regex trunc is really needed */
+ }
+ else if (z3958_trunc)
+ {
+ strcat(p->u.t.term, "#");
+ z3958_trunc = 2;
+ }
+ else
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_SINGLE;
+ ccl_rpn_delete(p);
+ return NULL;
+ }
+ }
+ else if (src_str[j] != '\\')
+ {
+ if (regex_trunc && strchr(REGEX_CHARS, src_str[j]))
+ {
+ regex_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ else if (z3958_trunc && strchr(CCL_CHARS, src_str[j]))
+ {
+ z3958_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ strxcat(p->u.t.term, src_str + j, 1);
}
- strxcat(p->u.t.term, src_str, src_len);
}
ADVANCE;
}
- if (p->u.t.term[0] == 0)
- {
- ccl_rpn_delete(p);
- continue;
- }
-
/* make the top node point to us.. */
if (p_top)
{
}
ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
}
+ else if (regex_trunc == 2)
+ {
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
+ }
+ else if (z3958_trunc == 2)
+ {
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
+ }
else
{
if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
return search_term_x(cclp, qa, list, 0);
}
+
+/**
+ * search_terms2: Parse either a parenthesised find specification or a
+ * plain term.
+ * cclp:   CCL Parser
+ * qa:     Qualifier attributes, handed on unchanged to find_spec() or
+ *         search_term_x().
+ * return: the parsed node, or NULL on failure. If the token following the
+ *         nested specification is not CCL_TOK_RP, error_code is set to
+ *         CCL_ERR_RP_EXPECTED and the nested node is deleted.
+ */
+static struct ccl_rpn_node *search_terms2(CCL_parser cclp,
+                                          ccl_qualifier_t *qa)
+{
+    /* token kinds passed to search_term_x(); terminated by -1 */
+    static int term_kinds[] = {
+        CCL_TOK_TERM, CCL_TOK_COMMA, CCL_TOK_EQ,
+        CCL_TOK_REL, CCL_TOK_SET, -1};
+    struct ccl_rpn_node *node;
+
+    /* not at CCL_TOK_LP: delegate straight to the term parser */
+    if (KIND != CCL_TOK_LP)
+        return search_term_x(cclp, qa, term_kinds, 1);
+
+    ADVANCE;                        /* consume CCL_TOK_LP */
+    node = find_spec(cclp, qa);
+    if (!node)
+        return NULL;
+
+    if (KIND == CCL_TOK_RP)
+    {
+        ADVANCE;                    /* consume CCL_TOK_RP */
+        return node;
+    }
+
+    /* closing token missing: report it and discard the nested result */
+    cclp->error_code = CCL_ERR_RP_EXPECTED;
+    ccl_rpn_delete(node);
+    return NULL;
+}
+
+
+
static
struct ccl_rpn_node *qualifiers_order(CCL_parser cclp,
ccl_qualifier_t *ap, char *attset)
ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, 2);
return p;
}
- else if (KIND == CCL_TOK_LP)
- {
- ADVANCE;
- if (!(p = find_spec(cclp, ap)))
- return NULL;
- if (KIND != CCL_TOK_RP)
- {
- cclp->error_code = CCL_ERR_RP_EXPECTED;
- ccl_rpn_delete(p);
- return NULL;
- }
- ADVANCE;
- return p;
- }
else
{
if (!(p = search_terms(cclp, ap)))
struct ccl_rpn_node *qualifier_relation(CCL_parser cclp, ccl_qualifier_t *ap)
{
char *attset;
- struct ccl_rpn_node *p;
if (qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset)
|| qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, &attset))
return NULL;
}
ADVANCE;
- if (KIND == CCL_TOK_LP)
- {
- ADVANCE;
- if (!(p = find_spec(cclp, ap)))
- {
- return NULL;
- }
- if (KIND != CCL_TOK_RP)
- {
- cclp->error_code = CCL_ERR_RP_EXPECTED;
- ccl_rpn_delete(p);
- return NULL;
- }
- ADVANCE;
- }
- else
- p = search_terms(cclp, ap);
- return p;
+ return search_terms(cclp, ap);
}
/**
static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa)
{
static int list[] = {
- CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, CCL_TOK_SET, -1};
+ CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ,
+ CCL_TOK_REL, CCL_TOK_SET, -1};
struct ccl_rpn_node *p1, *p2, *pn;
- p1 = search_term_x(cclp, qa, list, 1);
+ p1 = search_terms2(cclp, qa);
if (!p1)
return NULL;
while (1)
p_prox->u.t.attr_list = 0;
ADVANCE;
- p2 = search_term_x(cclp, qa, list, 1);
+ p2 = search_terms2(cclp, qa);
if (!p2)
{
ccl_rpn_delete(p1);
}
else if (is_term_ok(KIND, list))
{
- p2 = search_term_x(cclp, qa, list, 1);
+ p2 = search_terms2(cclp, qa);
if (!p2)
{
ccl_rpn_delete(p1);
{
struct ccl_rpn_node *p1;
struct ccl_token *lookahead;
- if (KIND == CCL_TOK_LP)
- {
- ADVANCE;
- p1 = find_spec(cclp, qa);
- if (!p1)
- return NULL;
- if (KIND != CCL_TOK_RP)
- {
- cclp->error_code = CCL_ERR_RP_EXPECTED;
- ccl_rpn_delete(p1);
- return NULL;
- }
- ADVANCE;
- return p1;
- }
- else if (KIND == CCL_TOK_SET)
+ if (KIND == CCL_TOK_SET)
{
ADVANCE;
if (KIND == CCL_TOK_EQ)
break;
lookahead = lookahead->next;
}
- if (qa)
+ if (qa || lookahead->kind == CCL_TOK_LP)
return search_terms(cclp, qa);
else
{
ccl_token_del(list);
return p;
}
+
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab