/* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2011 Index Data
+ * Copyright (C) 1995-2012 Index Data
* See the file LICENSE for details.
*/
/**
#include <stdlib.h>
#include <string.h>
+#include <assert.h>
#include "cclp.h"
n->value.str = xstrdup(value);
}
+static size_t cmp_operator(const char **aliases, const char *input) /* returns the length of the first alias that is a prefix of input, or 0 if none is */
+{
+ for (; *aliases; aliases++) /* walk the alias array until its terminating null entry */
+ {
+ const char *cp = *aliases;
+ size_t i;
+ for (i = 0; *cp && *cp == input[i]; i++, cp++) /* i counts the leading characters on which alias and input agree */
+ ;
+ if (*cp == '\0') /* the alias terminator was reached, so the whole alias matched */
+ return i; /* i equals the length of the matched alias here */
+ }
+ return 0; /* no alias matched; an empty alias also yields 0 */
+}
+#define REGEX_CHARS "^[]{}()|.*+?!$" /* characters that get a backslash prefix in the term when regex truncation is enabled */
+#define CCL_CHARS "#?\\" /* characters that get a backslash prefix in the term when Z39.58 truncation is enabled */
/**
* search_term: Parse CCL search term.
* cclp: CCL Parser
int len = 0;
int left_trunc = 0;
int right_trunc = 0;
+ int regex_trunc = 0;
+ int z3958_trunc = 0;
size_t max = 200;
if (and_list || or_list || !multi)
max = 1;
ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
}
+ if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
+ &attset))
+ {
+ regex_trunc = 1; /* regex trunc (102) allowed */
+ }
+ else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
+ &attset))
+ {
+ z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
+ }
+
/* make the RPN token */
- p->u.t.term = (char *)xmalloc(len);
+ p->u.t.term = (char *)xmalloc(len * 2 + 2);
ccl_assert(p->u.t.term);
p->u.t.term[0] = '\0';
for (i = 0; i<no; i++)
{
const char *src_str = cclp->look_token->name;
size_t src_len = cclp->look_token->len;
+ int j;
+ int quote_mode = 0;
- if (i == 0 && src_len > 0 && *src_str == '?')
- {
- src_len--;
- src_str++;
- left_trunc = 1;
- }
- if (i == no - 1 && src_len > 0 && src_str[src_len-1] == '?')
+ if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
{
- src_len--;
- right_trunc = 1;
+ strxcat(p->u.t.term, cclp->look_token->ws_prefix_buf,
+ cclp->look_token->ws_prefix_len);
}
- if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
+ for (j = 0; j < src_len; j++)
{
- size_t len = strlen(p->u.t.term);
- memcpy(p->u.t.term + len, cclp->look_token->ws_prefix_buf,
- cclp->look_token->ws_prefix_len);
- p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0';
+ size_t op_size;
+ if (j > 0 && src_str[j-1] == '\\')
+ {
+ if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
+ {
+ regex_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j]))
+ {
+ z3958_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ strxcat(p->u.t.term, src_str + j, 1);
+ }
+ else if (src_str[j] == '"')
+ quote_mode = !quote_mode;
+ else if (!quote_mode &&
+ (op_size = cmp_operator(truncation_aliases,
+ src_str + j))
+ )
+ {
+ j += (op_size - 1); /* j++ in for loop */
+ if (regex_trunc)
+ {
+ strcat(p->u.t.term, ".*");
+ regex_trunc = 2; /* regex trunc is really needed */
+ }
+ else if (z3958_trunc)
+ {
+ strcat(p->u.t.term, "?");
+ z3958_trunc = 2;
+ }
+ else if (i == 0 && j == 0)
+ left_trunc = 1;
+ else if (i == no - 1 && j == src_len - 1)
+ right_trunc = 1;
+ else
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_EMBED;
+ ccl_rpn_delete(p);
+ return NULL;
+ }
+ }
+ else if (!quote_mode && src_str[j] == '#')
+ {
+ if (regex_trunc)
+ {
+ strcat(p->u.t.term, ".");
+ regex_trunc = 2; /* regex trunc is really needed */
+ }
+ else if (z3958_trunc)
+ {
+ strcat(p->u.t.term, "#");
+ z3958_trunc = 2;
+ }
+ else
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_SINGLE;
+ ccl_rpn_delete(p);
+ return NULL;
+ }
+ }
+ else if (src_str[j] != '\\')
+ {
+ if (regex_trunc && strchr(REGEX_CHARS, src_str[j]))
+ {
+ regex_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ else if (z3958_trunc && strchr(CCL_CHARS, src_str[j]))
+ {
+ z3958_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ strxcat(p->u.t.term, src_str + j, 1);
+ }
}
- strxcat(p->u.t.term, src_str, src_len);
ADVANCE;
}
}
ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
}
+ else if (regex_trunc == 2)
+ {
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
+ }
+ else if (z3958_trunc == 2)
+ {
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
+ }
else
{
if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
return search_term_x(cclp, qa, list, 0);
}
+
+static struct ccl_rpn_node *search_terms2(CCL_parser cclp,
+ ccl_qualifier_t *qa) /* parses either a parenthesised sub-expression or a single search term; returns NULL on failure */
+{
+ if (KIND == CCL_TOK_LP) /* KIND is presumably the kind of the current token - confirm against the macro definition */
+ {
+ struct ccl_rpn_node *p;
+ ADVANCE; /* presumably moves to the next token, i.e. skips the left parenthesis - confirm */
+ if (!(p = find_spec(cclp, qa))) /* parse the inner expression with the same qualifiers */
+ return NULL; /* find_spec returned NULL, so there is no node to delete */
+ if (KIND != CCL_TOK_RP) /* the sub-expression must be followed by a right-parenthesis token */
+ {
+ cclp->error_code = CCL_ERR_RP_EXPECTED;
+ ccl_rpn_delete(p); /* delete the node returned by find_spec before reporting failure */
+ return NULL;
+ }
+ ADVANCE; /* presumably skips the right parenthesis - confirm */
+ return p;
+ }
+ else
+ {
+ static int list[] = { /* token kinds handed to search_term_x; the trailing -1 presumably ends the list - confirm */
+ CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ,
+ CCL_TOK_REL, CCL_TOK_SET, -1};
+
+ return search_term_x(cclp, qa, list, 1); /* no parenthesis: delegate to search_term_x; the meaning of the final 1 is not visible in this block */
+ }
+}
+
+
+
static
struct ccl_rpn_node *qualifiers_order(CCL_parser cclp,
ccl_qualifier_t *ap, char *attset)
ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, 2);
return p;
}
- else if (KIND == CCL_TOK_LP)
- {
- ADVANCE;
- if (!(p = find_spec(cclp, ap)))
- return NULL;
- if (KIND != CCL_TOK_RP)
- {
- cclp->error_code = CCL_ERR_RP_EXPECTED;
- ccl_rpn_delete(p);
- return NULL;
- }
- ADVANCE;
- return p;
- }
else
{
if (!(p = search_terms(cclp, ap)))
struct ccl_rpn_node *qualifier_relation(CCL_parser cclp, ccl_qualifier_t *ap)
{
char *attset;
- struct ccl_rpn_node *p;
if (qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset)
|| qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, &attset))
return NULL;
}
ADVANCE;
- if (KIND == CCL_TOK_LP)
- {
- ADVANCE;
- if (!(p = find_spec(cclp, ap)))
- {
- return NULL;
- }
- if (KIND != CCL_TOK_RP)
- {
- cclp->error_code = CCL_ERR_RP_EXPECTED;
- ccl_rpn_delete(p);
- return NULL;
- }
- ADVANCE;
- }
- else
- p = search_terms(cclp, ap);
- return p;
+ return search_terms(cclp, ap);
}
/**
static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa)
{
static int list[] = {
- CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, CCL_TOK_SET, -1};
+ CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ,
+ CCL_TOK_REL, CCL_TOK_SET, -1};
struct ccl_rpn_node *p1, *p2, *pn;
- p1 = search_term_x(cclp, qa, list, 1);
+ p1 = search_terms2(cclp, qa);
if (!p1)
return NULL;
while (1)
p_prox->u.t.attr_list = 0;
ADVANCE;
- p2 = search_term_x(cclp, qa, list, 1);
+ p2 = search_terms2(cclp, qa);
if (!p2)
{
ccl_rpn_delete(p1);
}
else if (is_term_ok(KIND, list))
{
- p2 = search_term_x(cclp, qa, list, 1);
+ p2 = search_terms2(cclp, qa);
if (!p2)
{
ccl_rpn_delete(p1);
{
struct ccl_rpn_node *p1;
struct ccl_token *lookahead;
- if (KIND == CCL_TOK_LP)
- {
- ADVANCE;
- p1 = find_spec(cclp, qa);
- if (!p1)
- return NULL;
- if (KIND != CCL_TOK_RP)
- {
- cclp->error_code = CCL_ERR_RP_EXPECTED;
- ccl_rpn_delete(p1);
- return NULL;
- }
- ADVANCE;
- return p1;
- }
- else if (KIND == CCL_TOK_SET)
+ if (KIND == CCL_TOK_SET)
{
ADVANCE;
if (KIND == CCL_TOK_EQ)
break;
lookahead = lookahead->next;
}
- if (qa)
+ if (qa || lookahead->kind == CCL_TOK_LP)
return search_terms(cclp, qa);
else
{