Indexing system change. Introduced new index category type
author Adam Dickmeiss <adam@indexdata.dk>
Thu, 22 Jun 2006 15:07:20 +0000 (15:07 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Thu, 22 Jun 2006 15:07:20 +0000 (15:07 +0000)
zinfo_index_category_t which is used to properly distinguish between
index (normal index), sort, alwaysmatches index, and anchor (to be
implemented later). Also added support for proper alwaysmatches
relation for X-Path searches. Bug #617.

index/attribute.c
index/extract.c
index/index.h
index/zinfo.c
index/zinfo.h
index/zrpn.c
index/zsets.c
test/api/t2.c
test/api/zebra.cfg
test/xpath/xpath1.c

index 10b1e6d..cb9d62c 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: attribute.c,v 1.23 2006-05-19 13:49:34 adam Exp $
+/* $Id: attribute.c,v 1.24 2006-06-22 15:07:20 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -67,6 +67,7 @@ static int att_getentbyatt(ZebraHandle zi, oid_value set, int att,
 
 ZEBRA_RES zebra_attr_list_get_ord(ZebraHandle zh,
                                   Z_AttributeList *attr_list,
+                                  zinfo_index_category_t cat,
                                   int index_type,
                                   oid_value curAttributeSet,
                                   int *ord)
@@ -105,7 +106,8 @@ ZEBRA_RES zebra_attr_list_get_ord(ZebraHandle zh,
         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
         return ZEBRA_FAIL;
     }
-    *ord = zebraExplain_lookup_attr_str(zh->reg->zei, index_type, use_string);
+    *ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat, 
+                                        index_type, use_string);
     if (*ord == -1)
     {
         if (use_value < 0)
@@ -119,6 +121,7 @@ ZEBRA_RES zebra_attr_list_get_ord(ZebraHandle zh,
 
 ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh,
                             Z_AttributesPlusTerm *zapt,
+                            zinfo_index_category_t cat,
                             int index_type,
                             const char *xpath_use,
                             oid_value curAttributeSet,
@@ -126,10 +129,10 @@ ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh,
 {
     if (!xpath_use)
         return zebra_attr_list_get_ord(zh, zapt->attributes,
-                                       index_type, curAttributeSet, ord);
+                                       cat, index_type, curAttributeSet, ord);
     else
     {
-        *ord = zebraExplain_lookup_attr_str(zh->reg->zei, index_type,
+        *ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat, index_type,
                                             xpath_use);
         if (*ord == -1)
         {
@@ -162,9 +165,11 @@ ZEBRA_RES zebra_sort_get_ord(ZebraHandle zh,
         *numerical = 1;
     
     if (zebra_attr_list_get_ord(zh, sortAttributes->list,
+                                zinfo_index_category_sort,
                                 's', VAL_BIB1, ord)== ZEBRA_OK)
         return ZEBRA_OK;
     if (zebra_attr_list_get_ord(zh, sortAttributes->list,
+                                zinfo_index_category_sort,
                                 'S', VAL_BIB1, ord)== ZEBRA_OK)
         return ZEBRA_OK;
     return ZEBRA_FAIL;
index 1a5eada..d4d9b69 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: extract.c,v 1.222 2006-06-22 09:48:08 adam Exp $
+/* $Id: extract.c,v 1.223 2006-06-22 15:07:20 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -106,7 +106,9 @@ static void logRecord (ZebraHandle zh)
     }
 }
 
-static void extract_add_index_string (RecWord *p, const char *str, int length);
+static void extract_add_index_string (RecWord *p, 
+                                      zinfo_index_category_t cat,
+                                      const char *str, int length);
 
 static void extract_set_store_data_prepare(struct recExtractCtrl *p);
 
@@ -128,16 +130,17 @@ static void searchRecordKey(ZebraHandle zh,
 {
     int i;
     int ch = -1;
+    zinfo_index_category_t cat = zinfo_index_category_index;
 
     for (i = 0; i<ws_length; i++)
         ws[i] = NULL;
 
     if (ch < 0)
-        ch = zebraExplain_lookup_attr_str(zh->reg->zei, '0', index_name);
+        ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, '0', index_name);
     if (ch < 0)
-        ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'p', index_name);
+        ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, 'p', index_name);
     if (ch < 0)
-        ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'w', index_name);
+        ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, 'w', index_name);
 
     if (ch < 0)
        return ;
@@ -403,7 +406,8 @@ static void all_matches_add(struct recExtractCtrl *ctrl)
     word.index_name = "allrecords";
     word.index_type = 'w';
     word.seqno = 1;
-    extract_add_index_string (&word, "", 0);
+    extract_add_index_string (&word, zinfo_index_category_alwaysmatches,
+                              "", 0);
 }
 
 static ZEBRA_RES file_extract_record(ZebraHandle zh,
@@ -1622,7 +1626,8 @@ void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys)
     }
 }
 
-static void extract_add_index_string(RecWord *p, const char *str, int length)
+static void extract_add_index_string(RecWord *p, zinfo_index_category_t cat,
+                                     const char *str, int length)
 {
     struct it_key key;
 
@@ -1633,9 +1638,9 @@ static void extract_add_index_string(RecWord *p, const char *str, int length)
     if (!p->index_name)
         return;
 
-    ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name);
+    ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
     if (ch < 0)
-        ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name);
+        ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
 
     key.len = 4;
     key.mem[0] = ch;
@@ -1670,13 +1675,15 @@ static void extract_add_sort_string(RecWord *p, const char *str, int length)
     ZebraHandle zh = p->extractCtrl->handle;
     ZebraExplainInfo zei = zh->reg->zei;
     int ch;
+    zinfo_index_category_t cat = zinfo_index_category_sort;
 
     if (!p->index_name)
         return;
 
-    ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name);
+    ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
     if (ch < 0)
-        ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name);
+        ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
     key.len = 4;
     key.mem[0] = ch;
     key.mem[1] = p->record_id;
@@ -1709,14 +1716,16 @@ static void extract_add_string (RecWord *p, const char *string, int length)
        extract_add_sort_string (p, string, length);
     else
     {
-       extract_add_index_string(p, string, length);
+       extract_add_index_string(p, zinfo_index_category_index,
+                                 string, length);
         if (zebra_maps_is_alwaysmatches(p->zebra_maps, p->index_type))
         {
             RecWord word;
             memcpy(&word, p, sizeof(word));
 
             word.seqno = 1;
-            extract_add_index_string (&word, "", 0);
+            extract_add_index_string(
+                &word, zinfo_index_category_alwaysmatches, "", 0);
         }
     }
 }
index 1bb24f1..4e0eae2 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: index.h,v 1.167 2006-06-13 12:02:08 adam Exp $
+/* $Id: index.h,v 1.168 2006-06-22 15:07:20 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -429,6 +429,7 @@ void zebra_term_untrans(ZebraHandle zh, int reg_type,
 
 ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh,
                             Z_AttributesPlusTerm *zapt,
+                            zinfo_index_category_t cat,
                             int index_type,
                             const char *xpath_use,
                             oid_value curAttributeSet,
@@ -436,6 +437,7 @@ ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh,
 
 ZEBRA_RES zebra_attr_list_get_ord(ZebraHandle zh,
                                   Z_AttributeList *attr_list,
+                                  zinfo_index_category_t cat,
                                   int index_type,
                                   oid_value curAttributeSet,
                                   int *ord);
index 908fdd3..b7d4386 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zinfo.c,v 1.66 2006-06-13 12:02:12 adam Exp $
+/* $Id: zinfo.c,v 1.67 2006-06-22 15:07:20 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -33,6 +33,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 
 struct zebSUInfo {
     int index_type;
+    zinfo_index_category_t cat;
 #define ZEB_SU_SET_USE 1
 #define ZEB_SU_STR 2
     int which;
@@ -593,6 +594,7 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei,
        data1_node *node_str = NULL;
        data1_node *node_ordinal = NULL;
        data1_node *node_type = NULL;
+       data1_node *node_cat = NULL;
         data1_node *node_doc_occurrences = NULL;
         data1_node *node_term_occurrences = NULL;
        data1_node *np2;
@@ -610,6 +612,8 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei,
                node_ordinal = np2->child;
            else if (!strcmp(np2->u.tag.tag, "type"))
                node_type = np2->child;
+           else if (!strcmp(np2->u.tag.tag, "cat"))
+               node_cat = np2->child;
            else if (!strcmp(np2->u.tag.tag, "dococcurrences"))
                node_doc_occurrences = np2->child;
            else if (!strcmp(np2->u.tag.tag, "termoccurrences"))
@@ -632,6 +636,31 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei,
            yaz_log(YLOG_WARN, "Missing attribute 'type' in attribute info");
            (*zsuip)->info.index_type = 'w';
        }
+        if (node_cat && node_cat->u.data.len > 0)
+        {
+            zinfo_index_category_t cat;
+
+            data1_node *np = node_cat;
+            if (!strncmp(np->u.data.data, "index", np->u.data.len))
+                cat = zinfo_index_category_index;
+            else if (!strncmp(np->u.data.data, "sort", np->u.data.len))
+                cat = zinfo_index_category_sort;
+            else if (!strncmp(np->u.data.data, "alwaysmatches", 
+                              np->u.data.len))
+                cat = zinfo_index_category_alwaysmatches;
+            else if (!strncmp(np->u.data.data, "anchor", 
+                              np->u.data.len))
+                cat = zinfo_index_category_anchor;
+            else
+            {
+                yaz_log(YLOG_WARN, "Bad index cateogry '%.*s'",
+                        np->u.data.len, np->u.data.data);
+                cat = zinfo_index_category_index;
+            }
+            (*zsuip)->info.cat = cat;
+        }
+        else
+            (*zsuip)->info.cat = zinfo_index_category_index;
 
         if (node_doc_occurrences)
         {
@@ -1067,6 +1096,21 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei,
                                 zsui->info.doc_occurrences, zei->nmem);
         data1_mk_tag_data_zint (zei->dh, node_attr, "termoccurrences",
                                 zsui->info.term_occurrences, zei->nmem);
+        switch(zsui->info.cat)
+        {
+        case zinfo_index_category_index:
+           data1_mk_tag_data_text (zei->dh, node_attr, "cat",
+                                   "index", zei->nmem); break;
+        case zinfo_index_category_sort:
+           data1_mk_tag_data_text (zei->dh, node_attr, "cat",
+                                   "sort", zei->nmem); break;
+        case zinfo_index_category_alwaysmatches:
+           data1_mk_tag_data_text (zei->dh, node_attr, "cat",
+                                   "alwaysmatches", zei->nmem); break;
+        case zinfo_index_category_anchor:
+           data1_mk_tag_data_text (zei->dh, node_attr, "cat",
+                                   "anchor", zei->nmem); break;
+        }
     }
     /* convert to "SGML" and write it */
 #if ZINFO_DEBUG
@@ -1306,7 +1350,9 @@ static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush)
     rec_put (zei->records, &trec);
 }
 
-int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type,
+int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, 
+                                 zinfo_index_category_t cat,
+                                 int index_type,
                                 const char *str)
 {
     struct zebSUInfoB **zsui;
@@ -1315,6 +1361,7 @@ int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type,
     for (zsui = &zei->curDatabaseInfo->attributeDetails->SUInfo;
         *zsui; zsui = &(*zsui)->next)
         if ((*zsui)->info.index_type == index_type
+            && (*zsui)->info.cat == cat
             && (*zsui)->info.which == ZEB_SU_STR 
             && !yaz_matchstr((*zsui)->info.u.str, str))
         {
@@ -1489,6 +1536,7 @@ void zebraExplain_addAttributeSet (ZebraExplainInfo zei, int set)
 }
 
 struct zebSUInfoB *zebraExplain_add_sui_info(ZebraExplainInfo zei,
+                                             zinfo_index_category_t cat,
                                              int index_type)
 {
     struct zebSUInfoB *zsui;
@@ -1500,16 +1548,19 @@ struct zebSUInfoB *zebraExplain_add_sui_info(ZebraExplainInfo zei,
     zei->curDatabaseInfo->attributeDetails->dirty = 1;
     zei->dirty = 1;
     zsui->info.index_type = index_type;
+    zsui->info.cat = cat;
     zsui->info.doc_occurrences = 0;
     zsui->info.term_occurrences = 0;
     zsui->info.ordinal = (zei->ordinalSU)++;
     return zsui;
 }
 
-int zebraExplain_add_attr_str(ZebraExplainInfo zei, int index_type,
+int zebraExplain_add_attr_str(ZebraExplainInfo zei, 
+                              zinfo_index_category_t cat,
+                              int index_type,
                              const char *index_name)
 {
-    struct zebSUInfoB *zsui = zebraExplain_add_sui_info(zei, index_type);
+    struct zebSUInfoB *zsui = zebraExplain_add_sui_info(zei, cat, index_type);
 
     zsui->info.which = ZEB_SU_STR;
     zsui->info.u.str = nmem_strdup(zei->nmem, index_name);
index 3121162..eab4022 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zinfo.h,v 1.34 2006-06-13 12:02:13 adam Exp $
+/* $Id: zinfo.h,v 1.35 2006-06-22 15:07:20 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -37,6 +37,13 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 
 YAZ_BEGIN_CDECL
 
+typedef enum {                          /* index category kept per attribute entry (zebSUInfo.cat) */
+    zinfo_index_category_index,         /* normal index; used when no "cat" tag is read */
+    zinfo_index_category_sort,          /* sort index */
+    zinfo_index_category_alwaysmatches, /* alwaysmatches index */
+    zinfo_index_category_anchor         /* anchor; per the commit message, to be implemented later */
+} zinfo_index_category_t;
+    
 typedef ZEBRA_RES ZebraExplainUpdateFunc(void *handle,
                                          Record drec, 
                                          data1_node *n);
@@ -55,9 +62,13 @@ int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database,
                              int explain_database);
 int zebraExplain_add_attr_su(ZebraExplainInfo zei, int index_type,
                             int set, int use);
-int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type,
+int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, 
+                                 zinfo_index_category_t cat,
+                                 int index_type,
                                 const char *str);
-int zebraExplain_add_attr_str(ZebraExplainInfo zei, int index_type,
+int zebraExplain_add_attr_str(ZebraExplainInfo zei, 
+                              zinfo_index_category_t cat,
+                              int index_type,
                              const char *str);
 void zebraExplain_addSchema (ZebraExplainInfo zei, Odr_oid *oid);
 void zebraExplain_recordCountIncrement (ZebraExplainInfo zei, int adjust_num);
index 8cc6515..b1c7dcc 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zrpn.c,v 1.217 2006-06-07 10:50:08 adam Exp $
+/* $Id: zrpn.c,v 1.218 2006-06-22 15:07:20 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -66,7 +66,7 @@ static const char **rpn_char_map_handler(void *vp, const char **from, int len)
 }
 
 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
-                                  struct rpn_char_map_info *map_info)
+                                 struct rpn_char_map_info *map_info)
 {
     map_info->zm = reg->zebra_maps;
     map_info->reg_type = reg_type;
@@ -1035,11 +1035,6 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
        int attr_ok = 0;
        int regex_range = 0;
        int init_pos = 0;
-#if 0
-        attent attp;
-        data1_local_attribute id_xpath_attr;
-        data1_local_attribute *local_attr;
-#endif
         int max_pos, prefix_len = 0;
        int relation_error;
         char ord_buf[32];
@@ -1054,7 +1049,8 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             return ZEBRA_FAIL;
         }
         
-        if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, 
+        if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
+                              reg_type, xpath_use, 
                               curAttributeSet, &ord) 
             != ZEBRA_OK)
         {
@@ -1453,6 +1449,123 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh,
     return ZEBRA_OK;
 }
 
+/* Per database: resolve the alwaysmatches ordinal and grep the dictionary. */
+static ZEBRA_RES always_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                            oid_value attributeSet, NMEM stream,
+                            struct grep_info *grep_info,
+                            int reg_type, int complete_flag, /* complete_flag is not used in this body */
+                            int num_bases, char **basenames,
+                            char *term_dst, /* NOTE(review): never read or written in this body */
+                             const char *xpath_use,
+                            struct ord_list **ol) /* out: list of the ordinals found, one per database */
+{
+    char term_dict[2*IT_MAX_WORD+4000];
+    int r, base_no; /* r receives the dict_lookup_grep result but is never checked */
+    struct rpn_char_map_info rcmi; /* filled in below; not referenced again in this function */
+
+    int bases_ok = 0;     /* no of databases with OK attribute */
+
+    *ol = ord_list_create(stream);
+
+    rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
+
+    for (base_no = 0; base_no < num_bases; base_no++)
+    {
+       int ord = -1;
+       int regex_range = 0;
+       int init_pos = 0;
+        int max_pos, prefix_len = 0; /* both are fresh for every database; max_pos is only an out argument */
+        char ord_buf[32];
+        int ord_len, i;
+
+        if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) /* non-zero: database could not be selected */
+        {
+           zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
+                          basenames[base_no]);
+            return ZEBRA_FAIL;
+        }
+        
+        if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_alwaysmatches,
+                              reg_type, xpath_use, 
+                              attributeSet, &ord) != ZEBRA_OK)
+            return ZEBRA_FAIL; /* one failing database aborts the whole search */
+        yaz_log(YLOG_LOG, "Got ordinal value: %d", ord);
+        *ol = ord_list_append(stream, *ol, ord);
+        
+        if (prefix_len) /* NOTE(review): prefix_len is always 0 here, so the '|' branch looks unreachable */
+            term_dict[prefix_len++] = '|';
+        else
+            term_dict[prefix_len++] = '(';
+        
+        ord_len = key_SU_encode (ord, ord_buf);
+        for (i = 0; i<ord_len; i++)
+        {
+            term_dict[prefix_len++] = 1; /* every encoded ordinal byte is preceded by a byte of value 1 */
+            term_dict[prefix_len++] = ord_buf[i];
+        }
+        if (ord_len > init_pos)
+            init_pos = ord_len;
+        
+       bases_ok++;
+
+        term_dict[prefix_len++] = ')'; /* pattern is now "(" + encoded ordinal + ")" with no term part */
+        term_dict[prefix_len] = '\0';
+        
+        r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
+                             grep_info, &max_pos, init_pos,
+                             grep_handle); /* presumably grep_handle records hits in grep_info - confirm */
+    }
+    if (!bases_ok) /* every completed iteration increments bases_ok, so this fires only if the loop never ran */
+        return ZEBRA_FAIL;
+    yaz_log(YLOG_LOG, "always_term: %d positions", grep_info->isam_p_indx);
+    return ZEBRA_OK;
+}
+/* Build the result set for an alwaysmatches search from always_term() output. */
+static ZEBRA_RES rpn_search_APT_alwaysmatches(ZebraHandle zh,
+                                              Z_AttributesPlusTerm *zapt,
+                                              const char *termz_org, /* not used in this body */
+                                              oid_value attributeSet,
+                                              NMEM stream,
+                                              int reg_type, int complete_flag,
+                                              const char *rank_type,
+                                              const char *xpath_use,
+                                              int num_bases, char **basenames, 
+                                              NMEM rset_nmem,
+                                              RSET *rset, /* out: result set, assigned only when always_term succeeds */
+                                              struct rset_key_control *kc)
+{
+    char term_dst[IT_MAX_WORD+1]; /* NOTE(review): declared without an initialiser */
+    struct grep_info grep_info;
+    zint hits_limit_value;
+    const char *term_ref_id_str = 0;
+    ZEBRA_RES res;
+    struct ord_list *ol;
+
+    term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
+                   stream);
+    if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
+        return ZEBRA_FAIL; /* returns before the grep_info_delete at the end of the function */
+
+    grep_info.isam_p_indx = 0;
+
+    res = always_term(zh, zapt, attributeSet, stream, &grep_info,
+                     reg_type, complete_flag, num_bases, basenames,
+                     term_dst, xpath_use, &ol);
+    if (res == ZEBRA_OK)
+    {
+        *rset = rset_trunc(zh, grep_info.isam_p_buf,
+                           grep_info.isam_p_indx, term_dst, strlen(term_dst), /* NOTE(review): always_term() as shown never writes term_dst; strlen() looks like a read of indeterminate memory - confirm */
+                           rank_type, 1 /* preserve pos */,
+                           zapt->term->which, rset_nmem,
+                           kc, kc->scope, ol, reg_type, hits_limit_value,
+                           term_ref_id_str);
+        if (!*rset) /* a null result set is reported as failure */
+            res = ZEBRA_FAIL;
+    }
+    grep_info_delete (&grep_info); /* reached on success and on failure once grep_info was prepared */
+    return res;
+}
+
 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
                                       Z_AttributesPlusTerm *zapt,
                                       const char *termz_org,
@@ -1662,8 +1775,8 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             return ZEBRA_FAIL;
         }
 
-        if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
-                              curAttributeSet, &ord) 
+        if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
+                              reg_type, xpath_use, curAttributeSet, &ord) 
             != ZEBRA_OK)
         {
             break;
@@ -1891,9 +2004,10 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 }
 
 
-static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                       oid_value attributeSet,
-                       struct xpath_location_step *xpath, int max, NMEM mem)
+static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                           oid_value attributeSet,
+                           struct xpath_location_step *xpath, int max,
+                           NMEM mem)
 {
     oid_value curAttributeSet = attributeSet;
     AttrType use;
@@ -1913,7 +2027,7 @@ static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
                         int reg_type, const char *term, 
                         const char *xpath_use,
-                        oid_value curAttributeSet, NMEM rset_nmem,
+                        NMEM rset_nmem,
                        struct rset_key_control *kc)
 {
     RSET rset;
@@ -1921,7 +2035,9 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
     char term_dict[2048];
     char ord_buf[32];
     int prefix_len = 0;
-    int ord = zebraExplain_lookup_attr_str(zh->reg->zei, reg_type,
+    int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
+                                           zinfo_index_category_index,
+                                           reg_type,
                                            xpath_use);
     int ord_len, i, r, max_pos;
     int term_type = Z_Term_characterString;
@@ -1962,7 +2078,6 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
 
 static
 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
-                          oid_value attributeSet,
                           int num_bases, char **basenames,
                           NMEM stream, const char *rank_type, RSET rset,
                           int xpath_len, struct xpath_location_step *xpath,
@@ -1970,9 +2085,9 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
                           RSET *rset_out,
                           struct rset_key_control *kc)
 {
-    oid_value curAttributeSet = attributeSet;
     int base_no;
     int i;
+    int always_matches = rset ? 0 : 1;
 
     if (xpath_len < 0)
     {
@@ -1987,8 +2102,6 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
 
     }
 
-    curAttributeSet = VAL_IDXPATH;
-
     /*
       //a    ->    a/.*
       //a/b  ->    b/a/.*
@@ -2082,7 +2195,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
                 wrbuf_puts(wbuf, "");
                 rset_attr = xpath_trunc(
                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
-                    curAttributeSet, rset_nmem, kc);
+                    rset_nmem, kc);
                 wrbuf_free(wbuf, 1);
             } 
             else 
@@ -2096,18 +2209,20 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
                 rset_start_tag = xpath_trunc(zh, stream, '0', 
                                              xpath_rev, 
                                              ZEBRA_XPATH_ELM_BEGIN, 
-                                             curAttributeSet,
                                              rset_nmem, kc);
-            
-                rset_end_tag = xpath_trunc(zh, stream, '0', 
-                                           xpath_rev, 
-                                           ZEBRA_XPATH_ELM_END, 
-                                           curAttributeSet,
-                                           rset_nmem, kc);
-
-                rset = rset_create_between(rset_nmem, kc, kc->scope,
-                                           rset_start_tag, rset,
-                                           rset_end_tag, rset_attr);
+                if (always_matches)
+                    rset = rset_start_tag;
+                else
+                {
+                    rset_end_tag = xpath_trunc(zh, stream, '0', 
+                                               xpath_rev, 
+                                               ZEBRA_XPATH_ELM_END, 
+                                               rset_nmem, kc);
+                    
+                    rset = rset_create_between(rset_nmem, kc, kc->scope,
+                                               rset_start_tag, rset,
+                                               rset_end_tag, rset_attr);
+                }
             }
             first_path = 0;
         }
@@ -2116,6 +2231,8 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
     return ZEBRA_OK;
 }
 
+#define MAX_XPATH_STEPS 10
+
 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                                oid_value attributeSet, NMEM stream,
                                Z_SortKeySpecList *sort_sequence,
@@ -2133,7 +2250,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     char termz[IT_MAX_WORD+1];
     int xpath_len;
     const char *xpath_use = 0;
-    struct xpath_location_step xpath[10];
+    struct xpath_location_step xpath[MAX_XPATH_STEPS];
 
     if (!log_level_set)
     {
@@ -2155,7 +2272,8 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
                             rank_type, rset_nmem, rset, kc);
     /* consider if an X-Path query is used */
-    xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
+    xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
+                                xpath, MAX_XPATH_STEPS, stream);
     if (xpath_len >= 0)
     {
         if (xpath[xpath_len-1].part[0] == '@') 
@@ -2211,23 +2329,34 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     }
     else if (!strcmp(search_type, "always"))
     {
-        *termz = '\0';
-        res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
-                                    reg_id, complete_flag, rank_type,
-                                    xpath_use,
-                                    num_bases, basenames, rset_nmem,
-                                    rset, kc);
+        if (xpath_len >= 0) /* alwaysmatches and X-Path ? */
+        {
+            *rset = 0; /* signal no "term" set */
+            return rpn_search_xpath(zh, num_bases, basenames,
+                           stream, rank_type, *rset, 
+                           xpath_len, xpath, rset_nmem, rset, kc);
+        }
+        else
+        {
+            res = rpn_search_APT_alwaysmatches(zh, zapt, termz,
+                                               attributeSet, stream,
+                                               reg_id, complete_flag,
+                                               rank_type,
+                                               xpath_use,
+                                               num_bases, basenames, rset_nmem,
+                                               rset, kc);
+        }
     }
     else
     {
-       zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
+       zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
        res = ZEBRA_FAIL;
     }
     if (res != ZEBRA_OK)
        return res;
     if (!*rset)
        return ZEBRA_FAIL;
-    return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
+    return rpn_search_xpath(zh, num_bases, basenames,
                            stream, rank_type, *rset, 
                            xpath_len, xpath, rset_nmem, rset, kc);
 }
@@ -2619,7 +2748,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
            return ZEBRA_FAIL;
        }
 
-        if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord) 
+        if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
+                              index_type, 0, attributeset, &ord) 
             != ZEBRA_OK)
         {
             break;
index a8d8ce9..3d7b61d 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zsets.c,v 1.107 2006-06-07 10:14:42 adam Exp $
+/* $Id: zsets.c,v 1.108 2006-06-22 15:07:20 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -856,7 +856,9 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
                    i+1);
             sort_criteria[i].numerical = 0;
             sort_criteria[i].ord = 
-                zebraExplain_lookup_attr_str(zh->reg->zei, 's',
+                zebraExplain_lookup_attr_str(zh->reg->zei,
+                                             zinfo_index_category_sort,
+                                             's',
                                              sk->u.sortField);
             if (sks->which != Z_SortKeySpec_null
                 && sort_criteria[i].ord == -1)
index 449331a..ac30474 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: t2.c,v 1.19 2006-05-10 08:13:35 adam Exp $
-   Copyright (C) 1995-2005
+/* $Id: t2.c,v 1.20 2006-06-22 15:07:20 adam Exp $
+   Copyright (C) 1995-2006
    Index Data ApS
 
 This file is part of the Zebra server.
@@ -34,7 +34,12 @@ void tst(int argc, char **argv)
     ZebraHandle  zh = zebra_open(zs, 0);
 
     YAZ_CHECK(tl_init_data(zh, myrec));
+    YAZ_CHECK(tl_query(zh, "@attr 1=title my", 1));
     YAZ_CHECK(tl_query(zh, "@attr 1=4 my", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=title nope", 0));
+    YAZ_CHECK(tl_query(zh, "@attr 1=4 nope", 0));
+    YAZ_CHECK(tl_query(zh, "@attr 1=title @attr 2=103 dummy", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 dummy", 1));
 
     YAZ_CHECK(tl_close_down(zh, zs));
 }
index 354a1c2..b4b2bac 100644 (file)
@@ -1,7 +1,8 @@
-# $Id: zebra.cfg,v 1.3 2004-06-15 08:06:33 adam Exp $
+# $Id: zebra.cfg,v 1.4 2006-06-22 15:07:20 adam Exp $
 profilepath: ${srcdir:-.}/../../tab
 
 attset: bib1.att
+attset: explain.att
 
 recordType: grs.sgml
 
index b9356a3..a45e8bd 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: xpath1.c,v 1.6 2006-05-10 08:13:41 adam Exp $
+/* $Id: xpath1.c,v 1.7 2006-06-22 15:07:21 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -31,8 +31,8 @@ static void tst(int argc, char **argv)
     const char *myrec[] = {
         "<sgml> \n"
         "  before \n"
-        "  <tag> \n"
-        "    inside \n"
+        "  <tag x='v'> \n"
+        "    inside it\n"
         "  </tag> \n"
         "  after \n"
         "</sgml> \n",
@@ -44,6 +44,7 @@ static void tst(int argc, char **argv)
 
     YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag before", 0));
     YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag inside", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag {inside it}", 1));
     YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag after", 0));
 
     YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/none after", 0));
@@ -53,6 +54,21 @@ static void tst(int argc, char **argv)
     YAZ_CHECK(tl_query(zh, "@attr 1=/sgml inside", 1));
     YAZ_CHECK(tl_query(zh, "@attr 1=/sgml after", 1));
 
+    YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@x v", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@x no", 0));
+    YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@y v", 0));
+
+    YAZ_CHECK(tl_query(zh, "@attr 1=_XPATH_BEGIN @attr 4=3 tag/sgml/", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=_XPATH_BEGIN @attr 4=3 sgml/", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=_XPATH_BEGIN @attr 4=3 tag/", 0));
+
+    /* bug #617 */
+    YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag @attr 2=103 dummy", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=/sgml @attr 2=103 dummy", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=/tag @attr 2=103 dummy", 0));
+    YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@x @attr 2=103 dummy", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@y @attr 2=103 dummy", 0));
+
     YAZ_CHECK(tl_close_down(zh, zs));
 }