Do not truncate more than at most 'truncmax' terms in one single
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.4 2006-12-20 14:19:21 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazily initialised logging state: log_level_set is flipped to 1 once
   log_level_rpn has been fetched from the "rpn" YAZ log module (this
   happens on the first call of add_isam_p). */
static int log_level_set = 0;
static int log_level_rpn = 0;

/* NOTE(review): not referenced in the visible part of this file;
   presumably switches off term-set handling elsewhere -- confirm. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
/*
 * rpn_char_map_prepare: fill in map_info for the given register and
 * register type and install rpn_char_map_handler as the character-map
 * callback of the register's dictionary.
 *  reg:       register whose zebra_maps and dict are used
 *  reg_type:  register type stored in map_info
 *  map_info:  caller-owned storage; a pointer to it is handed to
 *             dict_grep_cmap(), so it presumably has to stay valid
 *             while the dictionary may invoke the callback
 */
void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
                          struct rpn_char_map_info *map_info)
{
    map_info->zm = reg->zebra_maps;
    map_info->reg_type = reg_type;
    dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
}
71
/* When defined, struct grep_info carries a term_no array that is grown
   in parallel with isam_p_buf. */
#define TERM_COUNT

/* State collected while grepping the dictionary (see add_isam_p). */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;         /* same capacity as isam_p_buf; NOTE(review):
                             no element is written in the visible code */
#endif
    ISAM_P *isam_p_buf;   /* ISAM positions of the dictionary hits */
    int isam_p_size;      /* allocated capacity of isam_p_buf (elements) */
    int isam_p_indx;      /* number of entries currently in use */
    int trunc_max;        /* add_isam_p stops storing once
                             isam_p_indx >= trunc_max - 1 */
    ZebraHandle zh;       /* handle used for term and explain lookups */
    int reg_type;         /* register type used to untranslate terms */
    ZebraSet termset;     /* when non-NULL, every hit term is added here */
};
86
87 static int add_isam_p(const char *name, const char *info,
88                       struct grep_info *p)
89 {
90     if (!log_level_set)
91     {
92         log_level_rpn = yaz_log_module_level("rpn");
93         log_level_set = 1;
94     }
95     /* we may have to stop this madness.. NOTE: -1 so that if
96        truncmax == trunxlimit we do *not* generate result sets */
97     if (p->isam_p_indx >= p->trunc_max - 1)
98         return 1;
99
100     if (p->isam_p_indx == p->isam_p_size)
101     {
102         ISAM_P *new_isam_p_buf;
103 #ifdef TERM_COUNT        
104         int *new_term_no;        
105 #endif
106         p->isam_p_size = 2*p->isam_p_size + 100;
107         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
108                                             p->isam_p_size);
109         if (p->isam_p_buf)
110         {
111             memcpy(new_isam_p_buf, p->isam_p_buf,
112                     p->isam_p_indx * sizeof(*p->isam_p_buf));
113             xfree(p->isam_p_buf);
114         }
115         p->isam_p_buf = new_isam_p_buf;
116
117 #ifdef TERM_COUNT
118         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
119         if (p->term_no)
120         {
121             memcpy(new_term_no, p->isam_p_buf,
122                     p->isam_p_indx * sizeof(*p->term_no));
123             xfree(p->term_no);
124         }
125         p->term_no = new_term_no;
126 #endif
127     }
128     assert(*info == sizeof(*p->isam_p_buf));
129     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
130
131     if (p->termset)
132     {
133         const char *db;
134         char term_tmp[IT_MAX_WORD];
135         int ord = 0;
136         const char *index_name;
137         int len = key_SU_decode (&ord, (const unsigned char *) name);
138         
139         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
140         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141         zebraExplain_lookup_ord(p->zh->reg->zei,
142                                 ord, 0 /* index_type */, &db, &index_name);
143         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
144         
145         resultSetAddTerm(p->zh, p->termset, name[len], db,
146                          index_name, term_tmp);
147     }
148     (p->isam_p_indx)++;
149     return 0;
150 }
151
/* Grep callback adapter: forwards name and info to add_isam_p(), with
   the opaque pointer p interpreted as struct grep_info. Returns the
   result of add_isam_p(). */
static int grep_handle(char *name, const char *info, void *p)
{
    return add_isam_p(name, info, (struct grep_info *) p);
}
156
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158                     const char *ct1, const char *ct2, int first)
159 {
160     const char *s1, *s0 = *src;
161     const char **map;
162
163     /* skip white space */
164     while (*s0)
165     {
166         if (ct1 && strchr(ct1, *s0))
167             break;
168         if (ct2 && strchr(ct2, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/*
 * esc_str: render a byte buffer in printable form for diagnostics.
 * Each input byte becomes "HH:c  " (two hex digits, a colon, the
 * character itself or '?' when it is outside 32..126, two spaces).
 *  out_buf, out_size:  destination; out_size must exceed 20
 *  in_buf, in_size:    bytes to render
 * When fewer than 20 bytes of the destination would remain, ".." is
 * appended and the remaining input is dropped.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of the text in out_buf */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    *out_buf = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx++] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
/* Characters that the term_10x functions prefix with a backslash when
   copying unmapped input into the dictionary regular expression. */
#define REGEX_CHARS " []()|.*+?!"
208
209 /* term_100: handle term, where trunc = none(no operators at all) */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211                     const char **src, char *dst, int space_split,
212                     char *dst_term)
213 {
214     const char *s0;
215     const char **map;
216     int i = 0;
217     int j = 0;
218
219     const char *space_start = 0;
220     const char *space_end = 0;
221
222     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
223         return 0;
224     s0 = *src;
225     while (*s0)
226     {
227         const char *s1 = s0;
228         int q_map_match = 0;
229         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
230                                 &q_map_match);
231         if (space_split)
232         {
233             if (**map == *CHR_SPACE)
234                 break;
235         }
236         else  /* complete subfield only. */
237         {
238             if (**map == *CHR_SPACE)
239             {   /* save space mapping for later  .. */
240                 space_start = s1;
241                 space_end = s0;
242                 continue;
243             }
244             else if (space_start)
245             {   /* reload last space */
246                 while (space_start < space_end)
247                 {
248                     if (strchr(REGEX_CHARS, *space_start))
249                         dst[i++] = '\\';
250                     dst_term[j++] = *space_start;
251                     dst[i++] = *space_start++;
252                 }
253                 /* and reset */
254                 space_start = space_end = 0;
255             }
256         }
257         /* add non-space char */
258         memcpy(dst_term+j, s1, s0 - s1);
259         j += (s0 - s1);
260         if (!q_map_match)
261         {
262             while (s1 < s0)
263             {
264                 if (strchr(REGEX_CHARS, *s1))
265                     dst[i++] = '\\';
266                 dst[i++] = *s1++;
267             }
268         }
269         else
270         {
271             char tmpbuf[80];
272             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
273             
274             strcpy(dst + i, map[0]);
275             i += strlen(map[0]);
276         }
277     }
278     dst[i] = '\0';
279     dst_term[j] = '\0';
280     *src = s0;
281     return i;
282 }
283
284 /* term_101: handle term, where trunc = Process # */
285 static int term_101(ZebraMaps zebra_maps, int reg_type,
286                     const char **src, char *dst, int space_split,
287                     char *dst_term)
288 {
289     const char *s0;
290     const char **map;
291     int i = 0;
292     int j = 0;
293
294     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
295         return 0;
296     s0 = *src;
297     while (*s0)
298     {
299         if (*s0 == '#')
300         {
301             dst[i++] = '.';
302             dst[i++] = '*';
303             dst_term[j++] = *s0++;
304         }
305         else
306         {
307             const char *s1 = s0;
308             int q_map_match = 0;
309             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
310                                     &q_map_match);
311             if (space_split && **map == *CHR_SPACE)
312                 break;
313
314             /* add non-space char */
315             memcpy(dst_term+j, s1, s0 - s1);
316             j += (s0 - s1);
317             if (!q_map_match)
318             {
319                 while (s1 < s0)
320                 {
321                     if (strchr(REGEX_CHARS, *s1))
322                         dst[i++] = '\\';
323                     dst[i++] = *s1++;
324                 }
325             }
326             else
327             {
328                 char tmpbuf[80];
329                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
330                 
331                 strcpy(dst + i, map[0]);
332                 i += strlen(map[0]);
333             }
334         }
335     }
336     dst[i] = '\0';
337     dst_term[j++] = '\0';
338     *src = s0;
339     return i;
340 }
341
342 /* term_103: handle term, where trunc = re-2 (regular expressions) */
343 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
344                     char *dst, int *errors, int space_split,
345                     char *dst_term)
346 {
347     int i = 0;
348     int j = 0;
349     const char *s0;
350     const char **map;
351
352     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
353         return 0;
354     s0 = *src;
355     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
356         isdigit(((const unsigned char *)s0)[1]))
357     {
358         *errors = s0[1] - '0';
359         s0 += 3;
360         if (*errors > 3)
361             *errors = 3;
362     }
363     while (*s0)
364     {
365         if (strchr("^\\()[].*+?|-", *s0))
366         {
367             dst_term[j++] = *s0;
368             dst[i++] = *s0++;
369         }
370         else
371         {
372             const char *s1 = s0;
373             int q_map_match = 0;
374             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
375                                     &q_map_match);
376             if (space_split && **map == *CHR_SPACE)
377                 break;
378
379             /* add non-space char */
380             memcpy(dst_term+j, s1, s0 - s1);
381             j += (s0 - s1);
382             if (!q_map_match)
383             {
384                 while (s1 < s0)
385                 {
386                     if (strchr(REGEX_CHARS, *s1))
387                         dst[i++] = '\\';
388                     dst[i++] = *s1++;
389                 }
390             }
391             else
392             {
393                 char tmpbuf[80];
394                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
395                 
396                 strcpy(dst + i, map[0]);
397                 i += strlen(map[0]);
398             }
399         }
400     }
401     dst[i] = '\0';
402     dst_term[j] = '\0';
403     *src = s0;
404     
405     return i;
406 }
407
/* term_102: handle term, where trunc = re-1 (regular expressions).
   Same as term_103 but without an error count: errors is passed as
   NULL, so a leading "+N+" prefix is not interpreted. */
static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
                    char *dst, int space_split, char *dst_term)
{
    return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
                    dst_term);
}
415
416
417 /* term_104: handle term, where trunc = Process # and ! */
418 static int term_104(ZebraMaps zebra_maps, int reg_type,
419                     const char **src, char *dst, int space_split,
420                     char *dst_term)
421 {
422     const char *s0;
423     const char **map;
424     int i = 0;
425     int j = 0;
426
427     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
428         return 0;
429     s0 = *src;
430     while (*s0)
431     {
432         if (*s0 == '?')
433         {
434             dst_term[j++] = *s0++;
435             if (*s0 >= '0' && *s0 <= '9')
436             {
437                 int limit = 0;
438                 while (*s0 >= '0' && *s0 <= '9')
439                 {
440                     limit = limit * 10 + (*s0 - '0');
441                     dst_term[j++] = *s0++;
442                 }
443                 if (limit > 20)
444                     limit = 20;
445                 while (--limit >= 0)
446                 {
447                     dst[i++] = '.';
448                     dst[i++] = '?';
449                 }
450             }
451             else
452             {
453                 dst[i++] = '.';
454                 dst[i++] = '*';
455             }
456         }
457         else if (*s0 == '*')
458         {
459             dst[i++] = '.';
460             dst[i++] = '*';
461             dst_term[j++] = *s0++;
462         }
463         else if (*s0 == '#')
464         {
465             dst[i++] = '.';
466             dst_term[j++] = *s0++;
467         }
468         else
469         {
470             const char *s1 = s0;
471             int q_map_match = 0;
472             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
473                                     &q_map_match);
474             if (space_split && **map == *CHR_SPACE)
475                 break;
476
477             /* add non-space char */
478             memcpy(dst_term+j, s1, s0 - s1);
479             j += (s0 - s1);
480             if (!q_map_match)
481             {
482                 while (s1 < s0)
483                 {
484                     if (strchr(REGEX_CHARS, *s1))
485                         dst[i++] = '\\';
486                     dst[i++] = *s1++;
487                 }
488             }
489             else
490             {
491                 char tmpbuf[80];
492                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
493                 
494                 strcpy(dst + i, map[0]);
495                 i += strlen(map[0]);
496             }
497         }
498     }
499     dst[i] = '\0';
500     dst_term[j++] = '\0';
501     *src = s0;
502     return i;
503 }
504
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
506 static int term_105(ZebraMaps zebra_maps, int reg_type,
507                     const char **src, char *dst, int space_split,
508                     char *dst_term, int right_truncate)
509 {
510     const char *s0;
511     const char **map;
512     int i = 0;
513     int j = 0;
514
515     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
516         return 0;
517     s0 = *src;
518     while (*s0)
519     {
520         if (*s0 == '*')
521         {
522             dst[i++] = '.';
523             dst[i++] = '*';
524             dst_term[j++] = *s0++;
525         }
526         else if (*s0 == '!')
527         {
528             dst[i++] = '.';
529             dst_term[j++] = *s0++;
530         }
531         else
532         {
533             const char *s1 = s0;
534             int q_map_match = 0;
535             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
536                                     &q_map_match);
537             if (space_split && **map == *CHR_SPACE)
538                 break;
539
540             /* add non-space char */
541             memcpy(dst_term+j, s1, s0 - s1);
542             j += (s0 - s1);
543             if (!q_map_match)
544             {
545                 while (s1 < s0)
546                 {
547                     if (strchr(REGEX_CHARS, *s1))
548                         dst[i++] = '\\';
549                     dst[i++] = *s1++;
550                 }
551             }
552             else
553             {
554                 char tmpbuf[80];
555                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
556                 
557                 strcpy(dst + i, map[0]);
558                 i += strlen(map[0]);
559             }
560         }
561     }
562     if (right_truncate)
563     {
564         dst[i++] = '.';
565         dst[i++] = '*';
566     }
567     dst[i] = '\0';
568     
569     dst_term[j++] = '\0';
570     *src = s0;
571     return i;
572 }
573
574
/* gen_regular_rel - generate regular expression from relation
 *  dst:     receives the NUL-terminated expression; there is no size
 *           parameter, so the caller must supply a large enough buffer
 *  val:     border value (inclusive)
 *  islt:    1 if <=; 0 if >=.
 *
 * The expression is an alternation over decimal strings. A negative
 * border is handled by negating val, flipping the direction and adding
 * the sign to the pattern.
 */
static void gen_regular_rel(char *dst, int val, int islt)
{
    int dst_p;
    int w, d, i;
    int pos = 0;
    char numstr[20];

    yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
    if (val >= 0)
    {
        if (islt)
            /* <= non-negative: any negative number qualifies */
            strcpy(dst, "(-[0-9]+|(");
        else
            strcpy(dst, "((");
    }
    else
    {
        if (!islt)
        {
            /* >= negative: any non-negative number qualifies, or a
               negative one whose magnitude is <= |val| */
            strcpy(dst, "([0-9]+|-(");
            /* NOTE(review): redundant store, dst_p is set again below */
            dst_p = strlen(dst);
            islt = 1;
        }
        else
        {
            /* <= negative: negative with magnitude >= |val| */
            strcpy(dst, "(-(");
            islt = 0;
        }
        /* NOTE(review): overflows if val is INT_MIN -- confirm callers */
        val = -val;
    }
    dst_p = strlen(dst);
    sprintf(numstr, "%d", val);
    /* Walk the digits from least to most significant; pos is the number
       of digits to the right of the current one. Each pass emits one
       alternative: the leading digits of val, a range for the current
       digit and [0-9] for every lower position. */
    for (w = strlen(numstr); --w >= 0; pos++)
    {
        d = numstr[w];
        if (pos > 0)
        {
            /* above the lowest digit the range must exclude the digit
               itself; if that leaves nothing, skip this position (the
               loop increment pos++ still runs on continue) */
            if (islt)
            {
                if (d == '0')
                    continue;
                d--;
            }
            else
            {
                if (d == '9')
                    continue;
                d++;
            }
        }

        /* append the full number, then step back so that dst_p points
           at the digit belonging to the current position */
        strcpy(dst + dst_p, numstr);
        dst_p = strlen(dst) - pos - 1;

        if (islt)
        {
            if (d != '0')
            {
                dst[dst_p++] = '[';
                dst[dst_p++] = '0';
                dst[dst_p++] = '-';
                dst[dst_p++] = d;
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        else
        {
            if (d != '9')
            {
                dst[dst_p++] = '[';
                dst[dst_p++] = d;
                dst[dst_p++] = '-';
                dst[dst_p++] = '9';
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        /* the lower positions may hold any digit */
        for (i = 0; i<pos; i++)
        {
            dst[dst_p++] = '[';
            dst[dst_p++] = '0';
            dst[dst_p++] = '-';
            dst[dst_p++] = '9';
            dst[dst_p++] = ']';
        }
        dst[dst_p++] = '|';
    }
    dst[dst_p] = '\0';
    if (islt)
    {
        /* match everything less than 10^(pos-1) */
        strcat(dst, "0*");
        for (i = 1; i<pos; i++)
            strcat(dst, "[0-9]?");
    }
    else
    {
        /* match everything greater than 10^pos */
        for (i = 0; i <= pos; i++)
            strcat(dst, "[0-9]");
        strcat(dst, "[0-9]*");
    }
    strcat(dst, "))");
}
686
/*
 * string_rel_add_char: copy one logical character from src to the
 * output. A backslash is copied together with the byte following it,
 * so an escaped character is never split.
 *  term_p:  in/out write position in the output buffer
 *  src:     source string
 *  indx:    in/out read position in src
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
693
/*
 * Regular expressions for string relations, shown for the term "abc".
 * The second line of each pair is the form string_relation() emits
 * (for < and <= the code also excludes FIRST_IN_FIELD_CHAR inside the
 * character class).
 *
 *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
 *              ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
 *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
 *              ([^-a].*|a[^-b].*|ab[c-].*)
 *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
 *              ([^a-].*|a[^b-].*|ab[^c-].*)
 *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
 *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
 *
 * string_relation: read the relation attribute (type 2) of zapt, take
 * the next term from *term_sub and append the matching regular
 * expression at the current end of term_dict.
 *  term_sub:      in/out; advanced past the consumed term
 *  term_dict:     NUL-terminated expression prefix; output is appended
 *  attributeSet:  not used by this function
 *  reg_type, space_split:  passed on to term_100()
 *  term_dst:      receives the raw term text
 *  error_code:    set to 0, or to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE
 *                 for an unsupported relation
 * Returns 1 on success; 0 if term_100() found no term or the relation
 * is unsupported.
 */
static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                           const char **term_sub, char *term_dict,
                           oid_value attributeSet,
                           int reg_type, int space_split, char *term_dst,
                           int *error_code)
{
    AttrType relation;
    int relation_value;
    int i;
    char *term_tmp = term_dict + strlen(term_dict);
    char term_component[2*IT_MAX_WORD+20];

    attr_init_APT(&relation, zapt, 2);
    relation_value = attr_find(&relation, NULL);

    *error_code = 0;
    yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
    switch (relation_value)
    {
    case 1:
        if (!term_100(zh->reg->zebra_maps, reg_type,
                      term_sub, term_component,
                      space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation <");

        *term_tmp++ = '(';
        /* one alternative per term character: the prefix before it,
           then a class of everything sorting below that character */
        for (i = 0; term_component[i]; )
        {
            int j = 0;

            if (i)
                *term_tmp++ = '|';
            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);

            *term_tmp++ = '[';

            *term_tmp++ = '^';

            /* escaped FIRST_IN_FIELD_CHAR is excluded as well */
            *term_tmp++ = 1;
            *term_tmp++ = FIRST_IN_FIELD_CHAR;

            /* advances i by one logical (possibly escaped) character */
            string_rel_add_char(&term_tmp, term_component, &i);
            *term_tmp++ = '-';

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            /* stop adding alternatives once the expression is long */
            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        *term_tmp++ = ')';
        *term_tmp = '\0';
        /* NOTE(review): logged at YLOG_LOG rather than log_level_rpn;
           looks like leftover debugging -- confirm */
        yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
        break;
    case 2:
        if (!term_100(zh->reg->zebra_maps, reg_type,
                      term_sub, term_component,
                      space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation <=");

        *term_tmp++ = '(';
        /* as for '<', each alternative followed by '|' ... */
        for (i = 0; term_component[i]; )
        {
            int j = 0;

            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            *term_tmp++ = '^';

            *term_tmp++ = 1;
            *term_tmp++ = FIRST_IN_FIELD_CHAR;

            string_rel_add_char(&term_tmp, term_component, &i);
            *term_tmp++ = '-';

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            *term_tmp++ = '|';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        /* ... and finally the term itself for the equality part */
        for (i = 0; term_component[i]; )
            string_rel_add_char(&term_tmp, term_component, &i);
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 5:
        if (!term_100 (zh->reg->zebra_maps, reg_type,
                       term_sub, term_component, space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation >");

        *term_tmp++ = '(';
        /* one alternative per term character: the prefix before it,
           then a class excluding everything up to that character */
        for (i = 0; term_component[i];)
        {
            int j = 0;

            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            *term_tmp++ = '^';
            *term_tmp++ = '-';
            string_rel_add_char(&term_tmp, term_component, &i);

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            *term_tmp++ = '|';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        /* the term itself followed by at least one more character */
        for (i = 0; term_component[i];)
            string_rel_add_char(&term_tmp, term_component, &i);
        *term_tmp++ = '.';
        *term_tmp++ = '+';
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 4:
        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
                      term_component, space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation >=");

        *term_tmp++ = '(';
        for (i = 0; term_component[i];)
        {
            int j = 0;

            if (i)
                *term_tmp++ = '|';
            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            if (term_component[i+1])
            {
                /* not the last byte: strictly greater at this position */
                *term_tmp++ = '^';
                *term_tmp++ = '-';
                string_rel_add_char(&term_tmp, term_component, &i);
            }
            else
            {
                /* last byte: the range starts at the character itself,
                   which covers the equality part */
                string_rel_add_char(&term_tmp, term_component, &i);
                *term_tmp++ = '-';
            }
            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 3:
    case 102:
    case -1:
        /* equality; -1 is the value seen when attr_find() finds no
           relation attribute (presumably) -- an empty term succeeds
           without appending anything */
        if (!**term_sub)
            return 1;
        yaz_log(log_level_rpn, "Relation =");
        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
                      term_component, space_split, term_dst))
            return 0;
        strcat(term_tmp, "(");
        strcat(term_tmp, term_component);
        strcat(term_tmp, ")");
        break;
    case 103:
        yaz_log(log_level_rpn, "Relation always matches");
        /* skip to end of term (we don't care what it is) */
        while (**term_sub != '\0')
            (*term_sub)++;
        break;
    default:
        *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
        return 0;
    }
    return 1;
}
897
/* Forward declaration: string_term() is defined after term_trunc(),
   which calls it. */
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const char **term_sub,
                             oid_value attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             int reg_type, int complete_flag,
                             int num_bases, char **basenames,
                             char *term_dst,
                             const char *xpath_use,
                             struct ord_list **ol);
907
908 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
909                                  Z_AttributesPlusTerm *zapt,
910                                  zint *hits_limit_value,
911                                  const char **term_ref_id_str,
912                                  NMEM nmem)
913 {
914     AttrType term_ref_id_attr;
915     AttrType hits_limit_attr;
916     int term_ref_id_int;
917  
918     attr_init_APT(&hits_limit_attr, zapt, 11);
919     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
920
921     attr_init_APT(&term_ref_id_attr, zapt, 10);
922     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
923     if (term_ref_id_int >= 0)
924     {
925         char *res = nmem_malloc(nmem, 20);
926         sprintf(res, "%d", term_ref_id_int);
927         *term_ref_id_str = res;
928     }
929
930     /* no limit given ? */
931     if (*hits_limit_value == -1)
932     {
933         if (*term_ref_id_str)
934         {
935             /* use global if term_ref is present */
936             *hits_limit_value = zh->approx_limit;
937         }
938         else
939         {
940             /* no counting if term_ref is not present */
941             *hits_limit_value = 0;
942         }
943     }
944     else if (*hits_limit_value == 0)
945     {
946         /* 0 is the same as global limit */
947         *hits_limit_value = zh->approx_limit;
948     }
949     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
950             *term_ref_id_str ? *term_ref_id_str : "none",
951             *hits_limit_value);
952     return ZEBRA_OK;
953 }
954
955 static ZEBRA_RES term_trunc(ZebraHandle zh,
956                             Z_AttributesPlusTerm *zapt,
957                             const char **term_sub, 
958                             oid_value attributeSet, NMEM stream,
959                             struct grep_info *grep_info,
960                             int reg_type, int complete_flag,
961                             int num_bases, char **basenames,
962                             char *term_dst,
963                             const char *rank_type, 
964                             const char *xpath_use,
965                             NMEM rset_nmem,
966                             RSET *rset,
967                             struct rset_key_control *kc)
968 {
969     ZEBRA_RES res;
970     struct ord_list *ol;
971     zint hits_limit_value;
972     const char *term_ref_id_str = 0;
973     *rset = 0;
974
975     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
976     grep_info->isam_p_indx = 0;
977     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
978                       reg_type, complete_flag, num_bases, basenames,
979                       term_dst, xpath_use, &ol);
980     if (res != ZEBRA_OK)
981         return res;
982     if (!*term_sub)  /* no more terms ? */
983         return res;
984     yaz_log(log_level_rpn, "term: %s", term_dst);
985     *rset = rset_trunc(zh, grep_info->isam_p_buf,
986                        grep_info->isam_p_indx, term_dst,
987                        strlen(term_dst), rank_type, 1 /* preserve pos */,
988                        zapt->term->which, rset_nmem,
989                        kc, kc->scope, ol, reg_type, hits_limit_value,
990                        term_ref_id_str);
991     if (!*rset)
992         return ZEBRA_FAIL;
993     return ZEBRA_OK;
994 }
995
996 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
997                              const char **term_sub, 
998                              oid_value attributeSet, NMEM stream,
999                              struct grep_info *grep_info,
1000                              int reg_type, int complete_flag,
1001                              int num_bases, char **basenames,
1002                              char *term_dst,
1003                              const char *xpath_use,
1004                              struct ord_list **ol)
1005 {
1006     char term_dict[2*IT_MAX_WORD+4000];
1007     int j, r, base_no;
1008     AttrType truncation;
1009     int truncation_value;
1010     const char *termp;
1011     struct rpn_char_map_info rcmi;
1012     int space_split = complete_flag ? 0 : 1;
1013
1014     int bases_ok = 0;     /* no of databases with OK attribute */
1015
1016     *ol = ord_list_create(stream);
1017
1018     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1019     attr_init_APT(&truncation, zapt, 5);
1020     truncation_value = attr_find(&truncation, NULL);
1021     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1022
1023     for (base_no = 0; base_no < num_bases; base_no++)
1024     {
1025         int ord = -1;
1026         int regex_range = 0;
1027         int max_pos, prefix_len = 0;
1028         int relation_error;
1029         char ord_buf[32];
1030         int ord_len, i;
1031
1032         termp = *term_sub; /* start of term for each database */
1033
1034         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1035         {
1036             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1037                            basenames[base_no]);
1038             return ZEBRA_FAIL;
1039         }
1040         
1041         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1042                               attributeSet, &ord) != ZEBRA_OK)
1043             continue;
1044
1045         bases_ok++;
1046
1047         *ol = ord_list_append(stream, *ol, ord);
1048         ord_len = key_SU_encode (ord, ord_buf);
1049         
1050         term_dict[prefix_len++] = '(';
1051         for (i = 0; i<ord_len; i++)
1052         {
1053             term_dict[prefix_len++] = 1;  /* our internal regexp escape char */
1054             term_dict[prefix_len++] = ord_buf[i];
1055         }
1056         term_dict[prefix_len++] = ')';
1057         term_dict[prefix_len] = '\0';
1058         j = prefix_len;
1059         switch (truncation_value)
1060         {
1061         case -1:         /* not specified */
1062         case 100:        /* do not truncate */
1063             if (!string_relation(zh, zapt, &termp, term_dict,
1064                                  attributeSet,
1065                                  reg_type, space_split, term_dst,
1066                                  &relation_error))
1067             {
1068                 if (relation_error)
1069                 {
1070                     zebra_setError(zh, relation_error, 0);
1071                     return ZEBRA_FAIL;
1072                 }
1073                 *term_sub = 0;
1074                 return ZEBRA_OK;
1075             }
1076             break;
1077         case 1:          /* right truncation */
1078             term_dict[j++] = '(';
1079             if (!term_100(zh->reg->zebra_maps, reg_type,
1080                           &termp, term_dict + j, space_split, term_dst))
1081             {
1082                 *term_sub = 0;
1083                 return ZEBRA_OK;
1084             }
1085             strcat(term_dict, ".*)");
1086             break;
1087         case 2:          /* keft truncation */
1088             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1089             if (!term_100(zh->reg->zebra_maps, reg_type,
1090                           &termp, term_dict + j, space_split, term_dst))
1091             {
1092                 *term_sub = 0;
1093                 return ZEBRA_OK;
1094             }
1095             strcat(term_dict, ")");
1096             break;
1097         case 3:          /* left&right truncation */
1098             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1099             if (!term_100(zh->reg->zebra_maps, reg_type,
1100                           &termp, term_dict + j, space_split, term_dst))
1101             {
1102                 *term_sub = 0;
1103                 return ZEBRA_OK;
1104             }
1105             strcat(term_dict, ".*)");
1106             break;
1107         case 101:        /* process # in term */
1108             term_dict[j++] = '(';
1109             if (!term_101(zh->reg->zebra_maps, reg_type,
1110                           &termp, term_dict + j, space_split, term_dst))
1111             {
1112                 *term_sub = 0;
1113                 return ZEBRA_OK;
1114             }
1115             strcat(term_dict, ")");
1116             break;
1117         case 102:        /* Regexp-1 */
1118             term_dict[j++] = '(';
1119             if (!term_102(zh->reg->zebra_maps, reg_type,
1120                           &termp, term_dict + j, space_split, term_dst))
1121             {
1122                 *term_sub = 0;
1123                 return ZEBRA_OK;
1124             }
1125             strcat(term_dict, ")");
1126             break;
1127         case 103:       /* Regexp-2 */
1128             regex_range = 1;
1129             term_dict[j++] = '(';
1130             if (!term_103(zh->reg->zebra_maps, reg_type,
1131                           &termp, term_dict + j, &regex_range,
1132                           space_split, term_dst))
1133             {
1134                 *term_sub = 0;
1135                 return ZEBRA_OK;
1136             }
1137             strcat(term_dict, ")");
1138             break;
1139         case 104:        /* process # and ! in term */
1140             term_dict[j++] = '(';
1141             if (!term_104(zh->reg->zebra_maps, reg_type,
1142                           &termp, term_dict + j, space_split, term_dst))
1143             {
1144                 *term_sub = 0;
1145                 return ZEBRA_OK;
1146             }
1147             strcat(term_dict, ")");
1148             break;
1149         case 105:        /* process * and ! in term */
1150             term_dict[j++] = '(';
1151             if (!term_105(zh->reg->zebra_maps, reg_type,
1152                           &termp, term_dict + j, space_split, term_dst, 1))
1153             {
1154                 *term_sub = 0;
1155                 return ZEBRA_OK;
1156             }
1157             strcat(term_dict, ")");
1158             break;
1159         case 106:        /* process * and ! in term */
1160             term_dict[j++] = '(';
1161             if (!term_105(zh->reg->zebra_maps, reg_type,
1162                           &termp, term_dict + j, space_split, term_dst, 0))
1163             {
1164                 *term_sub = 0;
1165                 return ZEBRA_OK;
1166             }
1167             strcat(term_dict, ")");
1168             break;
1169         default:
1170             zebra_setError_zint(zh,
1171                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1172                                 truncation_value);
1173             return ZEBRA_FAIL;
1174         }
1175         if (1)
1176         {
1177             char buf[80];
1178             const char *input = term_dict + prefix_len;
1179             esc_str(buf, sizeof(buf), input, strlen(input));
1180         }
1181         yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1182         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1183                              grep_info, &max_pos, 
1184                              ord_len /* number of "exact" chars */,
1185                              grep_handle);
1186         if (r)
1187             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1188     }
1189     if (!bases_ok)
1190         return ZEBRA_FAIL;
1191     *term_sub = termp;
1192     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1193     return ZEBRA_OK;
1194 }
1195
1196
1197
1198 static void grep_info_delete(struct grep_info *grep_info)
1199 {
1200 #ifdef TERM_COUNT
1201     xfree(grep_info->term_no);
1202 #endif
1203     xfree(grep_info->isam_p_buf);
1204 }
1205
1206 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1207                                    Z_AttributesPlusTerm *zapt,
1208                                    struct grep_info *grep_info,
1209                                    int reg_type)
1210 {
1211     AttrType termset;
1212     int termset_value_numeric;
1213     const char *termset_value_string;
1214
1215 #ifdef TERM_COUNT
1216     grep_info->term_no = 0;
1217 #endif
1218     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1219     grep_info->isam_p_size = 0;
1220     grep_info->isam_p_buf = NULL;
1221     grep_info->zh = zh;
1222     grep_info->reg_type = reg_type;
1223     grep_info->termset = 0;
1224     if (!zapt)
1225         return ZEBRA_OK;
1226     attr_init_APT(&termset, zapt, 8);
1227     termset_value_numeric =
1228         attr_find_ex(&termset, NULL, &termset_value_string);
1229     if (termset_value_numeric != -1)
1230     {
1231 #if TERMSET_DISABLE
1232         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1233         return ZEBRA_FAIL;
1234 #else
1235         char resname[32];
1236         const char *termset_name = 0;
1237         if (termset_value_numeric != -2)
1238         {
1239     
1240             sprintf(resname, "%d", termset_value_numeric);
1241             termset_name = resname;
1242         }
1243         else
1244             termset_name = termset_value_string;
1245         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1246         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1247         if (!grep_info->termset)
1248         {
1249             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1250             return ZEBRA_FAIL;
1251         }
1252 #endif
1253     }
1254     return ZEBRA_OK;
1255 }
1256                                
1257 /**
1258   \brief Create result set(s) for list of terms
1259   \param zh Zebra Handle
1260   \param zapt Attributes Plust Term (RPN leaf)
1261   \param termz term as used in query but converted to UTF-8
1262   \param attributeSet default attribute set
1263   \param stream memory for result
1264   \param reg_type register type ('w', 'p',..)
1265   \param complete_flag whether it's phrases or not
1266   \param rank_type term flags for ranking
1267   \param xpath_use use attribute for X-Path (-1 for no X-path)
1268   \param num_bases number of databases
1269   \param basenames array of databases
1270   \param rset_nmem memory for result sets
1271   \param result_sets output result set for each term in list (output)
1272   \param num_result_sets number of output result sets
1273   \param kc rset key control to be used for created result sets
1274 */
1275 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1276                                  Z_AttributesPlusTerm *zapt,
1277                                  const char *termz,
1278                                  oid_value attributeSet,
1279                                  NMEM stream,
1280                                  int reg_type, int complete_flag,
1281                                  const char *rank_type,
1282                                  const char *xpath_use,
1283                                  int num_bases, char **basenames, 
1284                                  NMEM rset_nmem,
1285                                  RSET **result_sets, int *num_result_sets,
1286                                  struct rset_key_control *kc)
1287 {
1288     char term_dst[IT_MAX_WORD+1];
1289     struct grep_info grep_info;
1290     const char *termp = termz;
1291     int alloc_sets = 0;
1292
1293     *num_result_sets = 0;
1294     *term_dst = 0;
1295     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1296         return ZEBRA_FAIL;
1297     while(1)
1298     { 
1299         ZEBRA_RES res;
1300
1301         if (alloc_sets == *num_result_sets)
1302         {
1303             int add = 10;
1304             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1305                                               sizeof(*rnew));
1306             if (alloc_sets)
1307                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1308             alloc_sets = alloc_sets + add;
1309             *result_sets = rnew;
1310         }
1311         res = term_trunc(zh, zapt, &termp, attributeSet,
1312                          stream, &grep_info,
1313                          reg_type, complete_flag,
1314                          num_bases, basenames,
1315                          term_dst, rank_type,
1316                          xpath_use, rset_nmem,
1317                          &(*result_sets)[*num_result_sets],
1318                          kc);
1319         if (res != ZEBRA_OK)
1320         {
1321             int i;
1322             for (i = 0; i < *num_result_sets; i++)
1323                 rset_delete((*result_sets)[i]);
1324             grep_info_delete (&grep_info);
1325             return res;
1326         }
1327         if ((*result_sets)[*num_result_sets] == 0)
1328             break;
1329         (*num_result_sets)++;
1330
1331         if (!*termp)
1332             break;
1333     }
1334     grep_info_delete(&grep_info);
1335     return ZEBRA_OK;
1336 }
1337
1338 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1339                                          Z_AttributesPlusTerm *zapt,
1340                                          oid_value attributeSet,
1341                                          int reg_type,
1342                                          int num_bases, char **basenames,
1343                                          NMEM rset_nmem,
1344                                          RSET *rset,
1345                                          struct rset_key_control *kc)
1346 {
1347     RSET *f_set;
1348     int base_no;
1349     int position_value;
1350     int num_sets = 0;
1351     AttrType position;
1352
1353     attr_init_APT(&position, zapt, 3);
1354     position_value = attr_find(&position, NULL);
1355     switch(position_value)
1356     {
1357     case 3:
1358     case -1:
1359         return ZEBRA_OK;
1360     case 1:
1361     case 2:
1362         break;
1363     default:
1364         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1365                             position_value);
1366         return ZEBRA_FAIL;
1367     }
1368
1369     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1370     {
1371         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1372                             position_value);
1373         return ZEBRA_FAIL;
1374     }
1375
1376     if (!zh->reg->isamb && !zh->reg->isamc)
1377     {
1378         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1379                             position_value);
1380         return ZEBRA_FAIL;
1381     }
1382     f_set = xmalloc(sizeof(RSET) * num_bases);
1383     for (base_no = 0; base_no < num_bases; base_no++)
1384     {
1385         int ord = -1;
1386         char ord_buf[32];
1387         char term_dict[100];
1388         int ord_len;
1389         char *val;
1390         ISAM_P isam_p;
1391
1392         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1393         {
1394             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1395                            basenames[base_no]);
1396             return ZEBRA_FAIL;
1397         }
1398         
1399         if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1400                               attributeSet, &ord) != ZEBRA_OK)
1401             continue;
1402
1403         ord_len = key_SU_encode (ord, ord_buf);
1404         memcpy(term_dict, ord_buf, ord_len);
1405         strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1406         val = dict_lookup(zh->reg->dict, term_dict);
1407         if (!val)
1408             continue;
1409         assert(*val == sizeof(ISAM_P));
1410         memcpy(&isam_p, val+1, sizeof(isam_p));
1411         
1412
1413         if (zh->reg->isamb)
1414             f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1415                                                zh->reg->isamb, isam_p, 0);
1416         else if (zh->reg->isamc)
1417             f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1418                                                zh->reg->isamc, isam_p, 0);
1419     }
1420     if (num_sets)
1421     {
1422         *rset = rset_create_or(rset_nmem, kc, kc->scope,
1423                                0 /* termid */, num_sets, f_set);
1424     }
1425     xfree(f_set);
1426     return ZEBRA_OK;
1427 }
1428                                          
1429 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1430                                        Z_AttributesPlusTerm *zapt,
1431                                        const char *termz_org,
1432                                        oid_value attributeSet,
1433                                        NMEM stream,
1434                                        int reg_type, int complete_flag,
1435                                        const char *rank_type,
1436                                        const char *xpath_use,
1437                                        int num_bases, char **basenames, 
1438                                        NMEM rset_nmem,
1439                                        RSET *rset,
1440                                        struct rset_key_control *kc)
1441 {
1442     RSET *result_sets = 0;
1443     int num_result_sets = 0;
1444     ZEBRA_RES res =
1445         term_list_trunc(zh, zapt, termz_org, attributeSet,
1446                         stream, reg_type, complete_flag,
1447                         rank_type, xpath_use,
1448                         num_bases, basenames,
1449                         rset_nmem,
1450                         &result_sets, &num_result_sets, kc);
1451
1452     if (res != ZEBRA_OK)
1453         return res;
1454
1455     if (num_result_sets > 0)
1456     {
1457         RSET first_set = 0;
1458         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1459                                       reg_type,
1460                                       num_bases, basenames,
1461                                       rset_nmem, &first_set,
1462                                       kc);
1463         if (res != ZEBRA_OK)
1464             return res;
1465         if (first_set)
1466         {
1467             RSET *nsets = nmem_malloc(stream,
1468                                       sizeof(RSET) * (num_result_sets+1));
1469             nsets[0] = first_set;
1470             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1471             result_sets = nsets;
1472             num_result_sets++;
1473         }
1474     }
1475     if (num_result_sets == 0)
1476         *rset = rset_create_null(rset_nmem, kc, 0); 
1477     else if (num_result_sets == 1)
1478         *rset = result_sets[0];
1479     else
1480         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1481                                  num_result_sets, result_sets,
1482                                  1 /* ordered */, 0 /* exclusion */,
1483                                  3 /* relation */, 1 /* distance */);
1484     if (!*rset)
1485         return ZEBRA_FAIL;
1486     return ZEBRA_OK;
1487 }
1488
1489 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1490                                         Z_AttributesPlusTerm *zapt,
1491                                         const char *termz_org,
1492                                         oid_value attributeSet,
1493                                         NMEM stream,
1494                                         int reg_type, int complete_flag,
1495                                         const char *rank_type,
1496                                         const char *xpath_use,
1497                                         int num_bases, char **basenames,
1498                                         NMEM rset_nmem,
1499                                         RSET *rset,
1500                                         struct rset_key_control *kc)
1501 {
1502     RSET *result_sets = 0;
1503     int num_result_sets = 0;
1504     int i;
1505     ZEBRA_RES res =
1506         term_list_trunc(zh, zapt, termz_org, attributeSet,
1507                         stream, reg_type, complete_flag,
1508                         rank_type, xpath_use,
1509                         num_bases, basenames,
1510                         rset_nmem,
1511                         &result_sets, &num_result_sets, kc);
1512     if (res != ZEBRA_OK)
1513         return res;
1514
1515     for (i = 0; i<num_result_sets; i++)
1516     {
1517         RSET first_set = 0;
1518         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1519                                       reg_type,
1520                                       num_bases, basenames,
1521                                       rset_nmem, &first_set,
1522                                       kc);
1523         if (res != ZEBRA_OK)
1524         {
1525             for (i = 0; i<num_result_sets; i++)
1526                 rset_delete(result_sets[i]);
1527             return res;
1528         }
1529
1530         if (first_set)
1531         {
1532             RSET tmp_set[2];
1533
1534             tmp_set[0] = first_set;
1535             tmp_set[1] = result_sets[i];
1536             
1537             result_sets[i] = rset_create_prox(
1538                 rset_nmem, kc, kc->scope,
1539                 2, tmp_set,
1540                 1 /* ordered */, 0 /* exclusion */,
1541                 3 /* relation */, 1 /* distance */);
1542         }
1543     }
1544     if (num_result_sets == 0)
1545         *rset = rset_create_null(rset_nmem, kc, 0); 
1546     else if (num_result_sets == 1)
1547         *rset = result_sets[0];
1548     else
1549         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1550                                num_result_sets, result_sets);
1551     if (!*rset)
1552         return ZEBRA_FAIL;
1553     return ZEBRA_OK;
1554 }
1555
1556 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1557                                          Z_AttributesPlusTerm *zapt,
1558                                          const char *termz_org,
1559                                          oid_value attributeSet,
1560                                          NMEM stream,
1561                                          int reg_type, int complete_flag,
1562                                          const char *rank_type, 
1563                                          const char *xpath_use,
1564                                          int num_bases, char **basenames,
1565                                          NMEM rset_nmem,
1566                                          RSET *rset,
1567                                          struct rset_key_control *kc)
1568 {
1569     RSET *result_sets = 0;
1570     int num_result_sets = 0;
1571     int i;
1572     ZEBRA_RES res =
1573         term_list_trunc(zh, zapt, termz_org, attributeSet,
1574                         stream, reg_type, complete_flag,
1575                         rank_type, xpath_use,
1576                         num_bases, basenames,
1577                         rset_nmem,
1578                         &result_sets, &num_result_sets,
1579                         kc);
1580     if (res != ZEBRA_OK)
1581         return res;
1582     for (i = 0; i<num_result_sets; i++)
1583     {
1584         RSET first_set = 0;
1585         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1586                                       reg_type,
1587                                       num_bases, basenames,
1588                                       rset_nmem, &first_set,
1589                                       kc);
1590         if (res != ZEBRA_OK)
1591         {
1592             for (i = 0; i<num_result_sets; i++)
1593                 rset_delete(result_sets[i]);
1594             return res;
1595         }
1596
1597         if (first_set)
1598         {
1599             RSET tmp_set[2];
1600
1601             tmp_set[0] = first_set;
1602             tmp_set[1] = result_sets[i];
1603             
1604             result_sets[i] = rset_create_prox(
1605                 rset_nmem, kc, kc->scope,
1606                 2, tmp_set,
1607                 1 /* ordered */, 0 /* exclusion */,
1608                 3 /* relation */, 1 /* distance */);
1609         }
1610     }
1611
1612
1613     if (num_result_sets == 0)
1614         *rset = rset_create_null(rset_nmem, kc, 0); 
1615     else if (num_result_sets == 1)
1616         *rset = result_sets[0];
1617     else
1618         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1619                                 num_result_sets, result_sets);
1620     if (!*rset)
1621         return ZEBRA_FAIL;
1622     return ZEBRA_OK;
1623 }
1624
1625 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1626                             const char **term_sub,
1627                             char *term_dict,
1628                             oid_value attributeSet,
1629                             struct grep_info *grep_info,
1630                             int *max_pos,
1631                             int reg_type,
1632                             char *term_dst,
1633                             int *error_code)
1634 {
1635     AttrType relation;
1636     int relation_value;
1637     int term_value;
1638     int r;
1639     char *term_tmp = term_dict + strlen(term_dict);
1640
1641     *error_code = 0;
1642     attr_init_APT(&relation, zapt, 2);
1643     relation_value = attr_find(&relation, NULL);
1644
1645     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1646
1647     switch (relation_value)
1648     {
1649     case 1:
1650         yaz_log(log_level_rpn, "Relation <");
1651         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1652                       term_dst))
1653             return 0;
1654         term_value = atoi (term_tmp);
1655         gen_regular_rel(term_tmp, term_value-1, 1);
1656         break;
1657     case 2:
1658         yaz_log(log_level_rpn, "Relation <=");
1659         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1660                       term_dst))
1661             return 0;
1662         term_value = atoi (term_tmp);
1663         gen_regular_rel(term_tmp, term_value, 1);
1664         break;
1665     case 4:
1666         yaz_log(log_level_rpn, "Relation >=");
1667         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1668                       term_dst))
1669             return 0;
1670         term_value = atoi (term_tmp);
1671         gen_regular_rel(term_tmp, term_value, 0);
1672         break;
1673     case 5:
1674         yaz_log(log_level_rpn, "Relation >");
1675         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1676                       term_dst))
1677             return 0;
1678         term_value = atoi (term_tmp);
1679         gen_regular_rel(term_tmp, term_value+1, 0);
1680         break;
1681     case -1:
1682     case 3:
1683         yaz_log(log_level_rpn, "Relation =");
1684         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1685                       term_dst))
1686             return 0;
1687         term_value = atoi (term_tmp);
1688         sprintf(term_tmp, "(0*%d)", term_value);
1689         break;
1690     case 103:
1691         /* term_tmp untouched.. */
1692         while (**term_sub != '\0')
1693             (*term_sub)++;
1694         break;
1695     default:
1696         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1697         return 0;
1698     }
1699     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1700     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1701                           0, grep_handle);
1702     if (r)
1703         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1704     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1705     return 1;
1706 }
1707
1708 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1709                               const char **term_sub, 
1710                               oid_value attributeSet, NMEM stream,
1711                               struct grep_info *grep_info,
1712                               int reg_type, int complete_flag,
1713                               int num_bases, char **basenames,
1714                               char *term_dst, 
1715                               const char *xpath_use,
1716                               struct ord_list **ol)
1717 {
1718     char term_dict[2*IT_MAX_WORD+2];
1719     int base_no;
1720     const char *termp;
1721     struct rpn_char_map_info rcmi;
1722
1723     int bases_ok = 0;     /* no of databases with OK attribute */
1724
1725     *ol = ord_list_create(stream);
1726
1727     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1728
1729     for (base_no = 0; base_no < num_bases; base_no++)
1730     {
1731         int max_pos, prefix_len = 0;
1732         int relation_error = 0;
1733         int ord, ord_len, i;
1734         char ord_buf[32];
1735
1736         termp = *term_sub;
1737
1738         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1739         {
1740             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1741                            basenames[base_no]);
1742             return ZEBRA_FAIL;
1743         }
1744
1745         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1746                               attributeSet, &ord) != ZEBRA_OK)
1747             continue;
1748         bases_ok++;
1749
1750         *ol = ord_list_append(stream, *ol, ord);
1751
1752         ord_len = key_SU_encode (ord, ord_buf);
1753
1754         term_dict[prefix_len++] = '(';
1755         for (i = 0; i < ord_len; i++)
1756         {
1757             term_dict[prefix_len++] = 1;
1758             term_dict[prefix_len++] = ord_buf[i];
1759         }
1760         term_dict[prefix_len++] = ')';
1761         term_dict[prefix_len] = '\0';
1762
1763         if (!numeric_relation(zh, zapt, &termp, term_dict,
1764                               attributeSet, grep_info, &max_pos, reg_type,
1765                               term_dst, &relation_error))
1766         {
1767             if (relation_error)
1768             {
1769                 zebra_setError(zh, relation_error, 0);
1770                 return ZEBRA_FAIL;
1771             }
1772             *term_sub = 0;
1773             return ZEBRA_OK;
1774         }
1775     }
1776     if (!bases_ok)
1777         return ZEBRA_FAIL;
1778     *term_sub = termp;
1779     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1780     return ZEBRA_OK;
1781 }
1782
1783                                  
1784 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1785                                         Z_AttributesPlusTerm *zapt,
1786                                         const char *termz,
1787                                         oid_value attributeSet,
1788                                         NMEM stream,
1789                                         int reg_type, int complete_flag,
1790                                         const char *rank_type, 
1791                                         const char *xpath_use,
1792                                         int num_bases, char **basenames,
1793                                         NMEM rset_nmem,
1794                                         RSET *rset,
1795                                         struct rset_key_control *kc)
1796 {
1797     char term_dst[IT_MAX_WORD+1];
1798     const char *termp = termz;
1799     RSET *result_sets = 0;
1800     int num_result_sets = 0;
1801     ZEBRA_RES res;
1802     struct grep_info grep_info;
1803     int alloc_sets = 0;
1804     zint hits_limit_value;
1805     const char *term_ref_id_str = 0;
1806
1807     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1808
1809     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1810     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1811         return ZEBRA_FAIL;
1812     while (1)
1813     { 
1814         struct ord_list *ol;
1815         if (alloc_sets == num_result_sets)
1816         {
1817             int add = 10;
1818             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1819                                               sizeof(*rnew));
1820             if (alloc_sets)
1821                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1822             alloc_sets = alloc_sets + add;
1823             result_sets = rnew;
1824         }
1825         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1826         grep_info.isam_p_indx = 0;
1827         res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1828                            reg_type, complete_flag, num_bases, basenames,
1829                            term_dst, xpath_use, &ol);
1830         if (res == ZEBRA_FAIL || termp == 0)
1831             break;
1832         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1833         result_sets[num_result_sets] =
1834             rset_trunc(zh, grep_info.isam_p_buf,
1835                        grep_info.isam_p_indx, term_dst,
1836                        strlen(term_dst), rank_type,
1837                        0 /* preserve position */,
1838                        zapt->term->which, rset_nmem, 
1839                        kc, kc->scope, ol, reg_type,
1840                        hits_limit_value,
1841                        term_ref_id_str);
1842         if (!result_sets[num_result_sets])
1843             break;
1844         num_result_sets++;
1845         if (!*termp)
1846             break;
1847     }
1848     grep_info_delete(&grep_info);
1849
1850     if (res != ZEBRA_OK)
1851         return res;
1852     if (num_result_sets == 0)
1853         *rset = rset_create_null(rset_nmem, kc, 0);
1854     else if (num_result_sets == 1)
1855         *rset = result_sets[0];
1856     else
1857         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1858                                 num_result_sets, result_sets);
1859     if (!*rset)
1860         return ZEBRA_FAIL;
1861     return ZEBRA_OK;
1862 }
1863
1864 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1865                                       Z_AttributesPlusTerm *zapt,
1866                                       const char *termz,
1867                                       oid_value attributeSet,
1868                                       NMEM stream,
1869                                       const char *rank_type, NMEM rset_nmem,
1870                                       RSET *rset,
1871                                       struct rset_key_control *kc)
1872 {
1873     RSFD rsfd;
1874     struct it_key key;
1875     int sys;
1876     *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1877                              res_get (zh->res, "setTmpDir"),0 );
1878     rsfd = rset_open(*rset, RSETF_WRITE);
1879     
1880     sys = atoi(termz);
1881     if (sys <= 0)
1882         sys = 1;
1883     key.mem[0] = sys;
1884     key.mem[1] = 1;
1885     key.len = 2;
1886     rset_write (rsfd, &key);
1887     rset_close (rsfd);
1888     return ZEBRA_OK;
1889 }
1890
1891 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1892                                oid_value attributeSet, NMEM stream,
1893                                Z_SortKeySpecList *sort_sequence,
1894                                const char *rank_type,
1895                                NMEM rset_nmem,
1896                                RSET *rset,
1897                                struct rset_key_control *kc)
1898 {
1899     int i;
1900     int sort_relation_value;
1901     AttrType sort_relation_type;
1902     Z_SortKeySpec *sks;
1903     Z_SortKey *sk;
1904     int oid[OID_SIZE];
1905     oident oe;
1906     char termz[20];
1907     
1908     attr_init_APT(&sort_relation_type, zapt, 7);
1909     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1910
1911     if (!sort_sequence->specs)
1912     {
1913         sort_sequence->num_specs = 10;
1914         sort_sequence->specs = (Z_SortKeySpec **)
1915             nmem_malloc(stream, sort_sequence->num_specs *
1916                          sizeof(*sort_sequence->specs));
1917         for (i = 0; i<sort_sequence->num_specs; i++)
1918             sort_sequence->specs[i] = 0;
1919     }
1920     if (zapt->term->which != Z_Term_general)
1921         i = 0;
1922     else
1923         i = atoi_n ((char *) zapt->term->u.general->buf,
1924                     zapt->term->u.general->len);
1925     if (i >= sort_sequence->num_specs)
1926         i = 0;
1927     sprintf(termz, "%d", i);
1928
1929     oe.proto = PROTO_Z3950;
1930     oe.oclass = CLASS_ATTSET;
1931     oe.value = attributeSet;
1932     if (!oid_ent_to_oid (&oe, oid))
1933         return ZEBRA_FAIL;
1934
1935     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1936     sks->sortElement = (Z_SortElement *)
1937         nmem_malloc(stream, sizeof(*sks->sortElement));
1938     sks->sortElement->which = Z_SortElement_generic;
1939     sk = sks->sortElement->u.generic = (Z_SortKey *)
1940         nmem_malloc(stream, sizeof(*sk));
1941     sk->which = Z_SortKey_sortAttributes;
1942     sk->u.sortAttributes = (Z_SortAttributes *)
1943         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1944
1945     sk->u.sortAttributes->id = oid;
1946     sk->u.sortAttributes->list = zapt->attributes;
1947
1948     sks->sortRelation = (int *)
1949         nmem_malloc(stream, sizeof(*sks->sortRelation));
1950     if (sort_relation_value == 1)
1951         *sks->sortRelation = Z_SortKeySpec_ascending;
1952     else if (sort_relation_value == 2)
1953         *sks->sortRelation = Z_SortKeySpec_descending;
1954     else 
1955         *sks->sortRelation = Z_SortKeySpec_ascending;
1956
1957     sks->caseSensitivity = (int *)
1958         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1959     *sks->caseSensitivity = 0;
1960
1961     sks->which = Z_SortKeySpec_null;
1962     sks->u.null = odr_nullval ();
1963     sort_sequence->specs[i] = sks;
1964     *rset = rset_create_null(rset_nmem, kc, 0);
1965     return ZEBRA_OK;
1966 }
1967
1968
1969 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1970                            oid_value attributeSet,
1971                            struct xpath_location_step *xpath, int max,
1972                            NMEM mem)
1973 {
1974     oid_value curAttributeSet = attributeSet;
1975     AttrType use;
1976     const char *use_string = 0;
1977     
1978     attr_init_APT(&use, zapt, 1);
1979     attr_find_ex(&use, &curAttributeSet, &use_string);
1980
1981     if (!use_string || *use_string != '/')
1982         return -1;
1983
1984     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1985 }
1986  
1987                
1988
1989 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1990                         int reg_type, const char *term, 
1991                         const char *xpath_use,
1992                         NMEM rset_nmem,
1993                         struct rset_key_control *kc)
1994 {
1995     RSET rset;
1996     struct grep_info grep_info;
1997     char term_dict[2048];
1998     char ord_buf[32];
1999     int prefix_len = 0;
2000     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2001                                            zinfo_index_category_index,
2002                                            reg_type,
2003                                            xpath_use);
2004     int ord_len, i, r, max_pos;
2005     int term_type = Z_Term_characterString;
2006     const char *flags = "void";
2007
2008     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2009         return rset_create_null(rset_nmem, kc, 0);
2010     
2011     if (ord < 0)
2012         return rset_create_null(rset_nmem, kc, 0);
2013     if (prefix_len)
2014         term_dict[prefix_len++] = '|';
2015     else
2016         term_dict[prefix_len++] = '(';
2017     
2018     ord_len = key_SU_encode (ord, ord_buf);
2019     for (i = 0; i<ord_len; i++)
2020     {
2021         term_dict[prefix_len++] = 1;
2022         term_dict[prefix_len++] = ord_buf[i];
2023     }
2024     term_dict[prefix_len++] = ')';
2025     strcpy(term_dict+prefix_len, term);
2026     
2027     grep_info.isam_p_indx = 0;
2028     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2029                           &grep_info, &max_pos, 0, grep_handle);
2030     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2031              grep_info.isam_p_indx);
2032     rset = rset_trunc(zh, grep_info.isam_p_buf,
2033                       grep_info.isam_p_indx, term, strlen(term),
2034                       flags, 1, term_type,rset_nmem,
2035                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2036                       0 /* term_ref_id_str */);
2037     grep_info_delete(&grep_info);
2038     return rset;
2039 }
2040
2041 static
2042 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2043                            int num_bases, char **basenames,
2044                            NMEM stream, const char *rank_type, RSET rset,
2045                            int xpath_len, struct xpath_location_step *xpath,
2046                            NMEM rset_nmem,
2047                            RSET *rset_out,
2048                            struct rset_key_control *kc)
2049 {
2050     int base_no;
2051     int i;
2052     int always_matches = rset ? 0 : 1;
2053
2054     if (xpath_len < 0)
2055     {
2056         *rset_out = rset;
2057         return ZEBRA_OK;
2058     }
2059
2060     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2061     for (i = 0; i<xpath_len; i++)
2062     {
2063         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2064
2065     }
2066
2067     /*
2068       //a    ->    a/.*
2069       //a/b  ->    b/a/.*
2070       /a     ->    a/
2071       /a/b   ->    b/a/
2072
2073       /      ->    none
2074
2075    a[@attr = value]/b[@other = othervalue]
2076
2077  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2078  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2079  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2080  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2081  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2082  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2083       
2084     */
2085
2086     dict_grep_cmap (zh->reg->dict, 0, 0);
2087
2088     for (base_no = 0; base_no < num_bases; base_no++)
2089     {
2090         int level = xpath_len;
2091         int first_path = 1;
2092         
2093         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2094         {
2095             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2096                            basenames[base_no]);
2097             *rset_out = rset;
2098             return ZEBRA_FAIL;
2099         }
2100         while (--level >= 0)
2101         {
2102             WRBUF xpath_rev = wrbuf_alloc();
2103             int i;
2104             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2105
2106             for (i = level; i >= 1; --i)
2107             {
2108                 const char *cp = xpath[i].part;
2109                 if (*cp)
2110                 {
2111                     for (; *cp; cp++)
2112                     {
2113                         if (*cp == '*')
2114                             wrbuf_puts(xpath_rev, "[^/]*");
2115                         else if (*cp == ' ')
2116                             wrbuf_puts(xpath_rev, "\001 ");
2117                         else
2118                             wrbuf_putc(xpath_rev, *cp);
2119
2120                         /* wrbuf_putc does not null-terminate , but
2121                            wrbuf_puts below ensures it does.. so xpath_rev
2122                            is OK iff length is > 0 */
2123                     }
2124                     wrbuf_puts(xpath_rev, "/");
2125                 }
2126                 else if (i == 1)  /* // case */
2127                     wrbuf_puts(xpath_rev, ".*");
2128             }
2129             if (xpath[level].predicate &&
2130                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2131                 xpath[level].predicate->u.relation.name[0])
2132             {
2133                 WRBUF wbuf = wrbuf_alloc();
2134                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2135                 if (xpath[level].predicate->u.relation.value)
2136                 {
2137                     const char *cp = xpath[level].predicate->u.relation.value;
2138                     wrbuf_putc(wbuf, '=');
2139                     
2140                     while (*cp)
2141                     {
2142                         if (strchr(REGEX_CHARS, *cp))
2143                             wrbuf_putc(wbuf, '\\');
2144                         wrbuf_putc(wbuf, *cp);
2145                         cp++;
2146                     }
2147                 }
2148                 wrbuf_puts(wbuf, "");
2149                 rset_attr = xpath_trunc(
2150                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2151                     rset_nmem, kc);
2152                 wrbuf_free(wbuf, 1);
2153             } 
2154             else 
2155             {
2156                 if (!first_path)
2157                 {
2158                     wrbuf_free(xpath_rev, 1);
2159                     continue;
2160                 }
2161             }
2162             yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
2163                     wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2164             if (wrbuf_len(xpath_rev))
2165             {
2166                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2167                                              wrbuf_buf(xpath_rev),
2168                                              ZEBRA_XPATH_ELM_BEGIN, 
2169                                              rset_nmem, kc);
2170                 if (always_matches)
2171                     rset = rset_start_tag;
2172                 else
2173                 {
2174                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2175                                                wrbuf_buf(xpath_rev),
2176                                                ZEBRA_XPATH_ELM_END, 
2177                                                rset_nmem, kc);
2178                     
2179                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2180                                                rset_start_tag, rset,
2181                                                rset_end_tag, rset_attr);
2182                 }
2183             }
2184             wrbuf_free(xpath_rev, 1);
2185             first_path = 0;
2186         }
2187     }
2188     *rset_out = rset;
2189     return ZEBRA_OK;
2190 }
2191
2192 #define MAX_XPATH_STEPS 10
2193
2194 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2195                                 oid_value attributeSet, NMEM stream,
2196                                 Z_SortKeySpecList *sort_sequence,
2197                                 int num_bases, char **basenames, 
2198                                 NMEM rset_nmem,
2199                                 RSET *rset,
2200                                 struct rset_key_control *kc)
2201 {
2202     ZEBRA_RES res = ZEBRA_OK;
2203     unsigned reg_id;
2204     char *search_type = NULL;
2205     char rank_type[128];
2206     int complete_flag;
2207     int sort_flag;
2208     char termz[IT_MAX_WORD+1];
2209     int xpath_len;
2210     const char *xpath_use = 0;
2211     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2212
2213     if (!log_level_set)
2214     {
2215         log_level_rpn = yaz_log_module_level("rpn");
2216         log_level_set = 1;
2217     }
2218     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2219                     rank_type, &complete_flag, &sort_flag);
2220     
2221     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2222     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2223     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2224     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2225
2226     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2227         return ZEBRA_FAIL;
2228
2229     if (sort_flag)
2230         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2231                              rank_type, rset_nmem, rset, kc);
2232     /* consider if an X-Path query is used */
2233     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2234                                 xpath, MAX_XPATH_STEPS, stream);
2235     if (xpath_len >= 0)
2236     {
2237         if (xpath[xpath_len-1].part[0] == '@') 
2238             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2239         else
2240             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2241
2242         if (1)
2243         {
2244             AttrType relation;
2245             int relation_value;
2246
2247             attr_init_APT(&relation, zapt, 2);
2248             relation_value = attr_find(&relation, NULL);
2249
2250             if (relation_value == 103) /* alwaysmatches */
2251             {
2252                 *rset = 0; /* signal no "term" set */
2253                 return rpn_search_xpath(zh, num_bases, basenames,
2254                                         stream, rank_type, *rset, 
2255                                         xpath_len, xpath, rset_nmem, rset, kc);
2256             }
2257         }
2258     }
2259
2260     /* search using one of the various search type strategies
2261        termz is our UTF-8 search term
2262        attributeSet is top-level default attribute set 
2263        stream is ODR for search
2264        reg_id is the register type
2265        complete_flag is 1 for complete subfield, 0 for incomplete
2266        xpath_use is use-attribute to be used for X-Path search, 0 for none
2267     */
2268     if (!strcmp(search_type, "phrase"))
2269     {
2270         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2271                                     reg_id, complete_flag, rank_type,
2272                                     xpath_use,
2273                                     num_bases, basenames, rset_nmem,
2274                                     rset, kc);
2275     }
2276     else if (!strcmp(search_type, "and-list"))
2277     {
2278         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2279                                       reg_id, complete_flag, rank_type,
2280                                       xpath_use,
2281                                       num_bases, basenames, rset_nmem,
2282                                       rset, kc);
2283     }
2284     else if (!strcmp(search_type, "or-list"))
2285     {
2286         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2287                                      reg_id, complete_flag, rank_type,
2288                                      xpath_use,
2289                                      num_bases, basenames, rset_nmem,
2290                                      rset, kc);
2291     }
2292     else if (!strcmp(search_type, "local"))
2293     {
2294         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2295                                    rank_type, rset_nmem, rset, kc);
2296     }
2297     else if (!strcmp(search_type, "numeric"))
2298     {
2299         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2300                                      reg_id, complete_flag, rank_type,
2301                                      xpath_use,
2302                                      num_bases, basenames, rset_nmem,
2303                                      rset, kc);
2304     }
2305     else
2306     {
2307         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2308         res = ZEBRA_FAIL;
2309     }
2310     if (res != ZEBRA_OK)
2311         return res;
2312     if (!*rset)
2313         return ZEBRA_FAIL;
2314     return rpn_search_xpath(zh, num_bases, basenames,
2315                             stream, rank_type, *rset, 
2316                             xpath_len, xpath, rset_nmem, rset, kc);
2317 }
2318
/* Forward declaration: rpn_search_structure() is defined further down in
   this file but is already called by rpn_search_top(), and it calls
   itself for the two operands of a complex node.  result_sets and
   num_result_sets are output parameters; parent_op is the operator of the
   enclosing complex node (0 when there is none). */
2319 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2320                                       oid_value attributeSet, 
2321                                       NMEM stream, NMEM rset_nmem,
2322                                       Z_SortKeySpecList *sort_sequence,
2323                                       int num_bases, char **basenames,
2324                                       RSET **result_sets, int *num_result_sets,
2325                                       Z_Operator *parent_op,
2326                                       struct rset_key_control *kc);
2327
2328 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2329                                    zint *approx_limit)
2330 {
2331     ZEBRA_RES res = ZEBRA_OK;
2332     if (zs->which == Z_RPNStructure_complex)
2333     {
2334         if (res == ZEBRA_OK)
2335             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2336                                            approx_limit);
2337         if (res == ZEBRA_OK)
2338             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2339                                            approx_limit);
2340     }
2341     else if (zs->which == Z_RPNStructure_simple)
2342     {
2343         if (zs->u.simple->which == Z_Operand_APT)
2344         {
2345             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2346             AttrType global_hits_limit_attr;
2347             int l;
2348             
2349             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2350             
2351             l = attr_find(&global_hits_limit_attr, NULL);
2352             if (l != -1)
2353                 *approx_limit = l;
2354         }
2355     }
2356     return res;
2357 }
2358
2359 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2360                          oid_value attributeSet, 
2361                          NMEM stream, NMEM rset_nmem,
2362                          Z_SortKeySpecList *sort_sequence,
2363                          int num_bases, char **basenames,
2364                          RSET *result_set)
2365 {
2366     RSET *result_sets = 0;
2367     int num_result_sets = 0;
2368     ZEBRA_RES res;
2369     struct rset_key_control *kc = zebra_key_control_create(zh);
2370
2371     res = rpn_search_structure(zh, zs, attributeSet,
2372                                stream, rset_nmem,
2373                                sort_sequence, 
2374                                num_bases, basenames,
2375                                &result_sets, &num_result_sets,
2376                                0 /* no parent op */,
2377                                kc);
2378     if (res != ZEBRA_OK)
2379     {
2380         int i;
2381         for (i = 0; i<num_result_sets; i++)
2382             rset_delete(result_sets[i]);
2383         *result_set = 0;
2384     }
2385     else
2386     {
2387         assert(num_result_sets == 1);
2388         assert(result_sets);
2389         assert(*result_sets);
2390         *result_set = *result_sets;
2391     }
2392     (*kc->dec)(kc);
2393     return res;
2394 }
2395
2396 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2397                                oid_value attributeSet, 
2398                                NMEM stream, NMEM rset_nmem,
2399                                Z_SortKeySpecList *sort_sequence,
2400                                int num_bases, char **basenames,
2401                                RSET **result_sets, int *num_result_sets,
2402                                Z_Operator *parent_op,
2403                                struct rset_key_control *kc)
2404 {
2405     *num_result_sets = 0;
2406     if (zs->which == Z_RPNStructure_complex)
2407     {
2408         ZEBRA_RES res;
2409         Z_Operator *zop = zs->u.complex->roperator;
2410         RSET *result_sets_l = 0;
2411         int num_result_sets_l = 0;
2412         RSET *result_sets_r = 0;
2413         int num_result_sets_r = 0;
2414
2415         res = rpn_search_structure(zh, zs->u.complex->s1,
2416                                    attributeSet, stream, rset_nmem,
2417                                    sort_sequence,
2418                                    num_bases, basenames,
2419                                    &result_sets_l, &num_result_sets_l,
2420                                    zop, kc);
2421         if (res != ZEBRA_OK)
2422         {
2423             int i;
2424             for (i = 0; i<num_result_sets_l; i++)
2425                 rset_delete(result_sets_l[i]);
2426             return res;
2427         }
2428         res = rpn_search_structure(zh, zs->u.complex->s2,
2429                                    attributeSet, stream, rset_nmem,
2430                                    sort_sequence,
2431                                    num_bases, basenames,
2432                                    &result_sets_r, &num_result_sets_r,
2433                                    zop, kc);
2434         if (res != ZEBRA_OK)
2435         {
2436             int i;
2437             for (i = 0; i<num_result_sets_l; i++)
2438                 rset_delete(result_sets_l[i]);
2439             for (i = 0; i<num_result_sets_r; i++)
2440                 rset_delete(result_sets_r[i]);
2441             return res;
2442         }
2443
2444         /* make a new list of result for all children */
2445         *num_result_sets = num_result_sets_l + num_result_sets_r;
2446         *result_sets = nmem_malloc(stream, *num_result_sets * 
2447                                    sizeof(**result_sets));
2448         memcpy(*result_sets, result_sets_l, 
2449                num_result_sets_l * sizeof(**result_sets));
2450         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2451                num_result_sets_r * sizeof(**result_sets));
2452
2453         if (!parent_op || parent_op->which != zop->which
2454             || (zop->which != Z_Operator_and &&
2455                 zop->which != Z_Operator_or))
2456         {
2457             /* parent node different from this one (or non-present) */
2458             /* we must combine result sets now */
2459             RSET rset;
2460             switch (zop->which)
2461             {
2462             case Z_Operator_and:
2463                 rset = rset_create_and(rset_nmem, kc,
2464                                        kc->scope,
2465                                        *num_result_sets, *result_sets);
2466                 break;
2467             case Z_Operator_or:
2468                 rset = rset_create_or(rset_nmem, kc,
2469                                       kc->scope, 0, /* termid */
2470                                       *num_result_sets, *result_sets);
2471                 break;
2472             case Z_Operator_and_not:
2473                 rset = rset_create_not(rset_nmem, kc,
2474                                        kc->scope,
2475                                        (*result_sets)[0],
2476                                        (*result_sets)[1]);
2477                 break;
2478             case Z_Operator_prox:
2479                 if (zop->u.prox->which != Z_ProximityOperator_known)
2480                 {
2481                     zebra_setError(zh, 
2482                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2483                                    0);
2484                     return ZEBRA_FAIL;
2485                 }
2486                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2487                 {
2488                     zebra_setError_zint(zh,
2489                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2490                                         *zop->u.prox->u.known);
2491                     return ZEBRA_FAIL;
2492                 }
2493                 else
2494                 {
2495                     rset = rset_create_prox(rset_nmem, kc,
2496                                             kc->scope,
2497                                             *num_result_sets, *result_sets, 
2498                                             *zop->u.prox->ordered,
2499                                             (!zop->u.prox->exclusion ? 
2500                                              0 : *zop->u.prox->exclusion),
2501                                             *zop->u.prox->relationType,
2502                                             *zop->u.prox->distance );
2503                 }
2504                 break;
2505             default:
2506                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2507                 return ZEBRA_FAIL;
2508             }
2509             *num_result_sets = 1;
2510             *result_sets = nmem_malloc(stream, *num_result_sets * 
2511                                        sizeof(**result_sets));
2512             (*result_sets)[0] = rset;
2513         }
2514     }
2515     else if (zs->which == Z_RPNStructure_simple)
2516     {
2517         RSET rset;
2518         ZEBRA_RES res;
2519
2520         if (zs->u.simple->which == Z_Operand_APT)
2521         {
2522             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2523             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2524                                  attributeSet, stream, sort_sequence,
2525                                  num_bases, basenames, rset_nmem, &rset,
2526                                  kc);
2527             if (res != ZEBRA_OK)
2528                 return res;
2529         }
2530         else if (zs->u.simple->which == Z_Operand_resultSetId)
2531         {
2532             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2533             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2534             if (!rset)
2535             {
2536                 zebra_setError(zh, 
2537                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2538                                zs->u.simple->u.resultSetId);
2539                 return ZEBRA_FAIL;
2540             }
2541             rset_dup(rset);
2542         }
2543         else
2544         {
2545             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2546             return ZEBRA_FAIL;
2547         }
2548         *num_result_sets = 1;
2549         *result_sets = nmem_malloc(stream, *num_result_sets * 
2550                                    sizeof(**result_sets));
2551         (*result_sets)[0] = rset;
2552     }
2553     else
2554     {
2555         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2556         return ZEBRA_FAIL;
2557     }
2558     return ZEBRA_OK;
2559 }
2560
2561
2562
2563 /*
2564  * Local variables:
2565  * c-basic-offset: 4
2566  * indent-tabs-mode: nil
2567  * End:
2568  * vim: shiftwidth=4 tabstop=8 expandtab
2569  */
2570