1 /* $Id: retrieve.c,v 1.61 2006-12-18 23:40:07 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36 #include <yaz/diagbib1.h>
/* Opening part of the XML <record> element, carrying the Zebra namespace
   declaration.  The tag is left unclosed on purpose: the code that uses this
   macro appends further attributes (sysno, set, ...) right after it. */
40 #define ZEBRA_XML_HEADER_STR "<record xmlns=\"http://www.indexdata.com/zebra/\""
/* Prepare `stream` for reading the raw content of record *rec.
 *
 * Two sources are visible below:
 *  - the record carries stored data (size[recInfo_storeData] > 0): a memory
 *    stream over info[recInfo_storeData] is created;
 *  - a path is assembled in full_rep from the record's filename, the file is
 *    opened read-only and a file-descriptor stream is created, passing
 *    recordAttr->recordOffset.
 *
 * NOTE(review): this listing omits several lines of the function (local
 * declarations, branch keywords, the return statements), so the return
 * values are not visible here - confirm them against the complete source.
 * NOTE(review): the path is built with strcpy/strcat and no length check is
 * visible; verify that path_reg + "/" + filename cannot exceed full_rep.
 */
42 static int zebra_create_record_stream(ZebraHandle zh,
44 struct ZebraRecStream *stream){
46 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, *rec);
/* record content is kept in the record itself: serve it from memory */
48 if ((*rec)->size[recInfo_storeData] > 0)
49 zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData],
50 (*rec)->size[recInfo_storeData]);
/* a register path is configured and the filename is not absolute:
   build path_reg "/" filename */
56 if (zh->path_reg && !yaz_is_abspath((*rec)->info[recInfo_filename])){
57 strcpy(full_rep, zh->path_reg);
58 strcat(full_rep, "/");
59 strcat(full_rep, (*rec)->info[recInfo_filename]);
/* filename copied unchanged (presumably the alternative branch; its keyword
   is not visible in this listing) */
62 strcpy(full_rep, (*rec)->info[recInfo_filename]);
/* open read-only; a failure is logged together with errno */
64 if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1){
65 yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
/* the stream reads from fd and is given the record's offset */
70 zebra_create_stream_fd(stream, fd, recordAttr->recordOffset);
/* Split an element set name into an index name and a type.
 *
 * Results are handed back as pointer/length pairs through index/index_len
 * and type/type_len.  Three cases are visible below, decided by the first
 * ':' found by strchr:
 *  - no ':'                -> index only, its length is strlen(elem);
 *  - ':' followed by '\0'  -> a colon without a type;
 *  - otherwise             -> index length is cp - elem, type length is
 *                             strlen(cp+1).
 *
 * NOTE(review): the assignments to *index / *type, the initial values of the
 * outputs and the return statements are on lines missing from this listing;
 * callers in this file treat a zero return as "name not valid".
 */
77 static int parse_zebra_elem(const char *elem,
78 const char **index, size_t *index_len,
79 const char **type, size_t *type_len)
90 /* verify that '::' is in the beginning of *elem
91 and something more follows */
/* NOTE(review): the !(elem +N) tests below examine a pointer, not a
   character; for a non-null elem they look like they can never be true, so
   the character comparisons do the actual checking - confirm intent. */
93 || !(elem +1) || ':' != *(elem +1)
94 || !(elem +2) || '\0' == *(elem +2))
97 /* pick out info from string after '::' */
99 cp = strchr(elem, ':');
101 if (!cp) /* index, no colon, no type */
104 *index_len = strlen(elem);
106 else if (cp[1] == '\0') /* colon, but no following type */
110 else /* index, colon and type */
113 *index_len = cp - elem;
115 *type_len = strlen(cp+1);
/* Build the record returned for the special "sort" element set.
 *
 * The element set name is parsed into an index name and an optional type,
 * the matching sort register entry (ord) is looked up, the sort key stored
 * for `sysno` is read and converted with zebra_term_untrans, and the result
 * is formatted either as XML or as SUTRS.
 *
 * Output: *output_format is set to the format produced; *rec_bufp receives
 * an ODR-allocated copy of the text and *rec_lenp its length in bytes (the
 * copy is made with memcpy of exactly that length, no terminator is added
 * on the visible lines).
 *
 * Diagnostics visible below:
 *  - YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST when input_format is
 *    neither VAL_TEXT_XML nor VAL_SUTRS (*output_format becomes VAL_NONE);
 *  - YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_ when the
 *    name cannot be parsed, the type is longer than one character, or the
 *    index name is empty or too long for the local buffer.
 * NOTE(review): the condition guarding the return after the ord lookup and
 * the final (success) return are on lines missing from this listing.
 */
122 int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr,
123 const char *elemsetname,
124 oid_value input_format,
125 oid_value *output_format,
126 char **rec_bufp, int *rec_lenp)
128 const char *retrieval_index;
129 size_t retrieval_index_len;
130 const char *retrieval_type;
131 size_t retrieval_type_len;
132 char retrieval_index_cstr[256];
135 /* only accept XML and SUTRS requests */
136 if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
138 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
140 *output_format = VAL_NONE;
141 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
/* split the element set name into index name and type */
144 if (!parse_zebra_elem(elemsetname,
145 &retrieval_index, &retrieval_index_len,
146 &retrieval_type, &retrieval_type_len))
148 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* a type, when given, must be exactly one character */
151 if (retrieval_type_len != 0 && retrieval_type_len != 1)
153 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* an index name is mandatory here and must fit in retrieval_index_cstr */
155 if (!retrieval_index_len ||
156 retrieval_index_len >= sizeof(retrieval_index_cstr)-1)
158 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* retrieval_index is a pointer/length pair, so make a terminated copy for
   the lookup */
161 memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
162 retrieval_index_cstr[retrieval_index_len] = '\0';
/* look up the entry in the sort category; -1 is passed as type when the
   element set name carried none */
164 ord = zebraExplain_lookup_attr_str(zh->reg->zei,
165 zinfo_index_category_sort,
166 (retrieval_type_len == 0 ? -1 :
168 retrieval_index_cstr);
170 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
173 char dst_buf[IT_MAX_WORD];
174 char str[IT_MAX_WORD];
177 const char *string_index = 0;
178 WRBUF wrbuf = wrbuf_alloc();
/* select record and sort entry, then read the stored sort key into str */
180 zebra_sort_sysno(zh->reg->sort_index, sysno);
181 zebra_sort_type(zh->reg->sort_index, ord);
182 zebra_sort_read(zh->reg->sort_index, str);
/* fetch index type, database and index name belonging to ord */
184 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, &string_index);
/* convert the stored key into dst_buf for output */
186 zebra_term_untrans(zh, index_type, dst_buf, str);
/* XML output: <record ...> wrapping a single <sort> element */
189 if (input_format == VAL_TEXT_XML)
191 *output_format = VAL_TEXT_XML;
192 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
193 " sysno=\"" ZINT_FORMAT "\""
194 " set=\"zebra::sort%s/\">\n",
197 wrbuf_printf(wrbuf, " <sort name=\"%s\"",
199 wrbuf_printf(wrbuf, " type=\"%c\">", index_type);
200 wrbuf_xmlputs(wrbuf, dst_buf);
201 wrbuf_printf(wrbuf, "</sort>\n");
202 wrbuf_printf(wrbuf, "</record>\n");
/* SUTRS output: one line "index-name type term" */
204 else if (input_format == VAL_SUTRS)
206 *output_format = VAL_SUTRS;
208 wrbuf_printf(wrbuf, "%s %c %s\n", string_index, index_type,
/* hand the text over in ODR memory, then release the work buffer */
211 *rec_lenp = wrbuf_len(wrbuf);
212 *rec_bufp = odr_malloc(odr, *rec_lenp);
213 memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
214 wrbuf_free(wrbuf, 1);
/* Build the record returned for the special "index" element set: a listing
 * of the index keys stored with a record.
 *
 * The element set name is parsed into an optional index name and an optional
 * one-character type.  When an index name is given it is first checked
 * against the index category with zebraExplain_lookup_attr_str.  The keys
 * held in the record's recInfo_delKeys buffer are then walked, and every key
 * whose index name and type pass the filter is written out, either as an XML
 * <index> element or as a SUTRS line.
 *
 * Output: *output_format is set to the format produced; *rec_bufp receives
 * an ODR-allocated copy of the text and *rec_lenp its length in bytes.
 *
 * Diagnostics visible below:
 *  - YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST when input_format is
 *    neither VAL_TEXT_XML nor VAL_SUTRS (*output_format becomes VAL_NONE);
 *  - YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_ when the
 *    name cannot be parsed, the type is longer than one character, or the
 *    named index is not found.
 * NOTE(review): one parameter line (the call site in this file passes a
 * record as fourth argument), several declarations and the final return are
 * on lines missing from this listing.
 */
219 int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
221 const char *elemsetname,
222 oid_value input_format,
223 oid_value *output_format,
224 char **rec_bufp, int *rec_lenp)
226 const char *retrieval_index;
227 size_t retrieval_index_len;
228 const char *retrieval_type;
229 size_t retrieval_type_len;
231 zebra_rec_keys_t keys;
233 /* set output variables before processing possible error states */
236 /* only accept XML and SUTRS requests */
237 if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
239 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
241 *output_format = VAL_NONE;
242 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
/* split the element set name into index name and type */
245 if (!parse_zebra_elem(elemsetname,
246 &retrieval_index, &retrieval_index_len,
247 &retrieval_type, &retrieval_type_len))
248 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* a type, when given, must be exactly one character */
250 if (retrieval_type_len != 0 && retrieval_type_len != 1)
252 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* an index name is optional here; when present, verify that it exists */
255 if (retrieval_index_len)
257 char retrieval_index_cstr[256];
/* only names that fit the local buffer are looked up */
259 if (retrieval_index_len < sizeof(retrieval_index_cstr) -1)
/* terminated copy of the pointer/length pair for the lookup */
261 memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
262 retrieval_index_cstr[retrieval_index_len] = '\0';
/* -1 is passed as type when the element set name carried none */
264 if (zebraExplain_lookup_attr_str(zh->reg->zei,
265 zinfo_index_category_index,
266 (retrieval_type_len == 0 ? -1 :
268 retrieval_index_cstr) == -1)
269 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* attach a key reader to the key buffer stored with the record */
273 keys = zebra_rec_keys_open();
274 zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
275 rec->size[recInfo_delKeys], 0);
277 wrbuf = wrbuf_alloc();
278 if (zebra_rec_keys_rewind(keys)){
281 struct it_key key_in;
/* XML output starts with the <record ...> opening tag */
283 if (input_format == VAL_TEXT_XML)
285 *output_format = VAL_TEXT_XML;
286 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
287 " sysno=\"" ZINT_FORMAT "\""
288 " set=\"zebra::index%s/\">\n",
291 else if (input_format == VAL_SUTRS)
292 *output_format = VAL_SUTRS;
/* one iteration per key stored with the record */
294 while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
/* the first component of the key is used as the ord of its index */
297 int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
300 const char *string_index = 0;
301 size_t string_index_len;
302 char dst_buf[IT_MAX_WORD];
/* fetch index type, database and index name belonging to ord */
304 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
306 string_index_len = strlen(string_index);
308 /* process only if index is not defined,
309 or if defined and matching */
310 if (retrieval_index == 0
311 || (string_index_len == retrieval_index_len
312 && !memcmp(string_index, retrieval_index,
315 /* process only if type is not defined, or is matching */
316 if (retrieval_type == 0
317 || (retrieval_type_len == 1
318 && retrieval_type[0] == index_type))
/* convert the stored term into dst_buf for output */
320 zebra_term_untrans(zh, index_type, dst_buf, str);
/* XML: <index name=".." type=".." seq="..">term</index>, where seq is the
   last component of the key */
323 if (input_format == VAL_TEXT_XML){
324 wrbuf_printf(wrbuf, " <index name=\"%s\"",
327 wrbuf_printf(wrbuf, " type=\"%c\"", index_type);
329 wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">",
330 key_in.mem[key_in.len -1]);
332 wrbuf_xmlputs(wrbuf, dst_buf);
333 wrbuf_printf(wrbuf, "</index>\n");
/* SUTRS: index name, type, one number per key component from position 1
   on (the printed argument is on a line missing from this listing), then
   the term */
335 else if (input_format == VAL_SUTRS){
336 wrbuf_printf(wrbuf, "%s ", string_index);
338 wrbuf_printf(wrbuf, "%c", index_type);
340 for (i = 1; i < key_in.len; i++)
341 wrbuf_printf(wrbuf, " " ZINT_FORMAT,
344 /* zebra_term_untrans(zh, index_type, dst_buf, str); */
345 wrbuf_printf(wrbuf, " %s", dst_buf);
347 wrbuf_printf(wrbuf, "\n");
/* close the XML record element opened above */
354 if (input_format == VAL_TEXT_XML)
355 wrbuf_printf(wrbuf, "</record>\n");
/* hand the text over in ODR memory, then release work buffer and key reader */
357 *rec_lenp = wrbuf_len(wrbuf);
358 *rec_bufp = odr_malloc(odr, *rec_lenp);
359 memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
360 wrbuf_free(wrbuf, 1);
361 zebra_rec_keys_close(keys);
/* Append an XML attribute of the form  name="value"  (with a leading space)
   to wrbuf; the value is written through wrbuf_xmlputs rather than printf.
   NOTE(review): the remaining parameter line and any guard around these
   statements are not visible in this listing. */
366 static void retrieve_puts_attr(WRBUF wrbuf, const char *name,
371 wrbuf_printf(wrbuf, " %s=\"", name);
372 wrbuf_xmlputs(wrbuf, value);
373 wrbuf_printf(wrbuf, "\"");
/* Append an XML attribute of the form  name="value"  (with a leading space)
   to wrbuf, where value is formatted as an integer with %i. */
377 static void retrieve_puts_attr_int(WRBUF wrbuf, const char *name,
380 wrbuf_printf(wrbuf, " %s=\"%i\"", name, value);
/* Append one text line "name value" followed by a newline to wrbuf, with
   value formatted as a string.
   NOTE(review): the remaining parameter line and any guard around the
   statement are not visible in this listing. */
383 static void retrieve_puts_str(WRBUF wrbuf, const char *name,
387 wrbuf_printf(wrbuf, "%s %s\n", name, value);
/* Append one text line "name value" followed by a newline to wrbuf, with
   value formatted as an integer with %i. */
390 static void retrieve_puts_int(WRBUF wrbuf, const char *name,
393 wrbuf_printf(wrbuf, "%s %i\n", name, value);
/* Serve the special element sets of Zebra.
 *
 * elemsetname is compared against "meta::sysno", "sort", "data", "meta" and
 * "index" below, so it is presumably passed with its "zebra::" prefix
 * already removed (the caller's argument is on a line missing from this
 * listing - confirm).
 *
 * Dispatch order:
 *  1. "meta::sysno" - the system number only, produced without reading the
 *                     record (SUTRS or XML);
 *  2. "sort..."     - delegated to zebra_special_sort_fetch;
 *  3. the record is fetched with rec_get; a failure is logged and reported
 *     as YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
 *  4. "data"        - the raw record content, recordAttr->recordSize bytes,
 *                     in whatever format was requested;
 *  5. any other name requires XML or SUTRS, otherwise
 *     YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST is returned;
 *  6. "meta"        - record metadata (sysno, base, file, type, score, rank,
 *                     size, ...);
 *  7. "index..."    - delegated to zebra_special_index_fetch;
 *  8. everything else yields
 *     YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_.
 *
 * Results are handed back through *output_format, *rec_bufp (ODR memory) and
 * *rec_lenp.
 * NOTE(review): declarations, several branch keywords, the per-branch return
 * statements and any release of the fetched record are on lines missing from
 * this listing.
 */
396 int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr,
397 const char *elemsetname,
398 oid_value input_format,
399 oid_value *output_format,
400 char **rec_bufp, int *rec_lenp)
404 /* set output variables before processing possible error states */
409 /* processing zebra::meta::sysno elemset without fetching binary data */
410 if (elemsetname && 0 == strcmp(elemsetname, "meta::sysno"))
413 WRBUF wrbuf = wrbuf_alloc();
/* SUTRS: just the number */
414 if (input_format == VAL_SUTRS)
416 wrbuf_printf(wrbuf, ZINT_FORMAT, sysno);
417 *output_format = VAL_SUTRS;
/* XML: an empty <record> element carrying the sysno attribute */
419 else if (input_format == VAL_TEXT_XML)
421 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
422 " sysno=\"" ZINT_FORMAT "\"/>\n",
424 *output_format = VAL_TEXT_XML;
426 *rec_lenp = wrbuf_len(wrbuf);
428 *rec_bufp = odr_strdup(odr, wrbuf_buf(wrbuf));
/* NOTE(review): the condition selecting this diagnostic is not visible;
   presumably it applies when nothing was produced above */
430 ret = YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
431 wrbuf_free(wrbuf, 1);
435 /* processing special elementsetnames zebra::sort:: */
436 if (elemsetname && 0 == strncmp(elemsetname, "sort", 4))
438 return zebra_special_sort_fetch(zh, sysno, odr,
440 input_format, output_format,
445 /* fetching binary record up for all other display elementsets */
446 rec = rec_get(zh->reg->records, sysno);
449 yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
450 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
453 /* processing special elementsetnames zebra::data */
454 if (elemsetname && 0 == strcmp(elemsetname, "data"))
456 struct ZebraRecStream stream;
457 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec);
/* NOTE(review): the result of zebra_create_record_stream is discarded here,
   yet the stream is used right below - confirm that a failed open cannot
   reach the readf/destroy calls */
458 zebra_create_record_stream(zh, &rec, &stream);
/* raw data is returned in whatever format the client asked for */
459 *output_format = input_format;
460 *rec_lenp = recordAttr->recordSize;
461 *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
462 stream.readf(&stream, *rec_bufp, *rec_lenp);
463 stream.destroy(&stream);
468 /* only accept XML and SUTRS requests from now */
469 if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
471 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
473 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
477 /* processing special elementsetnames zebra::meta:: */
478 if (elemsetname && 0 == strcmp(elemsetname, "meta"))
481 WRBUF wrbuf = wrbuf_alloc();
482 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec);
/* XML: all metadata as attributes of one empty <record> element */
484 if (input_format == VAL_TEXT_XML)
486 *output_format = VAL_TEXT_XML;
488 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
489 " sysno=\"" ZINT_FORMAT "\"", sysno);
490 retrieve_puts_attr(wrbuf, "base", rec->info[recInfo_databaseName]);
491 retrieve_puts_attr(wrbuf, "file", rec->info[recInfo_filename]);
492 retrieve_puts_attr(wrbuf, "type", rec->info[recInfo_fileType]);
494 retrieve_puts_attr_int(wrbuf, "score", score);
497 " rank=\"" ZINT_FORMAT "\""
499 " set=\"zebra::%s\"/>\n",
500 recordAttr->staticrank,
501 recordAttr->recordSize,
/* SUTRS: the same metadata, one "name value" pair per line */
504 else if (input_format == VAL_SUTRS)
506 *output_format = VAL_SUTRS;
507 wrbuf_printf(wrbuf, "sysno " ZINT_FORMAT "\n", sysno);
508 retrieve_puts_str(wrbuf, "base", rec->info[recInfo_databaseName]);
509 retrieve_puts_str(wrbuf, "file", rec->info[recInfo_filename]);
510 retrieve_puts_str(wrbuf, "type", rec->info[recInfo_fileType]);
512 retrieve_puts_int(wrbuf, "score", score);
515 "rank " ZINT_FORMAT "\n"
518 recordAttr->staticrank,
519 recordAttr->recordSize,
522 *rec_lenp = wrbuf_len(wrbuf);
524 *rec_bufp = odr_strdup(odr, wrbuf_buf(wrbuf));
/* NOTE(review): the condition selecting this diagnostic is not visible in
   this listing */
526 ret = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
528 wrbuf_free(wrbuf, 1);
533 /* processing special elementsetnames zebra::index:: */
534 if (elemsetname && 0 == strncmp(elemsetname, "index", 5))
536 int ret = zebra_special_index_fetch(zh, sysno, odr, rec,
538 input_format, output_format,
/* no special element set matched */
547 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* Fetch record `sysno` and render it for presentation.
 *
 * The element set name is taken from `comp` with yaz_get_esn.  Names that
 * start with "zebra::" are handed to zebra_special_fetch.  For every other
 * name the record is loaded with rec_get, a read stream over its content is
 * created, a recRetrieveCtrl structure is filled in, and the retrieve
 * function of the record type handler registered for the record's file type
 * is called.
 *
 * Output:
 *  - *basenamep     ODR-allocated copy of the record's database name;
 *  - *output_format, *rec_bufp, *rec_lenp, *addinfo   taken from the
 *    corresponding retrieveCtrl fields (which start out as input_format, 0,
 *    -1 and 0).
 *
 * Return value: a diagnostic code - retrieveCtrl.diagnostic after the
 * handler ran, or YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS when rec_get
 * fails or no handler is found for the file type.
 * NOTE(review): the last parameter line, several declarations, branch
 * keywords, the release of the record and the final return are on lines
 * missing from this listing.
 */
551 int zebra_record_fetch(ZebraHandle zh, zint sysno, int score,
552 zebra_snippets *hit_snippet, ODR odr,
553 oid_value input_format, Z_RecordComposition *comp,
554 oid_value *output_format,
555 char **rec_bufp, int *rec_lenp, char **basenamep,
559 char *fname, *file_type, *basename;
560 const char *elemsetname;
561 struct ZebraRecStream stream;
562 RecordAttr *recordAttr;
568 elemsetname = yaz_get_esn(comp);
570 /* processing zebra special elementset names of form 'zebra:: */
571 if (elemsetname && 0 == strncmp(elemsetname, "zebra::", 7))
572 return zebra_special_fetch(zh, sysno, score, odr,
574 input_format, output_format,
578 /* processing all other element set names */
579 rec = rec_get(zh->reg->records, sysno);
582 yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
584 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
588 recordAttr = rec_init_attr(zh->reg->zei, rec);
/* pick up the record's descriptive fields; the database name is copied into
   ODR memory for the caller */
590 file_type = rec->info[recInfo_fileType];
591 fname = rec->info[recInfo_filename];
592 basename = rec->info[recInfo_databaseName];
593 *basenamep = (char *) odr_malloc (odr, strlen(basename)+1);
594 strcpy (*basenamep, basename);
596 yaz_log(YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d",
/* NOTE(review): the result of zebra_create_record_stream is discarded here -
   confirm that a failed open cannot reach the code using `stream` below */
599 zebra_create_record_stream(zh, &rec, &stream);
603 zebra_snippets *snippet;
604 zebra_rec_keys_t reckeys = zebra_rec_keys_open();
606 struct recRetrieveCtrl retrieveCtrl;
/* describe the request for the record type handler */
608 retrieveCtrl.stream = &stream;
609 retrieveCtrl.fname = fname;
610 retrieveCtrl.localno = sysno;
611 retrieveCtrl.staticrank = recordAttr->staticrank;
612 retrieveCtrl.score = score;
613 retrieveCtrl.recordSize = recordAttr->recordSize;
614 retrieveCtrl.odr = odr;
/* the output format starts out equal to the requested one */
615 retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
616 retrieveCtrl.comp = comp;
617 retrieveCtrl.encoding = zh->record_encoding;
618 retrieveCtrl.diagnostic = 0;
619 retrieveCtrl.addinfo = 0;
620 retrieveCtrl.dh = zh->reg->dh;
621 retrieveCtrl.res = zh->res;
/* no result yet */
622 retrieveCtrl.rec_buf = 0;
623 retrieveCtrl.rec_len = -1;
624 retrieveCtrl.hit_snippet = hit_snippet;
625 retrieveCtrl.doc_snippet = zebra_snippets_create();
/* turn the keys stored with the record into the document snippet list */
627 zebra_rec_keys_set_buf(reckeys,
628 rec->info[recInfo_delKeys],
629 rec->size[recInfo_delKeys],
631 zebra_rec_keys_to_snippets(zh, reckeys, retrieveCtrl.doc_snippet);
632 zebra_rec_keys_close(reckeys);
635 /* for debugging purposes */
636 yaz_log(YLOG_LOG, "DOC SNIPPET:");
637 zebra_snippets_log(retrieveCtrl.doc_snippet, YLOG_LOG);
638 yaz_log(YLOG_LOG, "HIT SNIPPET:");
639 zebra_snippets_log(retrieveCtrl.hit_snippet, YLOG_LOG);
/* combine document and hit snippets into a window (the remaining argument
   is on a line missing from this listing) */
641 snippet = zebra_snippets_window(retrieveCtrl.doc_snippet,
642 retrieveCtrl.hit_snippet,
645 /* for debugging purposes */
646 yaz_log(YLOG_LOG, "WINDOW SNIPPET:");
647 zebra_snippets_log(snippet, YLOG_LOG);
/* find the handler registered for this record's file type */
650 if (!(rt = recType_byName(zh->reg->recTypes, zh->res,
651 file_type, &clientData)))
653 return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* let the handler render the record; its diagnostic becomes the result */
657 (*rt->retrieve)(clientData, &retrieveCtrl);
658 return_code = retrieveCtrl.diagnostic;
/* hand the handler's results back to the caller */
660 *output_format = retrieveCtrl.output_format;
661 *rec_bufp = (char *) retrieveCtrl.rec_buf;
662 *rec_lenp = retrieveCtrl.rec_len;
663 *addinfo = retrieveCtrl.addinfo;
/* release the snippet lists and the record stream */
666 zebra_snippets_destroy(snippet);
667 zebra_snippets_destroy(retrieveCtrl.doc_snippet);
670 stream.destroy(&stream);
679 * indent-tabs-mode: nil
681 * vim: shiftwidth=4 tabstop=8 expandtab