X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=25a20fc69ce8992b18f33230450bf36a21a6b3fc;hb=b48a14d0d5fafde2fbbcc3c45451fc6f6de93c00;hp=de6aa9e776431ecbab32bc66729547e9b997c4f8;hpb=47eeb5384a8fae8bcac8afb8a84782ca094683f9;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index de6aa9e..25a20fc 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.186 2005-06-14 20:28:54 adam Exp $ +/* $Id: extract.c,v 1.188 2005-08-05 10:40:13 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -86,51 +86,57 @@ static void extract_init (struct recExtractCtrl *p, RecWord *w) { w->zebra_maps = p->zebra_maps; w->seqno = 1; +#if NATTR +#else w->attrSet = VAL_BIB1; w->attrUse = 1016; - w->attrStr = 0; - w->reg_type = 'w'; +#endif + w->index_name = 0; + w->index_type = 'w'; w->extractCtrl = p; w->record_id = 0; w->section_id = 0; } -static const char **searchRecordKey (ZebraHandle zh, - struct recKeys *reckeys, - int attrSetS, int attrUseS) +static void searchRecordKey(ZebraHandle zh, + const struct recKeys *reckeys, + int attrSetS, int attrUseS, + const char **ws, int ws_length) { - static const char *ws[32]; void *decode_handle = iscz1_start(); int off = 0; int startSeq = -1; int seqno = 0; int i; + int ch; - for (i = 0; i<32; i++) + for (i = 0; ireg->zei, + attrSetS, attrUseS); + if (ch < 0) + return ; + while (off < reckeys->buf_used) { const char *src = reckeys->buf + off; struct it_key key; char *dst = (char*) &key; - int attrSet, attrUse; iscz1_decode(decode_handle, &dst, &src); assert(key.len <= 4 && key.len > 2); - attrSet = (int) key.mem[0] >> 16; - attrUse = (int) key.mem[0] & 65535; seqno = (int) key.mem[key.len-1]; - if (attrUseS == attrUse && attrSetS == attrSet) + if (key.mem[0] == ch) { int woff; if (startSeq == -1) startSeq = seqno; woff = seqno - startSeq; - if (woff >= 0 && woff < 31) + if (woff >= 0 && woff < ws_length) ws[woff] = src; } @@ -140,7 +146,6 @@ static const char **searchRecordKey (ZebraHandle zh, } iscz1_stop(decode_handle); assert (off == reckeys->buf_used); - return ws; } struct file_read_info { @@ -215,7 +220,6 @@ static char *fileMatchStr (ZebraHandle zh, static char dstBuf[2048]; /* static here ??? */ char *dst = dstBuf; const char *s = spec; - static const char **w; while (1) { @@ -225,6 +229,7 @@ static char *fileMatchStr (ZebraHandle zh, break; if (*s == '(') { + const char *ws[32]; char attset_str[64], attname_str[64]; data1_attset *attset; int i; @@ -257,8 +262,7 @@ static char *fileMatchStr (ZebraHandle zh, else attUse = atoi (attname_str); } - w = searchRecordKey (zh, reckeys, attSet, attUse); - assert (w); + searchRecordKey (zh, reckeys, attSet, attUse, ws, 32); if (*s == ')') { @@ -274,15 +278,15 @@ static char *fileMatchStr (ZebraHandle zh, s++; for (i = 0; i<32; i++) - if (matchFlag[i] && w[i]) + if (matchFlag[i] && ws[i]) { if (first) { *dst++ = ' '; first = 0; } - strcpy (dst, w[i]); - dst += strlen(w[i]); + strcpy (dst, ws[i]); + dst += strlen(ws[i]); } if (first) { @@ -414,9 +418,11 @@ static int file_extract_record(ZebraHandle zh, /* we are going to read from a file, so prepare the extraction */ create_rec_keys_codec(&zh->reg->keys); - +#if NATTR + create_rec_keys_codec(&zh->reg->sortKeys); +#else zh->reg->sortKeys.buf_used = 0; - +#endif recordOffset = fi->file_moffset; extractCtrl.handle = zh; extractCtrl.offset = fi->file_moffset; @@ -549,7 +555,11 @@ static int file_extract_record(ZebraHandle zh, { /* record already exists */ struct recKeys delkeys; +#if NATTR + struct recKeys sortKeys; +#else struct sortKeys sortKeys; +#endif rec = rec_get (zh->reg->records, *sysno); assert (rec); @@ -867,9 +877,11 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, extractCtrl.fh = &fc; create_rec_keys_codec(&zh->reg->keys); - +#if NATTR + create_rec_keys_codec(&zh->reg->sortKeys); +#else zh->reg->sortKeys.buf_used = 0; - +#endif if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0])) { if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0], @@ -1001,7 +1013,11 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, { /* record already exists */ struct recKeys delkeys; +#if NATTR + struct recKeys sortKeys; +#else struct sortKeys sortKeys; +#endif if (!allow_update) { @@ -1190,9 +1206,11 @@ int explain_extract (void *handle, Record rec, data1_node *n) } create_rec_keys_codec(&zh->reg->keys); - +#if NATTR + create_rec_keys_codec(&zh->reg->sortKeys); +#else zh->reg->sortKeys.buf_used = 0; - +#endif extractCtrl.init = extract_init; extractCtrl.tokenAdd = extract_token_add; extractCtrl.schemaAdd = extract_schema_add; @@ -1213,7 +1231,11 @@ int explain_extract (void *handle, Record rec, data1_node *n) if (rec->size[recInfo_delKeys]) { struct recKeys delkeys; +#if NATTR + struct recKeys sortkeys; +#else struct sortKeys sortkeys; +#endif delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; @@ -1448,11 +1470,11 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) } void extract_add_it_key (ZebraHandle zh, + struct recKeys *keys, int reg_type, const char *str, int slen, struct it_key *key) { char *dst; - struct recKeys *keys = &zh->reg->keys; const char *src = (char*) key; if (keys->buf_used+1024 > keys->buf_max) @@ -1467,7 +1489,9 @@ void extract_add_it_key (ZebraHandle zh, iscz1_encode(keys->codec_handle, &dst, &src); +#if REG_TYPE_PREFIX *dst++ = reg_type; +#endif memcpy (dst, str, slen); dst += slen; *dst++ = '\0'; @@ -1557,17 +1581,23 @@ void extract_add_index_string (RecWord *p, const char *str, int length) ZebraExplainInfo zei = zh->reg->zei; int ch; - if (p->attrStr) + if (p->index_name) { - ch = zebraExplain_lookup_attr_str(zei, p->attrStr); + ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); if (ch < 0) - ch = zebraExplain_add_attr_str(zei, p->attrStr); + ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); } else { - ch = zebraExplain_lookup_attr_su(zei, p->attrSet, p->attrUse); +#if NATTR + return; +#else + ch = zebraExplain_lookup_attr_su(zei, p->index_type, + p->attrSet, p->attrUse); if (ch < 0) - ch = zebraExplain_add_attr_su(zei, p->attrSet, p->attrUse); + ch = zebraExplain_add_attr_su(zei, p->index_type, + p->attrSet, p->attrUse); +#endif } key.len = 4; key.mem[0] = ch; @@ -1582,12 +1612,44 @@ void extract_add_index_string (RecWord *p, const char *str, int length) p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno); #endif - extract_add_it_key(p->extractCtrl->handle, p->reg_type, str, + extract_add_it_key(p->extractCtrl->handle, + &zh->reg->keys, + p->index_type, str, length, &key); } -static void extract_add_sort_string (RecWord *p, const char *str, - int length) +#if NATTR +static void extract_add_sort_string (RecWord *p, const char *str, int length) +{ + struct it_key key; + + ZebraHandle zh = p->extractCtrl->handle; + ZebraExplainInfo zei = zh->reg->zei; + int ch; + + if (p->index_name) + { + ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); + if (ch < 0) + ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); + } + else + { + return; + } + key.len = 4; + key.mem[0] = ch; + key.mem[1] = p->record_id; + key.mem[2] = p->section_id; + key.mem[3] = p->seqno; + + extract_add_it_key(p->extractCtrl->handle, + &zh->reg->sortKeys, + p->index_type, str, + length, &key); +} +#else +static void extract_add_sort_string (RecWord *p, const char *str, int length) { ZebraHandle zh = p->extractCtrl->handle; struct sortKeys *sk = &zh->reg->sortKeys; @@ -1622,11 +1684,12 @@ static void extract_add_sort_string (RecWord *p, const char *str, memcpy (sk->buf + off, str, length); sk->buf_used = off + length; } +#endif void extract_add_string (RecWord *p, const char *string, int length) { assert (length > 0); - if (zebra_maps_is_sort (p->zebra_maps, p->reg_type)) + if (zebra_maps_is_sort (p->zebra_maps, p->index_type)) extract_add_sort_string (p, string, length); else extract_add_index_string (p, string, length); @@ -1641,7 +1704,7 @@ static void extract_add_incomplete_field (RecWord *p) yaz_log(YLOG_DEBUG, "Incomplete field, w='%.*s'", p->term_len, p->term_buf); if (remain > 0) - map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0); + map = zebra_maps_input(p->zebra_maps, p->index_type, &b, remain, 0); while (map) { @@ -1653,7 +1716,8 @@ static void extract_add_incomplete_field (RecWord *p) { remain = p->term_len - (b - p->term_buf); if (remain > 0) - map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0); + map = zebra_maps_input(p->zebra_maps, p->index_type, &b, + remain, 0); else map = 0; } @@ -1668,7 +1732,7 @@ static void extract_add_incomplete_field (RecWord *p) buf[i++] = *(cp++); remain = p->term_len - (b - p->term_buf); if (remain > 0) - map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0); + map = zebra_maps_input(p->zebra_maps, p->index_type, &b, remain, 0); else map = 0; } @@ -1690,7 +1754,7 @@ static void extract_add_complete_field (RecWord *p) p->term_len, p->term_buf); if (remain > 0) - map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain, 1); + map = zebra_maps_input (p->zebra_maps, p->index_type, &b, remain, 1); while (remain > 0 && i < IT_MAX_WORD) { @@ -1701,7 +1765,7 @@ static void extract_add_complete_field (RecWord *p) if (remain > 0) { int first = i ? 0 : 1; /* first position */ - map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, first); + map = zebra_maps_input(p->zebra_maps, p->index_type, &b, remain, first); } else map = 0; @@ -1730,7 +1794,7 @@ static void extract_add_complete_field (RecWord *p) remain = p->term_len - (b - p->term_buf); if (remain > 0) { - map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, + map = zebra_maps_input (p->zebra_maps, p->index_type, &b, remain, 0); } else @@ -1751,13 +1815,13 @@ void extract_token_add (RecWord *p) p->reg_type, p->attrSet, p->attrUse, p->seqno, p->length, p->string); #endif - if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0, + if ((wrbuf = zebra_replace(p->zebra_maps, p->index_type, 0, p->term_buf, p->term_len))) { p->term_buf = wrbuf_buf(wrbuf); p->term_len = wrbuf_len(wrbuf); } - if (zebra_maps_is_complete (p->zebra_maps, p->reg_type)) + if (zebra_maps_is_complete (p->zebra_maps, p->index_type)) extract_add_complete_field (p); else extract_add_incomplete_field(p); @@ -1794,6 +1858,41 @@ void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid) zebraExplain_addSchema (zh->reg->zei, oid); } +#if NATTR +void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, + int cmd, struct recKeys *reckeys) +{ + SortIdx sortIdx = zh->reg->sortIdx; + void *decode_handle = iscz1_start(); + int off = 0; + int ch = 0; + + while (off < reckeys->buf_used) + { + const char *src = reckeys->buf + off; + struct it_key key; + char *dst = (char*) &key; + + iscz1_decode(decode_handle, &dst, &src); + assert(key.len == 4); + + ch = (int) key.mem[0]; /* ordinal for field/use/attribute */ + + sortIdx_type(sortIdx, ch); + if (cmd == 1) + sortIdx_add(sortIdx, src, strlen(src)); + else + sortIdx_add(sortIdx, "", 1); + + src += strlen(src); + src++; + + off = src - reckeys->buf; + } + assert (off == reckeys->buf_used); + iscz1_stop(decode_handle); +} +#else void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, int cmd, struct sortKeys *sk) { @@ -1818,6 +1917,7 @@ void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, off += slen; } } +#endif void encode_key_init (struct encode_info *i) {