- struct it_key key;
- char *bp = i->buf;
-
- while ((*bp++ = *k++))
- ;
- memcpy (&key, k+1, sizeof(struct it_key));
- bp = encode_key_int ( (key.sysno - i->sysno) * 2 + *k, bp);
- if (i->sysno != key.sysno)
- {
- i->sysno = key.sysno;
- i->seqno = 0;
- }
- else if (!i->seqno && !key.seqno && i->cmd == *k)
- return;
- bp = encode_key_int (key.seqno - i->seqno, bp);
- i->seqno = key.seqno;
- i->cmd = *k;
- if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
- {
- logf (LOG_FATAL|LOG_ERRNO, "fwrite");
- exit (1);
- }
-}
-
-/* Set SORT_EXTRA to 1 to build the two-pass sort in key_flush: entries are
-   first ordered by their leading string (key_x_compare) and each run of
-   equal strings is then ordered with key_y_compare. */
-#define SORT_EXTRA 0
-
-#if SORT_EXTRA
-/* Byte offset inside an entry used by key_y_compare; key_flush assigns it
-   before calling qsort with that comparator. */
-static int key_y_len;
-
-/* qsort callback over an array of char* entries.  Orders primarily by
-   key_compare on the data that starts one byte after offset key_y_len and,
-   when that is equal, by the single byte stored at offset key_y_len. */
-static int key_y_compare (const void *p1, const void *p2)
-{
-    char *left = *(char **) p1 + key_y_len;
-    char *right = *(char **) p2 + key_y_len;
-    int r = key_compare (left + 1, right + 1);
-
-    if (r)
-        return r;
-    return *left - *right;
-}
-
-/* qsort callback over an array of char* entries: plain strcmp ordering of
-   the NUL-terminated string each entry starts with. */
-static int key_x_compare (const void *p1, const void *p2)
-{
-    const char *left = *(char **) p1;
-    const char *right = *(char **) p2;
-
-    return strcmp (left, right);
-}
-#endif
-
-/* Sort the buffered key entries and write them, through encode_key_write,
-   to the temporary file named by getFnameTmp for a new section number.
-   Returns at once when nothing is buffered (ptr_i <= 0).  A failing fopen
-   or fclose is logged and terminates the process with exit (1).  On a
-   normal return ptr_i and key_buf_used are both reset to 0. */
-void key_flush (void)
-{
-    FILE *fp;
-    char fname[200];
-    char *prev, *cur;
-    struct encode_info enc;
-#if SORT_EXTRA
-    int rest;
-#endif
-
-    if (ptr_i <= 0)
-        return;
-
-    key_file_no++;
-    logf (LOG_LOG, "sorting section %d", key_file_no);
-#if SORT_EXTRA
-    qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare);
-#else
-    qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_qsort_compare);
-#endif
-
-    /* Opening the section file is identical for both sort variants. */
-    getFnameTmp (fname, key_file_no);
-    fp = fopen (fname, "wb");
-    if (!fp)
-    {
-        logf (LOG_FATAL|LOG_ERRNO, "fopen %s", fname);
-        exit (1);
-    }
-    logf (LOG_LOG, "writing section %d", key_file_no);
-
-#if SORT_EXTRA
-    rest = ptr_i;
-    prev = key_buf[ptr_top-rest];
-    for (;;)
-    {
-        /* Walk forward until the leading string changes or the entries
-           run out; only then is the finished run sorted and written. */
-        --rest;
-        if (rest && !strcmp (prev, key_buf[ptr_top-rest]))
-            continue;
-
-        key_y_len = strlen(prev)+1;
-#if 0
-        logf (LOG_LOG, "key_y_len: %2d %02x %02x %s",
-              key_y_len, prev[0], prev[1], 2+prev);
-#endif
-        qsort (key_buf + ptr_top-ptr_i, ptr_i - rest,
-               sizeof(char*), key_y_compare);
-        cur = key_buf[ptr_top-ptr_i];
-        --key_y_len;
-        encode_key_init (&enc);
-        encode_key_write (cur, &enc, fp);
-        while (--ptr_i > rest)
-        {
-            cur = key_buf[ptr_top-ptr_i];
-            encode_key_write (cur+key_y_len, &enc, fp);
-        }
-        if (!rest)
-            break;
-        prev = key_buf[ptr_top-ptr_i];
-    }
-#else
-    prev = cur = key_buf[ptr_top-ptr_i];
-    encode_key_init (&enc);
-    encode_key_write (cur, &enc, fp);
-    for (--ptr_i; ptr_i > 0; --ptr_i)
-    {
-        cur = key_buf[ptr_top-ptr_i];
-        if (strcmp (cur, prev) == 0)
-        {
-            /* Same leading string as the previous entry: hand over the
-               entry starting at the string's terminating NUL. */
-            encode_key_write (cur + strlen(cur), &enc, fp);
-            continue;
-        }
-        /* New leading string: restart the encoder state. */
-        encode_key_init (&enc);
-        encode_key_write (cur, &enc, fp);
-        prev = cur;
-    }
-#endif
-    if (fclose (fp))
-    {
-        logf (LOG_FATAL|LOG_ERRNO, "fclose %s", fname);
-        exit (1);
-    }
-    logf (LOG_LOG, "finished section %d", key_file_no);
-    ptr_i = 0;
-    key_buf_used = 0;
-}
-
-/* Shut down key extraction: close the explain handle zti (bumping its run
-   number first when rGroup->flagRw is set), flush buffered keys, free
-   key_buf and close the record, match-dictionary and sort-index handles.
-   Returns key_file_no as it stands after the final key_flush. */
-int key_close (struct recordGroup *rGroup)
-{
-    const int writable = rGroup->flagRw;
-
-    if (writable)
-    {
-        zebraExplain_runNumberIncrement (zti, 1);
-    }
-    zebraExplain_close (zti, writable, 0);
-
-    key_flush ();
-    xfree (key_buf);
-
-    rec_close (&records);
-    dict_close (matchDict);
-    sortIdx_close (sortIdx);
-
-    logRecord (1);
-    return key_file_no;
-}
-
-/* Fill in the defaults of *w from the extraction control block *p: the
-   zebra_maps handle and sequence-number table come from p; the attribute
-   set, attribute use and register type get fixed initial values. */
-static void wordInit (struct recExtractCtrl *p, RecWord *w)
-{
-    /* fixed defaults */
-    w->reg_type = 'w';
-    w->attrSet = VAL_BIB1;
-    w->attrUse = 1016;
-
-    /* values shared with the extraction control block */
-    w->seqnos = p->seqno;
-    w->zebra_maps = p->zebra_maps;
-}
-
-/* One pending sort key; nodes form a singly linked list. */
-struct sortKey {
-    char *string;           /* heap copy of the key bytes (length bytes) */
-    int length;             /* number of bytes in string */
-    int attrSet;            /* attribute set the key was extracted for */
-    int attrUse;            /* attribute use the key was extracted for */
-    struct sortKey *next;   /* next pending key, or NULL */
-};
-
-/* Head of the pending sort-key list; NULL when the list is empty. */
-static struct sortKey *sortKeys = NULL;
-
-/* Growable byte buffer holding the encoded index keys, together with the
-   previous-entry state that addIndexString uses to shorten each entry. */
-struct recKeys {
-    int buf_used;           /* bytes of buf currently holding entries */
-    int buf_max;            /* allocated size of buf */
-    char *buf;              /* the encoded entries */
-    char prevAttrSet;       /* attribute set of the last entry written */
-    short prevAttrUse;      /* attribute use of the last entry written */
-    int prevSeqNo;          /* sequence number of the last entry written */
-};
-
-static struct recKeys reckeys;
-
-/* Append one encoded index entry for `string' (length bytes, need not be
-   NUL-terminated) to the global reckeys buffer.
-
-   Entry layout, as written below:
-     lead byte   bit 0 set: attribute set equals the previous entry's
-                 bit 1 set: attribute use equals the previous entry's
-                 bits 2-5 : 1 + (seqno - previous seqno) when that value
-                            is in 1..15, otherwise 0
-     attrSet     (unsigned char)  only when bit 0 is clear
-     attrUse     (unsigned short) only when bit 1 is clear
-     reg_type    one byte
-     string      the bytes followed by a NUL
-     seqno       (int) only when bits 2-5 are 0
-
-   The sequence counter selected by p->reg_type is incremented afterwards
-   unless it is zero. */
-static void addIndexString (RecWord *p, const char *string, int length)
-{
-    char *dst;
-    unsigned char attrSet;
-    unsigned short attrUse;
-    int lead = 0;
-    int diff = 0;
-    int *pseqno = &p->seqnos[p->reg_type];
-
-    /* Make room for the string itself plus 1024 bytes of slack for the
-       fixed-size parts of the entry.  The headroom test has to take
-       `length' into account, otherwise a long string overruns the buffer. */
-    if (reckeys.buf_used + length + 1024 > reckeys.buf_max)
-    {
-        char *b;
-
-        do
-            reckeys.buf_max += 128000;
-        while (reckeys.buf_used + length + 1024 > reckeys.buf_max);
-
-        b = xmalloc (reckeys.buf_max);
-        if (reckeys.buf_used > 0)
-            memcpy (b, reckeys.buf, reckeys.buf_used);
-        xfree (reckeys.buf);
-        reckeys.buf = b;
-    }
-    dst = reckeys.buf + reckeys.buf_used;
-
-    /* prevAttrSet / prevAttrUse are stored in signed fields; convert back
-       to the unsigned type before comparing so that values with the top
-       bit set still match themselves. */
-    attrSet = p->attrSet;
-    if (reckeys.buf_used > 0
-        && (unsigned char) reckeys.prevAttrSet == attrSet)
-        lead |= 1;
-    else
-        reckeys.prevAttrSet = attrSet;
-    attrUse = p->attrUse;
-    if (reckeys.buf_used > 0
-        && (unsigned short) reckeys.prevAttrUse == attrUse)
-        lead |= 2;
-    else
-        reckeys.prevAttrUse = attrUse;
-
-    /* Small forward steps of the sequence number go into the lead byte. */
-    diff = 1 + *pseqno - reckeys.prevSeqNo;
-    if (diff >= 1 && diff <= 15)
-        lead |= (diff << 2);
-    else
-        diff = 0;
-    reckeys.prevSeqNo = *pseqno;
-
-    *dst++ = lead;
-
-    if (!(lead & 1))
-    {
-        memcpy (dst, &attrSet, sizeof(attrSet));
-        dst += sizeof(attrSet);
-    }
-    if (!(lead & 2))
-    {
-        memcpy (dst, &attrUse, sizeof(attrUse));
-        dst += sizeof(attrUse);
-    }
-    *dst++ = p->reg_type;
-    memcpy (dst, string, length);
-    dst += length;
-    *dst++ = '\0';
-
-    if (!diff)
-    {
-        /* step did not fit in the lead byte: store the full number */
-        memcpy (dst, pseqno, sizeof(*pseqno));
-        dst += sizeof(*pseqno);
-    }
-    reckeys.buf_used = dst - reckeys.buf;
-    if (*pseqno)
-        (*pseqno)++;
-}
-
-/* Remember `string' (length bytes) as the sort key for p's attribute
-   set/use pair by pushing a new node onto the front of sortKeys.  If a
-   key for the same pair is already pending the call does nothing, so the
-   first string seen for a pair is the one kept. */
-static void addSortString (RecWord *p, const char *string, int length)
-{
-    struct sortKey *node = sortKeys;
-
-    while (node)
-    {
-        if (node->attrSet == p->attrSet && node->attrUse == p->attrUse)
-            return;
-        node = node->next;
-    }
-
-    node = xmalloc (sizeof(*node));
-    node->string = xmalloc (length);
-    memcpy (node->string, string, length);
-    node->length = length;
-    node->attrSet = p->attrSet;
-    node->attrUse = p->attrUse;
-
-    /* link in at the head of the list */
-    node->next = sortKeys;
-    sortKeys = node;
-}
-
-/* Route a non-empty extracted string either to the sort-key list or to
-   the index-key buffer, depending on what zebra_maps_is_sort reports for
-   p->reg_type.  length must be positive (asserted). */
-static void addString (RecWord *p, const char *string, int length)
-{
-    assert (length > 0);
-
-    if (!zebra_maps_is_sort (p->zebra_maps, p->reg_type))
-    {
-        addIndexString (p, string, length);
-        return;
-    }
-    addSortString (p, string, length);
-}
-
-/* Split p->string (p->length bytes) into words and pass each one to
-   addString.  The input is consumed through zebra_maps_input; mapped
-   output equal to CHR_SPACE separates words.  At most IT_MAX_WORD bytes
-   of a word are kept, the rest of an over-long word is read and dropped.
-   Scanning stops when the input is exhausted or an empty word results. */
-static void addIncompleteField (RecWord *p)
-{
-    const char *b = p->string;
-    const char **map = 0;
-
-    if (p->length > 0)
-        map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, p->length);
-
-    while (map)
-    {
-        char word[IT_MAX_WORD+1];
-        int n = 0;
-        int left;
-
-        /* Skip spaces */
-        while (map && *map && **map == *CHR_SPACE)
-        {
-            left = p->length - (b - p->string);
-            map = left > 0
-                ? zebra_maps_input(p->zebra_maps, p->reg_type, &b, left)
-                : 0;
-        }
-        if (!map)
-            break;
-
-        /* Collect mapped output up to the next space */
-        while (map && *map && **map != *CHR_SPACE)
-        {
-            const char *cp;
-
-            for (cp = *map; n < IT_MAX_WORD && *cp; cp++)
-                word[n++] = *cp;
-            left = p->length - (b - p->string);
-            map = left > 0
-                ? zebra_maps_input(p->zebra_maps, p->reg_type, &b, left)
-                : 0;
-        }
-        if (n == 0)
-            return;
-        addString (p, word, n);
-    }
-}
-
-/* Build a single key from the whole of p->string (p->length bytes) and
-   pass it to addString.  The input is consumed through zebra_maps_input;
-   leading spaces are dropped and every later run of spaces becomes one
-   CHR_SPACE byte.  The key is cut off at IT_MAX_WORD bytes.  Nothing is
-   added when the result is empty. */
-static void addCompleteField (RecWord *p)
-{
-    const char *b = p->string;
-    char key[IT_MAX_WORD+1];
-    const char **map = 0;
-    int n = 0;
-    int remain = p->length;
-
-    if (remain > 0)
-        map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain);
-
-    /* NOTE(review): if zebra_maps_input can return a non-NULL map whose
-       first element is NULL, neither inner loop makes progress here --
-       confirm against that function's contract. */
-    while (remain > 0 && n < IT_MAX_WORD)
-    {
-        /* swallow a run of spaces */
-        while (map && *map && **map == *CHR_SPACE)
-        {
-            remain = p->length - (b - p->string);
-            map = remain > 0
-                ? zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain)
-                : 0;
-        }
-        if (!map)
-            break;
-
-        /* one separator between words, none before the first */
-        if (n && n < IT_MAX_WORD)
-            key[n++] = *CHR_SPACE;
-
-        while (map && *map && **map != *CHR_SPACE && n < IT_MAX_WORD)
-        {
-            const char *cp;
-
-            for (cp = *map; n < IT_MAX_WORD && *cp; cp++)
-                key[n++] = *cp;
-            remain = p->length - (b - p->string);
-            map = remain > 0
-                ? zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain)
-                : 0;
-        }
-    }
-    if (n == 0)
-        return;
-    addString (p, key, n);
-}
-
-/* Extract keys from the word *p: as one complete-field key when
-   zebra_maps_is_complete says so for p->reg_type, word by word otherwise. */
-static void addRecordKey (RecWord *p)
-{
-    if (!zebra_maps_is_complete (p->zebra_maps, p->reg_type))
-    {
-        addIncompleteField(p);
-        return;
-    }
-    addCompleteField (p);
-}
-
-/* Hand every pending sort key to the sort index for record `sysno' and
-   release the list.  For each node the attribute use selects the sort
-   type before the key bytes are added.  `cmd' is not used here. */
-static void flushSortKeys (SYSNO sysno, int cmd)
-{
-    struct sortKey *node;
-    struct sortKey *following;
-
-    sortIdx_sysno (sortIdx, sysno);
-    for (node = sortKeys; node; node = following)
-    {
-        /* fetch the link first: the node is freed below */
-        following = node->next;
-
-        sortIdx_type (sortIdx, node->attrUse);
-        sortIdx_add (sortIdx, node->string, node->length);
-        xfree (node->string);
-        xfree (node);
-    }
-    sortKeys = NULL;
-}
-
-/* Decode every entry of *reckeys (the layout addIndexString writes) and
-   append a sort entry for it to key_buf.  Each sort entry consists of the
-   bytes produced by key_SU_code for the attribute set/use pair, the bytes
-   following the entry's attribute fields up to and including their NUL,
-   one byte holding `cmd', and a struct it_key carrying `sysno' and the
-   decoded sequence number.  key_flush is called first whenever fewer than
-   1024 bytes of headroom remain.  The explain record count is adjusted by
-   +1 when cmd is non-zero and by -1 otherwise. */
-static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys)
-{
-    unsigned char attrSet = (unsigned char) -1;
-    unsigned short attrUse = (unsigned short) -1;
-    int seqno = 0;
-    int off;
-
-    zebraExplain_recordCountIncrement (zti, cmd ? 1 : -1);
-    for (off = 0; off < reckeys->buf_used; )
-    {
-        const char *src = reckeys->buf + off;
-        struct it_key key;
-        char *out;
-        int lead = *src++;
-        int step;
-        int ch;
-
-        /* attribute fields are present only when they changed */
-        if (!(lead & 1))
-        {
-            memcpy (&attrSet, src, sizeof(attrSet));
-            src += sizeof(attrSet);
-        }
-        if (!(lead & 2))
-        {
-            memcpy (&attrUse, src, sizeof(attrUse));
-            src += sizeof(attrUse);
-        }
-
-        if (key_buf_used + 1024 > (ptr_top-ptr_i)*sizeof(char*))
-            key_flush ();
-        ++ptr_i;
-        out = (char *) key_buf;
-        key_buf[ptr_top-ptr_i] = out + key_buf_used;
-
-        ch = zebraExplain_lookupSU (zti, attrSet, attrUse);
-        if (ch < 0)
-            ch = zebraExplain_addSU (zti, attrSet, attrUse);
-        assert (ch > 0);
-        key_buf_used += key_SU_code (ch, out + key_buf_used);
-
-        /* copy the string, then terminate it and append the command */
-        for (; *src; src++)
-            out[key_buf_used++] = *src;
-        src++;
-        out[key_buf_used++] = '\0';
-        out[key_buf_used++] = cmd;
-
-        /* bits 2-5 of lead: 0 means an explicit seqno follows */
-        step = (lead >> 2) & 15;
-        if (step)
-            seqno += step - 1;
-        else
-        {
-            memcpy (&seqno, src, sizeof(seqno));
-            src += sizeof(seqno);
-        }
-
-        key.sysno = sysno;
-        key.seqno = seqno;
-        memcpy (out + key_buf_used, &key, sizeof(key));
-        key_buf_used += sizeof(key);
-
-        off = src - reckeys->buf;
-    }
-    assert (off == reckeys->buf_used);
-}
-
-/* Scan the encoded entries of *reckeys for those whose attribute set and
-   use equal attrSetS / attrUseS.  Returns a pointer to a static table of
-   32 slots (overwritten by every call): slot n points at the data that
-   follows the attribute fields of the matching entry whose sequence
-   number is n above that of the first match.  Only offsets 0..30 are
-   stored, so the last slot is always NULL; unused slots are NULL too. */
-static const char **searchRecordKey (struct recKeys *reckeys,
-                                     int attrSetS, int attrUseS)
-{
-    static const char *ws[32];
-    int off;
-    int firstSeq = -1;
-    int slot;
-    short attrUse;
-    char attrSet;
-    int seqno = 0;
-
-    for (slot = 0; slot < 32; slot++)
-        ws[slot] = NULL;
-
-    for (off = 0; off < reckeys->buf_used; )
-    {
-        const char *src = reckeys->buf + off;
-        const char *start;
-        int lead = *src++;
-        int step;
-
-        if (!(lead & 1))
-        {
-            memcpy (&attrSet, src, sizeof(attrSet));
-            src += sizeof(attrSet);
-        }
-        if (!(lead & 2))
-        {
-            memcpy (&attrUse, src, sizeof(attrUse));
-            src += sizeof(attrUse);
-        }
-
-        /* remember where the data starts, then step past its NUL */
-        start = src;
-        while (*src)
-            src++;
-        src++;
-
-        step = (lead >> 2) & 15;
-        if (step)
-            seqno += step - 1;
-        else
-        {
-            memcpy (&seqno, src, sizeof(seqno));
-            src += sizeof(seqno);
-        }
-
-        if (attrUseS == attrUse && attrSetS == attrSet)
-        {
-            if (firstSeq == -1)
-                firstSeq = seqno;
-            slot = seqno - firstSeq;
-            if (slot >= 0 && slot < 31)
-                ws[slot] = start;
-        }
-
-        off = src - reckeys->buf;
-    }
-    assert (off == reckeys->buf_used);
-    return ws;
-}
-
-struct file_read_info { /* read state behind the file_seek / file_tell / file_read callbacks */
- off_t file_max; /* maximum offset so far */
- off_t file_offset; /* current offset */
- off_t file_moffset; /* offset of rec/rec boundary */
- int file_more; /* NOTE(review): not used in the visible code; presumably a "more data follows" flag -- confirm */
- int fd; /* descriptor given to file_read_start; used for lseek/read when sdrbuf is not set */
- char *sdrbuf; /* when non-NULL, file_read copies from this memory buffer instead of reading fd */
- int sdrmax; /* upper bound on offsets served from sdrbuf */
-};
-
-/* Allocate the read state for descriptor `fd'.  Every field is given a
-   defined starting value: all offsets 0, no memory buffer.  Release the
-   result with file_read_stop. */
-static struct file_read_info *file_read_start (int fd)
-{
-    struct file_read_info *fi = xmalloc (sizeof(*fi));
-
-    fi->fd = fd;
-    fi->file_max = 0;
-    /* file_offset and file_more used to be left unset, so file_tell or
-       file_read before the first file_seek saw indeterminate values. */
-    fi->file_offset = 0;
-    fi->file_moffset = 0;
-    fi->file_more = 0;
-    fi->sdrbuf = 0;
-    fi->sdrmax = 0;
-    return fi;
-}
-
-/* Release a read state obtained from file_read_start.  Only the state
-   structure itself is freed; fd and sdrbuf are left untouched. */
-static void file_read_stop (struct file_read_info *fi)
-{
-    xfree (fi);
-}
-
-/* Seek callback: record `offset' as the current position.  When a memory
-   buffer is installed the offset itself is returned; otherwise the
-   descriptor is repositioned and the result of lseek is returned. */
-static off_t file_seek (void *handle, off_t offset)
-{
-    struct file_read_info *fi = handle;
-
-    fi->file_offset = offset;
-    return fi->sdrbuf ? offset : lseek (fi->fd, offset, SEEK_SET);
-}
-
-/* Tell callback: report the current offset kept in the read state. */
-static off_t file_tell (void *handle)
-{
-    const struct file_read_info *fi = handle;
-
-    return fi->file_offset;
-}
-
-/* Read callback: deliver up to `count' bytes at the current offset into
-   `buf'.  The bytes come from the memory buffer sdrbuf when one is
-   installed, otherwise from read() on the descriptor.
-
-   Returns the number of bytes delivered; 0 when the current offset lies
-   outside the memory buffer or nothing is left; for the descriptor path
-   whatever read() returned (including -1 on error).  On a positive
-   result the current offset is advanced and file_max is raised to it
-   when it has moved past the previous maximum. */
-static int file_read (void *handle, char *buf, size_t count)
-{
-    struct file_read_info *p = handle;
-    int r;
-
-    if (p->sdrbuf)
-    {
-        /* An offset outside [0, sdrmax) has no data.  Without this test a
-           position past the end produced a negative length that reached
-           memcpy as a huge size, and a negative position read before the
-           start of the buffer. */
-        r = 0;
-        if (p->file_offset >= 0 && p->file_offset < p->sdrmax)
-        {
-            size_t avail = (size_t) (p->sdrmax - p->file_offset);
-
-            /* avail never exceeds sdrmax, so the result fits in an int */
-            r = (int) (count < avail ? count : avail);
-            if (r > 0)
-                memcpy (buf, p->sdrbuf + p->file_offset, r);
-        }
-    }
-    else
-        r = read (p->fd, buf, count);
-    if (r > 0)
-    {
-        p->file_offset += r;
-        if (p->file_offset > p->file_max)
-            p->file_max = p->file_offset;
-    }
-    return r;
-}
-
-static void file_begin (void *handle)
-{
- struct file_read_info *p = handle;