+Common stream reader interface for record filters (struct ZebraRecStream).
+
+Debian package fix: packages idzebra-2.0 + libidzebra-2.0-modules did
+not depend properly on sub packages.
+
Experimental segment facility (for matching of words within one
field/segment).
# Simple Zebra configuration file
-# $Id: zebra.cfg,v 1.13 2006-07-03 14:27:05 adam Exp $
+# $Id: zebra.cfg,v 1.14 2006-08-22 13:39:23 adam Exp $
#
# Where the schema files, attribute files, etc are located.
profilePath: .:../../tab
#rank: zvrank
recordId: (bib-1,title)
+storedata: 1
+
modulePath: ../../index/.libs
#shadow: shadow:100M
# register: register:100M
-/* $Id: recctrl.h,v 1.28 2006-08-16 13:16:35 adam Exp $
+/* $Id: recctrl.h,v 1.29 2006-08-22 13:39:25 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
/* 1015 */
#define ZEBRA_XPATH_ATTR_CDATA "_XPATH_ATTR_CDATA"
-/* single word entity */
+/** Indexing token (the unit handed to the tokenAdd callback) */
typedef struct {
+ /** index type ('w', 'p', ..) */
unsigned index_type;
+ /** index name, e.g. "title" */
const char *index_name;
+ /** token char data (length is given by term_len) */
const char *term_buf;
+ /** length of term_buf */
int term_len;
+ /** sequence number */
zint seqno;
+ /** segment number */
zint segment;
+ /** record ID */
zint record_id;
+ /** section ID */
zint section_id;
struct recExtractCtrl *extractCtrl;
} RecWord;
-/* Extract record control */
+/** \brief record reader stream (common reader interface for record filters) */
+struct ZebraRecStream {
+ /** client data (passed to the functions below via the stream pointer) */
+ void *fh;
+ /** \brief read function: fills buf with at most count bytes; callers treat a result <= 0 as "no more data" */
+ int (*readf)(struct ZebraRecStream *s, char *buf, size_t count);
+ /** \brief seek function: repositions the stream at offset */
+ off_t (*seekf)(struct ZebraRecStream *s, off_t offset);
+ /** \brief tell function: reports the current stream offset */
+ off_t (*tellf)(struct ZebraRecStream *s);
+ /** \brief set and get of record end position: filters pass a non-NULL offset to mark where the record ends; the extractor passes NULL to query it and treats a result of 0 as "not marked" */
+ off_t (*endf)(struct ZebraRecStream *s, off_t *offset);
+ /** \brief close and destroy stream */
+ void (*destroy)(struct ZebraRecStream *s);
+};
+
+/** \brief record extract for indexing */
struct recExtractCtrl {
- void *fh; /* File handle and read function */
- int (*readf)(void *fh, char *buf, size_t count);
- off_t (*seekf)(void *fh, off_t offset); /* seek function */
- off_t (*tellf)(void *fh); /* tell function */
- void (*endf)(void *fh, off_t offset); /* end of record position */
- off_t offset; /* start offset */
+ struct ZebraRecStream *stream;
void (*init)(struct recExtractCtrl *p, RecWord *w);
void *clientData;
void (*tokenAdd)(RecWord *w);
/* Retrieve record control */
struct recRetrieveCtrl {
+ struct ZebraRecStream *stream;
/* Input parameters ... */
Res res; /* Resource pool */
ODR odr; /* ODR used to create response */
- void *fh; /* File descriptor and read function */
- int (*readf)(void *fh, char *buf, size_t count);
- off_t (*seekf)(void *fh, off_t offset);
- off_t (*tellf)(void *fh);
oid_value input_format; /* Preferred record syntax */
Z_RecordComposition *comp; /* formatting instructions */
char *encoding; /* preferred character encoding */
RecType recType_byName(RecTypes rts, Res res, const char *name,
void **clientDataP);
-
-#define KEY_SEGMENT_SIZE 1024
-
YAZ_END_CDECL
#endif
-/* $Id: recgrs.h,v 1.5 2006-08-14 10:40:14 adam Exp $
+/* $Id: recgrs.h,v 1.6 2006-08-22 13:39:25 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
YAZ_BEGIN_CDECL
struct grs_read_info {
+ struct ZebraRecStream *stream;
void *clientData;
- int (*readf)(void *, char *, size_t);
- off_t (*seekf)(void *, off_t);
- off_t (*tellf)(void *);
- void (*endf)(void *, off_t);
- void *fh;
- off_t offset;
NMEM mem;
data1_handle dh;
};
-## $Id: Makefile.am,v 1.50 2006-07-06 07:56:00 adam Exp $
+## $Id: Makefile.am,v 1.51 2006-08-22 13:39:26 adam Exp $
aux_libs = \
../rset/libidzebra-rset.la \
orddict.c orddict.h \
rank.h rank1.c ranksimilarity.c rankstatic.c \
recindex.c recindex.h recindxp.h reckeys.c reckeys.h recstat.c retrieve.c \
- sortidx.c symtab.c \
+ sortidx.c symtab.c stream.c \
update_path.c update_file.c trunc.c \
zebraapi.c zinfo.c zinfo.h zserver.h zsets.c zrpn.c
-/* $Id: alvis.c,v 1.2 2006-08-14 10:40:15 adam Exp $
+/* $Id: alvis.c,v 1.3 2006-08-22 13:39:26 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
static int ioread_ex(void *context, char *buffer, int len)
{
struct recExtractCtrl *p = context;
- return (*p->readf)(p->fh, buffer, len);
+ return p->stream->readf(p->stream, buffer, len);
}
static int ioclose_ex(void *context)
static int ioread_ret(void *context, char *buffer, int len)
{
struct recRetrieveCtrl *p = context;
- return (*p->readf)(p->fh, buffer, len);
+ return p->stream->readf(p->stream, buffer, len);
}
static int ioclose_ret(void *context)
-/* $Id: extract.c,v 1.227 2006-08-16 13:16:36 adam Exp $
+/* $Id: extract.c,v 1.228 2006-08-22 13:39:27 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
}
}
-struct file_read_info {
- off_t file_max; /* maximum offset so far */
- off_t file_offset; /* current offset */
- off_t file_moffset; /* offset of rec/rec boundary */
- int file_more;
- int fd;
-};
-
-static struct file_read_info *file_read_start (int fd)
-{
- struct file_read_info *fi = (struct file_read_info *)
- xmalloc (sizeof(*fi));
-
- fi->fd = fd;
- fi->file_max = 0;
- fi->file_moffset = 0;
- fi->file_offset = 0;
- fi->file_more = 0;
- return fi;
-}
-
-static void file_read_stop (struct file_read_info *fi)
-{
- xfree (fi);
-}
-
-static off_t file_seek (void *handle, off_t offset)
-{
- struct file_read_info *p = (struct file_read_info *) handle;
- p->file_offset = offset;
- return lseek (p->fd, offset, SEEK_SET);
-}
-
-static off_t file_tell (void *handle)
-{
- struct file_read_info *p = (struct file_read_info *) handle;
- return p->file_offset;
-}
-
-static int file_read (void *handle, char *buf, size_t count)
-{
- struct file_read_info *p = (struct file_read_info *) handle;
- int fd = p->fd;
- int r;
- r = read (fd, buf, count);
- if (r > 0)
- {
- p->file_offset += r;
- if (p->file_offset > p->file_max)
- p->file_max = p->file_offset;
- }
- return r;
-}
-
-static void file_end (void *handle, off_t offset)
-{
- struct file_read_info *p = (struct file_read_info *) handle;
-
- if (offset != p->file_moffset)
- {
- p->file_moffset = offset;
- p->file_more = 1;
- }
-}
-
#define FILE_MATCH_BLANK "\t "
static char *fileMatchStr (ZebraHandle zh,
"", 0);
}
-static ZEBRA_RES file_extract_record(ZebraHandle zh,
- SYSNO *sysno, const char *fname,
- int deleteFlag,
- struct file_read_info *fi,
- int force_update,
- RecType recType,
- void *recTypeClientData)
-{
- const char *match_str_to_print = "";
- RecordAttr *recordAttr;
- int r;
- const char *matchStr = 0;
- SYSNO sysnotmp;
- Record rec;
- off_t recordOffset = 0;
- struct recExtractCtrl extractCtrl;
-
- /* announce database */
- if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
- {
- if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
- zh->m_explain_database))
- return ZEBRA_FAIL;
- }
-
- if (fi->fd != -1)
- {
- /* we are going to read from a file, so prepare the extraction */
- zebra_rec_keys_reset(zh->reg->keys);
-
- zebra_rec_keys_reset(zh->reg->sortKeys);
- recordOffset = fi->file_moffset;
- extractCtrl.handle = zh;
- extractCtrl.offset = fi->file_moffset;
- extractCtrl.readf = file_read;
- extractCtrl.seekf = file_seek;
- extractCtrl.tellf = file_tell;
- extractCtrl.endf = file_end;
- extractCtrl.fh = fi;
- extractCtrl.init = extract_init;
- extractCtrl.tokenAdd = extract_token_add;
- extractCtrl.schemaAdd = extract_schema_add;
- extractCtrl.dh = zh->reg->dh;
- extractCtrl.match_criteria[0] = '\0';
- extractCtrl.staticrank = 0;
-
- extractCtrl.first_record = fi->file_offset ? 0 : 1;
-
- extract_set_store_data_prepare(&extractCtrl);
-
- init_extractCtrl(zh, &extractCtrl);
-
- if (!zh->m_flag_rw)
- printf ("File: %s " ZINT_FORMAT "\n", fname, (zint)recordOffset);
- if (zh->m_flag_rw)
- {
- char msg[512];
- sprintf (msg, "%s:" ZINT_FORMAT , fname, (zint)recordOffset);
- yaz_log_init_prefix2 (msg);
- }
-
- r = (*recType->extract)(recTypeClientData, &extractCtrl);
-
- yaz_log_init_prefix2 (0);
- if (r == RECCTRL_EXTRACT_EOF)
- return ZEBRA_FAIL;
- else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
- {
- /* error occured during extraction ... */
- if (zh->m_flag_rw &&
- zh->records_processed < zh->m_file_verbose_limit)
- {
- yaz_log (YLOG_WARN, "fail %s %s " ZINT_FORMAT,
- zh->m_record_type,
- fname, (zint) recordOffset);
- }
- return ZEBRA_FAIL;
- }
- else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
- {
- /* error occured during extraction ... */
- if (zh->m_flag_rw &&
- zh->records_processed < zh->m_file_verbose_limit)
- {
- yaz_log (YLOG_WARN, "no filter for %s %s "
- ZINT_FORMAT, zh->m_record_type,
- fname, (zint) recordOffset);
- }
- return ZEBRA_FAIL;
- }
- all_matches_add(&extractCtrl);
- if (extractCtrl.match_criteria[0])
- matchStr = extractCtrl.match_criteria;
- }
-
- /* if matchStr is set now - we assume it's printable .
- For internal matchStr (see below) we don't print */
- if (matchStr)
- match_str_to_print = matchStr;
-
- /* perform internal match if sysno not known and if match criteria is
- specified already */
- if (!sysno)
- {
- sysnotmp = 0;
- sysno = &sysnotmp;
-
- if (matchStr == 0 && zh->m_record_id && *zh->m_record_id)
- {
- matchStr = fileMatchStr (zh, zh->reg->keys, fname,
- zh->m_record_id);
- if (!matchStr)
- {
- yaz_log(YLOG_WARN, "Bad match criteria");
-
- if (zebra_rec_keys_empty(zh->reg->keys))
- {
- yaz_log(YLOG_WARN, "And no index keys");
- }
- return ZEBRA_FAIL;
- }
- }
- if (matchStr)
- {
- int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
- char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
- matchStr);
- if (rinfo)
- {
- assert(*rinfo == sizeof(*sysno));
- memcpy (sysno, rinfo+1, sizeof(*sysno));
- }
- }
- }
- if (! *sysno && zebra_rec_keys_empty(zh->reg->keys) )
- {
- /* the extraction process returned no information - the record
- is probably empty - unless flagShowRecords is in use */
- if (!zh->m_flag_rw)
- return ZEBRA_OK;
-
- if (zh->records_processed < zh->m_file_verbose_limit)
- yaz_log(YLOG_WARN, "empty %s %s " ZINT_FORMAT, zh->m_record_type,
- fname, (zint)recordOffset);
- return ZEBRA_OK;
- }
-
- if (! *sysno)
- {
- /* new record */
- if (deleteFlag)
- {
- yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, zh->m_record_type,
- fname, (zint)recordOffset);
- yaz_log(YLOG_WARN, "cannot delete record above (seems new)");
- return ZEBRA_OK;
- }
-
- rec = rec_new (zh->reg->records);
-
- *sysno = rec->sysno;
-
- if (zh->records_processed < zh->m_file_verbose_limit)
- {
- yaz_log(YLOG_LOG, "add %s %s " ZINT_FORMAT
- " " ZINT_FORMAT " %s" ,
- zh->m_record_type,
- fname, (zint) recordOffset, *sysno, match_str_to_print);
- }
- recordAttr = rec_init_attr (zh->reg->zei, rec);
- recordAttr->staticrank = extractCtrl.staticrank;
-
- if (matchStr)
- {
- int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
- dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
- sizeof(*sysno), sysno);
- }
-
-
- extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
- extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
- recordAttr->staticrank);
- zh->records_inserted++;
- }
- else
- {
- /* record already exists */
- zebra_rec_keys_t delkeys = zebra_rec_keys_open();
-
- zebra_rec_keys_t sortKeys = zebra_rec_keys_open();
-
- rec = rec_get (zh->reg->records, *sysno);
- assert (rec);
-
- recordAttr = rec_init_attr (zh->reg->zei, rec);
-
- zebra_rec_keys_set_buf(delkeys,
- rec->info[recInfo_delKeys],
- rec->size[recInfo_delKeys],
- 0);
-
- zebra_rec_keys_set_buf(sortKeys,
- rec->info[recInfo_sortKeys],
- rec->size[recInfo_sortKeys],
- 0);
- extract_flushSortKeys (zh, *sysno, 0, sortKeys);
- extract_flushRecordKeys (zh, *sysno, 0, delkeys,
- recordAttr->staticrank); /* old values */
- if (deleteFlag)
- {
- /* record going to be deleted */
- if (zebra_rec_keys_empty(delkeys))
- {
- yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT
- " " ZINT_FORMAT,
- zh->m_record_type, fname, (zint)recordOffset, *sysno);
- yaz_log(YLOG_WARN, "cannot delete file above, storeKeys false (1)");
- }
- else
- {
- if (zh->records_processed < zh->m_file_verbose_limit)
- {
- yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT
- " " ZINT_FORMAT " %s" ,
- zh->m_record_type, fname, (zint) recordOffset,
- *sysno, match_str_to_print);
- }
- zh->records_deleted++;
- if (matchStr)
- {
- int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
- dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
- }
- rec_del (zh->reg->records, &rec);
- }
- rec_rm (&rec);
- logRecord (zh);
- return ZEBRA_OK;
- }
- else
- {
- /* flush new keys for sort&search etc */
- if (zh->records_processed < zh->m_file_verbose_limit)
- {
- yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT
- " " ZINT_FORMAT " %s" ,
- zh->m_record_type, fname, (zint) recordOffset,
- *sysno, match_str_to_print);
- }
- recordAttr->staticrank = extractCtrl.staticrank;
- extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
- extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
- recordAttr->staticrank);
- zh->records_updated++;
- }
- zebra_rec_keys_close(delkeys);
- zebra_rec_keys_close(sortKeys);
- }
- /* update file type */
- xfree (rec->info[recInfo_fileType]);
- rec->info[recInfo_fileType] =
- rec_strdup (zh->m_record_type, &rec->size[recInfo_fileType]);
-
- /* update filename */
- xfree (rec->info[recInfo_filename]);
- rec->info[recInfo_filename] =
- rec_strdup (fname, &rec->size[recInfo_filename]);
-
- /* update delete keys */
- xfree (rec->info[recInfo_delKeys]);
- if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1)
- {
- zebra_rec_keys_get_buf(zh->reg->keys,
- &rec->info[recInfo_delKeys],
- &rec->size[recInfo_delKeys]);
- }
- else
- {
- rec->info[recInfo_delKeys] = NULL;
- rec->size[recInfo_delKeys] = 0;
- }
-
- /* update sort keys */
- xfree (rec->info[recInfo_sortKeys]);
-
- zebra_rec_keys_get_buf(zh->reg->sortKeys,
- &rec->info[recInfo_sortKeys],
- &rec->size[recInfo_sortKeys]);
-
- /* save file size of original record */
- zebraExplain_recordBytesIncrement (zh->reg->zei,
- - recordAttr->recordSize);
- recordAttr->recordSize = fi->file_moffset - recordOffset;
- if (!recordAttr->recordSize)
- recordAttr->recordSize = fi->file_max - recordOffset;
- zebraExplain_recordBytesIncrement (zh->reg->zei,
- recordAttr->recordSize);
-
- /* set run-number for this record */
- recordAttr->runNumber = zebraExplain_runNumberIncrement (zh->reg->zei,
- 0);
-
- /* update store data */
- xfree (rec->info[recInfo_storeData]);
- if (zh->store_data_buf)
- {
- rec->size[recInfo_storeData] = zh->store_data_size;
- rec->info[recInfo_storeData] = zh->store_data_buf;
- zh->store_data_buf = 0;
- }
- else if (zh->m_store_data)
- {
- rec->size[recInfo_storeData] = recordAttr->recordSize;
- rec->info[recInfo_storeData] = (char *)
- xmalloc (recordAttr->recordSize);
- if (lseek (fi->fd, recordOffset, SEEK_SET) < 0)
- {
- yaz_log(YLOG_ERRNO|YLOG_FATAL, "seek to " ZINT_FORMAT " in %s",
- (zint)recordOffset, fname);
- exit (1);
- }
- if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize)
- < recordAttr->recordSize)
- {
- yaz_log (YLOG_ERRNO|YLOG_FATAL, "read %d bytes of %s",
- recordAttr->recordSize, fname);
- exit (1);
- }
- }
- else
- {
- rec->info[recInfo_storeData] = NULL;
- rec->size[recInfo_storeData] = 0;
- }
- /* update database name */
- xfree (rec->info[recInfo_databaseName]);
- rec->info[recInfo_databaseName] =
- rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]);
-
- /* update offset */
- recordAttr->recordOffset = recordOffset;
-
- /* commit this record */
- rec_put (zh->reg->records, &rec);
- logRecord (zh);
- return ZEBRA_OK;
-}
-
ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname,
int deleteFlag)
{
char gprefix[128];
char ext[128];
char ext_res[128];
- struct file_read_info *fi;
+ struct file_read_info *fi = 0;
const char *original_record_type = 0;
RecType recType;
void *recTypeClientData;
+ struct ZebraRecStream stream, *streamp;
zebra_init_log_level();
yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type);
}
if (sysno && deleteFlag)
- fd = -1;
+ {
+ streamp = 0;
+ fi = 0;
+ }
else
{
char full_rep[1024];
zh->m_record_type = original_record_type;
return ZEBRA_FAIL;
}
+ streamp = &stream;
+ zebra_create_stream_fd(streamp, fd, 0);
}
- fi = file_read_start (fd);
while(1)
{
- fi->file_moffset = fi->file_offset;
- fi->file_more = 0; /* file_end not called (yet) */
- r = file_extract_record (zh, sysno, fname, deleteFlag, fi, 1,
- recType, recTypeClientData);
- if (fi->file_more)
- { /* file_end has been called so reset offset .. */
- fi->file_offset = fi->file_moffset;
- lseek(fi->fd, fi->file_moffset, SEEK_SET);
- }
+ r = zebra_extract_record_stream(zh, streamp,
+ deleteFlag,
+ 0, /* tst_mode */
+ zh->m_record_type,
+ sysno,
+ 0, /*match_criteria */
+ fname,
+ 1, /* force_update */
+ 1, /* allow_update */
+ recType, recTypeClientData);
if (r != ZEBRA_OK)
{
break;
break;
}
}
- file_read_stop (fi);
- if (fd != -1)
- close (fd);
+ if (streamp)
+ stream.destroy(streamp);
zh->m_record_type = original_record_type;
return r;
}
If not, and a record is provided, then sysno is got from there
*/
+
ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh,
const char *buf, size_t buf_size,
int delete_flag,
int force_update,
int allow_update)
{
- SYSNO sysno0 = 0;
- RecordAttr *recordAttr;
- struct recExtractCtrl extractCtrl;
- int r;
- const char *matchStr = 0;
- RecType recType = NULL;
+ struct ZebraRecStream stream;
+ ZEBRA_RES res;
void *clientData;
- Record rec;
- long recordOffset = 0;
- struct zebra_fetch_control fc;
- const char *pr_fname = fname; /* filename to print .. */
- int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0;
-
- zebra_init_log_level();
-
- if (!pr_fname)
- pr_fname = "<no file>"; /* make it printable if file is omitted */
-
- fc.fd = -1;
- fc.record_int_buf = buf;
- fc.record_int_len = buf_size;
- fc.record_int_pos = 0;
- fc.offset_end = 0;
- fc.record_offset = 0;
-
- extractCtrl.offset = 0;
- extractCtrl.readf = zebra_record_int_read;
- extractCtrl.seekf = zebra_record_int_seek;
- extractCtrl.tellf = zebra_record_int_tell;
- extractCtrl.endf = zebra_record_int_end;
- extractCtrl.first_record = 1;
- extractCtrl.fh = &fc;
+ RecType recType = 0;
- zebra_rec_keys_reset(zh->reg->keys);
- zebra_rec_keys_reset(zh->reg->sortKeys);
-
- if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
- {
- if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
- zh->m_explain_database))
- return ZEBRA_FAIL;
- }
-
if (recordType && *recordType)
{
yaz_log(log_level, "Record type explicitly specified: %s", recordType);
yaz_log (YLOG_WARN, "No such record type: %s", recordType);
return ZEBRA_FAIL;
}
-
- extractCtrl.init = extract_init;
- extractCtrl.tokenAdd = extract_token_add;
- extractCtrl.schemaAdd = extract_schema_add;
- extractCtrl.dh = zh->reg->dh;
- extractCtrl.handle = zh;
- extractCtrl.match_criteria[0] = '\0';
- extractCtrl.staticrank = 0;
-
- init_extractCtrl(zh, &extractCtrl);
- extract_set_store_data_prepare(&extractCtrl);
- r = (*recType->extract)(clientData, &extractCtrl);
- if (r == RECCTRL_EXTRACT_EOF)
- return ZEBRA_FAIL;
- else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
+ zebra_create_stream_mem(&stream, buf, buf_size);
+
+ res = zebra_extract_record_stream(zh, &stream,
+ delete_flag,
+ test_mode,
+ recordType,
+ sysno,
+ match_criteria,
+ fname,
+ force_update,
+ allow_update,
+ recType, clientData);
+ stream.destroy(&stream);
+ return res;
+}
+
+
+ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh,
+ struct ZebraRecStream *stream,
+ int delete_flag,
+ int test_mode,
+ const char *recordType,
+ SYSNO *sysno,
+ const char *match_criteria,
+ const char *fname,
+ int force_update,
+ int allow_update,
+ RecType recType,
+ void *recTypeClientData)
+
+{
+ SYSNO sysno0 = 0;
+ RecordAttr *recordAttr;
+ struct recExtractCtrl extractCtrl;
+ int r;
+ const char *matchStr = 0;
+ Record rec;
+ off_t start_offset = 0;
+ const char *pr_fname = fname; /* filename to print .. */
+ int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0;
+
+ zebra_init_log_level();
+
+ if (!pr_fname)
+ pr_fname = "<no file>"; /* make it printable if file is omitted */
+
+ zebra_rec_keys_reset(zh->reg->keys);
+ zebra_rec_keys_reset(zh->reg->sortKeys);
+
+ if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
{
- /* error occured during extraction ... */
- yaz_log (YLOG_WARN, "extract error: generic");
- return ZEBRA_FAIL;
+ if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
+ zh->m_explain_database))
+ return ZEBRA_FAIL;
}
- else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
+
+ if (stream)
{
- /* error occured during extraction ... */
- yaz_log (YLOG_WARN, "extract error: no such filter");
- return ZEBRA_FAIL;
- }
+ off_t null_offset = 0;
+ extractCtrl.stream = stream;
+
+ start_offset = stream->tellf(stream);
- all_matches_add(&extractCtrl);
+ extractCtrl.first_record = start_offset ? 0 : 1;
- if (extractCtrl.match_criteria[0])
- match_criteria = extractCtrl.match_criteria;
+ stream->endf(stream, &null_offset);;
+
+ extractCtrl.init = extract_init;
+ extractCtrl.tokenAdd = extract_token_add;
+ extractCtrl.schemaAdd = extract_schema_add;
+ extractCtrl.dh = zh->reg->dh;
+ extractCtrl.handle = zh;
+ extractCtrl.match_criteria[0] = '\0';
+ extractCtrl.staticrank = 0;
+
+ init_extractCtrl(zh, &extractCtrl);
+
+ extract_set_store_data_prepare(&extractCtrl);
+
+ r = (*recType->extract)(recTypeClientData, &extractCtrl);
+
+ if (r == RECCTRL_EXTRACT_EOF)
+ return ZEBRA_FAIL;
+ else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
+ {
+ /* error occurred during extraction ... */
+ yaz_log (YLOG_WARN, "extract error: generic");
+ return ZEBRA_FAIL;
+ }
+ else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
+ {
+ /* error occurred during extraction ... */
+ yaz_log (YLOG_WARN, "extract error: no such filter");
+ return ZEBRA_FAIL;
+ }
+
+ all_matches_add(&extractCtrl);
+
+ if (extractCtrl.match_criteria[0])
+ match_criteria = extractCtrl.match_criteria;
+ }
if (!sysno) {
sysno = &sysno0;
/* new record */
if (delete_flag)
{
- yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
- pr_fname, (long) recordOffset);
+ yaz_log (YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
return ZEBRA_FAIL;
}
if (show_progress)
- yaz_log (YLOG_LOG, "add %s %s %ld", recordType, pr_fname,
- (long) recordOffset);
+ yaz_log (YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname,
+ (zint) start_offset);
rec = rec_new (zh->reg->records);
*sysno = rec->sysno;
zebra_rec_keys_t sortKeys = zebra_rec_keys_open();
if (!allow_update)
{
- yaz_log (YLOG_LOG, "skipped %s %s %ld",
- recordType, pr_fname, (long) recordOffset);
+ yaz_log (YLOG_LOG, "skipped %s %s " ZINT_FORMAT,
+ recordType, pr_fname, (zint) start_offset);
logRecord(zh);
return ZEBRA_FAIL;
}
/* record going to be deleted */
if (zebra_rec_keys_empty(delkeys))
{
- yaz_log(YLOG_LOG, "delete %s %s %ld", recordType,
- pr_fname, (long) recordOffset);
+ yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
yaz_log(YLOG_WARN, "cannot delete file above, "
"storeKeys false (3)");
}
else
{
if (show_progress)
- yaz_log(YLOG_LOG, "delete %s %s %ld", recordType,
- pr_fname, (long) recordOffset);
+ yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
zh->records_deleted++;
if (matchStr)
{
else
{
if (show_progress)
- yaz_log(YLOG_LOG, "update %s %s %ld", recordType,
- pr_fname, (long) recordOffset);
+ /* report the record's start offset, as the add/delete/skipped messages do */
+ yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
recordAttr->staticrank = extractCtrl.staticrank;
extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
/* save file size of original record */
zebraExplain_recordBytesIncrement (zh->reg->zei,
- recordAttr->recordSize);
-#if 0
- recordAttr->recordSize = fi->file_moffset - recordOffset;
- if (!recordAttr->recordSize)
- recordAttr->recordSize = fi->file_max - recordOffset;
-#else
- recordAttr->recordSize = buf_size;
-#endif
- zebraExplain_recordBytesIncrement (zh->reg->zei,
- recordAttr->recordSize);
+ if (stream)
+ {
+ off_t end_offset = stream->endf(stream, 0);
+
+ if (!end_offset)
+ end_offset = stream->tellf(stream);
+ else
+ stream->seekf(stream, end_offset);
+
+ recordAttr->recordSize = end_offset - start_offset;
+ zebraExplain_recordBytesIncrement(zh->reg->zei,
+ recordAttr->recordSize);
+ }
/* set run-number for this record */
recordAttr->runNumber =
}
else if (zh->m_store_data)
{
+ off_t cur_offset = stream->tellf(stream);
+
rec->size[recInfo_storeData] = recordAttr->recordSize;
rec->info[recInfo_storeData] = (char *)
xmalloc (recordAttr->recordSize);
- memcpy (rec->info[recInfo_storeData], buf, recordAttr->recordSize);
+ stream->seekf(stream, start_offset);
+ stream->readf(stream, rec->info[recInfo_storeData],
+ recordAttr->recordSize);
+ stream->seekf(stream, cur_offset);
}
else
{
rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]);
/* update offset */
- recordAttr->recordOffset = recordOffset;
+ recordAttr->recordOffset = start_offset;
/* commit this record */
rec_put (zh->reg->records, &rec);
-/* $Id: index.h,v 1.173 2006-08-16 13:16:36 adam Exp $
+/* $Id: index.h,v 1.174 2006-08-22 13:39:27 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
int force_update,
int allow_update);
+ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh,
+ struct ZebraRecStream *stream,
+ int delete_flag,
+ int test_mode,
+ const char *recordType,
+ SYSNO *sysno,
+ const char *match_criteria,
+ const char *fname,
+ int force_update,
+ int allow_update,
+ RecType recType,
+ void *recTypeClientData);
+
#if 0
int extract_rec_in_mem (ZebraHandle zh, const char *recordType,
const char *buf, size_t buf_size,
#endif
void extract_flushWriteKeys (ZebraHandle zh, int final);
-struct zebra_fetch_control {
- off_t offset_end;
- off_t record_offset;
- off_t record_int_pos;
- const char *record_int_buf;
- int record_int_len;
- int fd;
-};
-
-int zebra_record_ext_read (void *fh, char *buf, size_t count);
-off_t zebra_record_ext_seek (void *fh, off_t offset);
-off_t zebra_record_ext_tell (void *fh);
-off_t zebra_record_int_seek (void *fh, off_t offset);
-off_t zebra_record_int_tell (void *fh);
-int zebra_record_int_read (void *fh, char *buf, size_t count);
-void zebra_record_int_end (void *fh, off_t offset);
-
+YAZ_EXPORT void zebra_create_stream_mem(struct ZebraRecStream *stream,
+ const char *buf, size_t sz);
+YAZ_EXPORT void zebra_create_stream_fd(struct ZebraRecStream *stream,
+ int fd, off_t start_offset);
void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys);
ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys,
-/* $Id: kcontrol.c,v 1.5 2006-08-16 13:16:36 adam Exp $
+/* $Id: kcontrol.c,v 1.6 2006-08-22 13:39:27 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
struct rset_key_control *zebra_key_control_create(ZebraHandle zh)
{
- const char *res_val;
struct rset_key_control *kc = xmalloc(sizeof(*kc));
struct context_control *cp = xmalloc(sizeof(*cp));
-/* $Id: marcread.c,v 1.2 2006-08-14 10:40:15 adam Exp $
+/* $Id: marcread.c,v 1.3 2006-08-22 13:39:27 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
char *absynName;
data1_marctab *marctab;
- if ((*p->readf)(p->fh, buf, 5) != 5)
+ if (p->stream->readf(p->stream, buf, 5) != 5)
return NULL;
while (*buf < '0' || *buf > '9')
{
for (i = 0; i<4; i++)
buf[i] = buf[i+1];
- if ((*p->readf)(p->fh, buf+4, 1) != 1)
+ if (p->stream->readf(p->stream, buf+4, 1) != 1)
return NULL;
}
record_length = atoi_n (buf, 5);
return NULL;
}
/* read remaining part - attempt to read one byte furhter... */
- read_bytes = (*p->readf)(p->fh, buf+5, record_length-4);
+ read_bytes = p->stream->readf(p->stream, buf+5, record_length-4);
if (read_bytes < record_length-5)
{
yaz_log (YLOG_WARN, "Couldn't read whole MARC record");
}
if (read_bytes == record_length - 4)
{
- off_t cur_offset = (*p->tellf)(p->fh);
+ off_t cur_offset = p->stream->tellf(p->stream);
if (cur_offset <= 27)
return NULL;
- if (p->endf)
- (*p->endf)(p->fh, cur_offset - 1);
+ if (p->stream->endf)
+ {
+ off_t end_offset = cur_offset - 1;
+ p->stream->endf(p->stream, &end_offset);
+ }
}
absynName = mi->type;
res_root = data1_mk_root (p->dh, p->mem, absynName);
-/* $Id: recgrs.c,v 1.4 2006-08-14 10:40:15 adam Exp $
+/* $Id: recgrs.c,v 1.5 2006-08-22 13:39:27 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
int oidtmp[OID_SIZE];
RecWord wrd;
- gri.readf = p->readf;
- gri.seekf = p->seekf;
- gri.tellf = p->tellf;
- gri.endf = p->endf;
- gri.fh = p->fh;
- gri.offset = p->offset;
+ gri.stream = p->stream;
gri.mem = mem;
gri.dh = p->dh;
gri.clientData = clientData;
int dummy;
mem = nmem_create();
- gri.readf = p->readf;
- gri.seekf = p->seekf;
- gri.tellf = p->tellf;
- gri.endf = NULL;
- gri.fh = p->fh;
- gri.offset = 0;
+ gri.stream = p->stream;
gri.mem = mem;
gri.dh = p->dh;
gri.clientData = clientData;
-/* $Id: rectext.c,v 1.2 2006-08-14 10:40:15 adam Exp $
+/* $Id: rectext.c,v 1.3 2006-08-22 13:39:27 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
{
if (fi->max <= 0)
return 0;
- fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096);
+ fi->max = fi->p->stream->readf(fi->p->stream, fi->buf, 4096);
fi->offset = 0;
if (fi->max <= 0)
return 0;
*dst = fi->buf[(fi->offset)++];
if (tinfo->sep && *dst == *tinfo->sep)
{
- off_t off = (*fi->p->tellf)(fi->p->fh);
- (*fi->p->endf)(fi->p->fh, off - (fi->max - fi->offset));
+ off_t off = fi->p->stream->tellf(fi->p->stream);
+ off_t end_offset = off - (fi->max - fi->offset);
+ fi->p->stream->endf(fi->p->stream, &end_offset);
return 0;
}
return 1;
}
if (!make_body)
break;
- r = (*p->readf)(p->fh, filter_buf + filter_ptr, 4096);
+ r = p->stream->readf(p->stream, filter_buf + filter_ptr, 4096);
if (r <= 0)
break;
filter_ptr += r;
-/* $Id: regxread.c,v 1.2 2006-08-14 10:40:15 adam Exp $
+/* $Id: regxread.c,v 1.3 2006-08-22 13:39:27 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
#if HAVE_TCL_H
Tcl_Interp *tcl_interp;
#endif
- void *f_win_fh;
- void (*f_win_ef)(void *, off_t);
+ struct ZebraRecStream *stream;
+ off_t (*f_win_ef)(struct ZebraRecStream *s, off_t *);
int f_win_start; /* first byte of buffer is this file offset */
int f_win_end; /* last byte of buffer is this offset - 1 */
int f_win_size; /* size of buffer */
char *f_win_buf; /* buffer itself */
- int (*f_win_rf)(void *, char *, size_t);
- off_t (*f_win_sf)(void *, off_t);
+ int (*f_win_rf)(struct ZebraRecStream *, char *, size_t);
+ off_t (*f_win_sf)(struct ZebraRecStream *, off_t);
struct lexConcatBuf *concatBuf;
int maxLevel;
}
if (off < 0 || start_pos >= spec->f_win_end)
{
- (*spec->f_win_sf)(spec->f_win_fh, start_pos);
+ (*spec->f_win_sf)(spec->stream, start_pos);
spec->f_win_start = start_pos;
if (!spec->f_win_buf)
spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
- *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
+ *size = (*spec->f_win_rf)(spec->stream, spec->f_win_buf,
spec->f_win_size);
spec->f_win_end = spec->f_win_start + *size;
}
for (i = 0; i<spec->f_win_end - start_pos; i++)
spec->f_win_buf[i] = spec->f_win_buf[i + off];
- r = (*spec->f_win_rf)(spec->f_win_fh,
+ r = (*spec->f_win_rf)(spec->stream,
spec->f_win_buf + i,
spec->f_win_size - i);
spec->f_win_start = start_pos;
{
if (spec->f_win_ef && *ptr != F_WIN_EOF)
{
+ off_t end_offset = *ptr;
#if REGX_DEBUG
yaz_log (YLOG_LOG, "regx: endf ptr=%d", *ptr);
#endif
- (*spec->f_win_ef)(spec->f_win_fh, *ptr);
+ (*spec->f_win_ef)(spec->stream, &end_offset);
}
return NULL;
}
int res;
struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
struct lexSpec **curLexSpec = &specs->spec;
+ off_t start_offset;
#if REGX_DEBUG
yaz_log (YLOG_LOG, "grs_read_regx");
}
}
(*curLexSpec)->dh = p->dh;
- if (!p->offset)
+ start_offset = p->stream->tellf(p->stream);
+ if (start_offset == 0)
{
(*curLexSpec)->f_win_start = 0;
(*curLexSpec)->f_win_end = 0;
- (*curLexSpec)->f_win_rf = p->readf;
- (*curLexSpec)->f_win_sf = p->seekf;
- (*curLexSpec)->f_win_fh = p->fh;
- (*curLexSpec)->f_win_ef = p->endf;
+ (*curLexSpec)->f_win_rf = p->stream->readf;
+ (*curLexSpec)->f_win_sf = p->stream->seekf;
+ (*curLexSpec)->stream = p->stream;
+ (*curLexSpec)->f_win_ef = p->stream->endf;
(*curLexSpec)->f_win_size = 500000;
}
(*curLexSpec)->m = p->mem;
- return lexRoot (*curLexSpec, p->offset, "main");
+ return lexRoot (*curLexSpec, start_offset, "main");
}
static int extract_regx(void *clientData, struct recExtractCtrl *ctrl)
int res;
struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
struct lexSpec **curLexSpec = &specs->spec;
+ off_t start_offset;
#if REGX_DEBUG
yaz_log (YLOG_LOG, "grs_read_tcl");
}
}
(*curLexSpec)->dh = p->dh;
- if (!p->offset)
+ start_offset = p->stream->tellf(p->stream);
+ if (start_offset == 0)
{
(*curLexSpec)->f_win_start = 0;
(*curLexSpec)->f_win_end = 0;
- (*curLexSpec)->f_win_rf = p->readf;
- (*curLexSpec)->f_win_sf = p->seekf;
- (*curLexSpec)->f_win_fh = p->fh;
- (*curLexSpec)->f_win_ef = p->endf;
+ (*curLexSpec)->f_win_rf = p->stream->readf;
+ (*curLexSpec)->f_win_sf = p->stream->seekf;
+ (*curLexSpec)->stream = p->stream;
+ (*curLexSpec)->f_win_ef = p->stream->endf;
(*curLexSpec)->f_win_size = 500000;
}
(*curLexSpec)->m = p->mem;
- return lexRoot (*curLexSpec, p->offset, "main");
+ return lexRoot (*curLexSpec, start_offset, "main");
}
static int extract_tcl(void *clientData, struct recExtractCtrl *ctrl)
-/* $Id: retrieve.c,v 1.43 2006-08-14 10:40:15 adam Exp $
+/* $Id: retrieve.c,v 1.44 2006-08-22 13:39:27 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
#include "index.h"
#include <direntz.h>
-int zebra_record_ext_read (void *fh, char *buf, size_t count)
-{
- struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh;
- return read (fc->fd, buf, count);
-}
-
-off_t zebra_record_ext_seek (void *fh, off_t offset)
-{
- struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh;
- return lseek (fc->fd, offset + fc->record_offset, SEEK_SET);
-}
-
-off_t zebra_record_ext_tell (void *fh)
-{
- struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh;
- return lseek (fc->fd, 0, SEEK_CUR) - fc->record_offset;
-}
-
-off_t zebra_record_int_seek (void *fh, off_t offset)
-{
- struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh;
- return (off_t) (fc->record_int_pos = offset);
-}
-
-off_t zebra_record_int_tell (void *fh)
-{
- struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh;
- return (off_t) fc->record_int_pos;
-}
-
-int zebra_record_int_read (void *fh, char *buf, size_t count)
-{
- struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh;
- int l = fc->record_int_len - fc->record_int_pos;
- if (l <= 0)
- return 0;
- l = (l < (int) count) ? l : (int) count;
- memcpy (buf, fc->record_int_buf + fc->record_int_pos, l);
- fc->record_int_pos += l;
- return l;
-}
-
-void zebra_record_int_end (void *fh, off_t off)
-{
- struct zebra_fetch_control *fc = (struct zebra_fetch_control *) fh;
- fc->offset_end = off;
-}
-
int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score,
- zebra_snippets *hit_snippet, ODR stream,
+ zebra_snippets *hit_snippet, ODR odr,
oid_value input_format, Z_RecordComposition *comp,
oid_value *output_format, char **rec_bufp,
int *rec_lenp, char **basenamep,
char *fname, *file_type, *basename;
RecType rt;
struct recRetrieveCtrl retrieveCtrl;
- struct zebra_fetch_control fc;
+ struct ZebraRecStream stream;
RecordAttr *recordAttr;
void *clientData;
int raw_mode = 0;
sprintf(rec_str, ZINT_FORMAT, sysno);
*output_format = VAL_SUTRS;
*rec_lenp = strlen(rec_str);
- *rec_bufp = odr_strdup(stream, rec_str);
+ *rec_bufp = odr_strdup(odr, rec_str);
return 0;
}
rec = rec_get (zh->reg->records, sysno);
file_type = rec->info[recInfo_fileType];
fname = rec->info[recInfo_filename];
basename = rec->info[recInfo_databaseName];
- *basenamep = (char *) odr_malloc (stream, strlen(basename)+1);
+ *basenamep = (char *) odr_malloc (odr, strlen(basename)+1);
strcpy (*basenamep, basename);
if (comp && comp->which == Z_RecordComp_simple &&
}
*output_format = VAL_SUTRS;
*rec_lenp = wrbuf_len(wrbuf);
- *rec_bufp = odr_malloc(stream, *rec_lenp);
+ *rec_bufp = odr_malloc(odr, *rec_lenp);
memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
wrbuf_free(wrbuf, 1);
zebra_rec_keys_close(keys);
return 14;
}
yaz_log (YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d", sysno,score);
- retrieveCtrl.fh = &fc;
- fc.fd = -1;
+ retrieveCtrl.stream = &stream;
retrieveCtrl.fname = fname;
if (rec->size[recInfo_storeData] > 0)
{
- retrieveCtrl.readf = zebra_record_int_read;
- retrieveCtrl.seekf = zebra_record_int_seek;
- retrieveCtrl.tellf = zebra_record_int_tell;
- fc.record_int_len = rec->size[recInfo_storeData];
- fc.record_int_buf = rec->info[recInfo_storeData];
- fc.record_int_pos = 0;
- yaz_log (YLOG_DEBUG, "Internal retrieve. %d bytes", fc.record_int_len);
+ zebra_create_stream_mem(&stream, rec->info[recInfo_storeData],
+ rec->size[recInfo_storeData]);
if (raw_mode)
{
*output_format = VAL_SUTRS;
*rec_lenp = rec->size[recInfo_storeData];
- *rec_bufp = (char *) odr_malloc(stream, *rec_lenp);
+ *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
memcpy(*rec_bufp, rec->info[recInfo_storeData], *rec_lenp);
rec_rm (&rec);
+ stream.destroy(&stream);
return 0;
}
}
else
{
char full_rep[1024];
+ int fd;
if (zh->path_reg && !yaz_is_abspath (fname))
{
else
strcpy (full_rep, fname);
- if ((fc.fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
+ if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
{
yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
full_rep);
rec_rm (&rec);
+ /* no stream has been created on this path yet (that only happens
+ after a successful open), so there is nothing to destroy here */
return 14;
}
- fc.record_offset = recordAttr->recordOffset;
-
- retrieveCtrl.readf = zebra_record_ext_read;
- retrieveCtrl.seekf = zebra_record_ext_seek;
- retrieveCtrl.tellf = zebra_record_ext_tell;
- zebra_record_ext_seek (retrieveCtrl.fh, 0);
+ zebra_create_stream_fd(&stream, fd, recordAttr->recordOffset);
if (raw_mode)
{
*output_format = VAL_SUTRS;
*rec_lenp = recordAttr->recordSize;
- *rec_bufp = (char *) odr_malloc(stream, *rec_lenp);
- zebra_record_ext_read(&fc, *rec_bufp, *rec_lenp);
+ *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
+ stream.readf(&stream, *rec_bufp, *rec_lenp);
rec_rm (&rec);
- close (fc.fd);
- return 0;
+ stream.destroy(&stream);
+ return 0;
}
}
retrieveCtrl.localno = sysno;
retrieveCtrl.staticrank = recordAttr->staticrank;
retrieveCtrl.score = score;
retrieveCtrl.recordSize = recordAttr->recordSize;
- retrieveCtrl.odr = stream;
+ retrieveCtrl.odr = odr;
retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
retrieveCtrl.comp = comp;
retrieveCtrl.encoding = zh->record_encoding;
*output_format = retrieveCtrl.output_format;
*rec_bufp = (char *) retrieveCtrl.rec_buf;
*rec_lenp = retrieveCtrl.rec_len;
- if (fc.fd != -1)
- close (fc.fd);
+
+ stream.destroy(&stream);
+
rec_rm (&rec);
*addinfo = retrieveCtrl.addinfo;
return retrieveCtrl.diagnostic;
}
+
/*
* Local variables:
* c-basic-offset: 4
-/* $Id: safari.c,v 1.3 2006-08-16 13:16:36 adam Exp $
+/* $Id: safari.c,v 1.4 2006-08-22 13:39:28 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
{
if (fi->max <= 0)
return 0;
- fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096);
+ fi->max = fi->p->stream->readf(fi->p->stream, fi->buf, 4096);
fi->offset = 0;
if (fi->max <= 0)
return 0;
}
if (!make_body)
break;
- r = (*p->readf)(p->fh, filter_buf + filter_ptr, 4096);
+ r = p->stream->readf(p->stream, filter_buf + filter_ptr, 4096);
if (r <= 0)
break;
filter_ptr += r;
-/* $Id: sgmlread.c,v 1.2 2006-08-14 10:40:15 adam Exp $
+/* $Id: sgmlread.c,v 1.3 2006-08-22 13:39:28 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
int buf_size;
int size;
int off;
+ struct ZebraRecStream *stream;
off_t moffset;
- void *fh;
- int (*readf)(void *, char *, size_t);
WRBUF wrbuf;
};
p->moffset += p->off;
p->off = 0;
p->size = 0;
- res = (*p->readf)(p->fh, p->buf, p->buf_size);
+ res = p->stream->readf(p->stream, p->buf, p->buf_size);
if (res > 0)
{
p->size += res;
data1_node *node;
int res;
- sgi->moffset = p->offset;
- sgi->fh = p->fh;
- sgi->readf = p->readf;
+ sgi->moffset = p->stream->tellf(p->stream);
+ sgi->stream = p->stream;
sgi->off = 0;
sgi->size = 0;
- res = (*sgi->readf)(sgi->fh, sgi->buf, sgi->buf_size);
+ res = sgi->stream->readf(sgi->stream, sgi->buf, sgi->buf_size);
if (res > 0)
sgi->size += res;
else
return 0;
- node = data1_read_nodex (p->dh, p->mem, sgml_getc, sgi, sgi->wrbuf);
- if (node && p->endf)
- (*p->endf)(sgi->fh, sgi->moffset + sgi->off);
+ node = data1_read_nodex(p->dh, p->mem, sgml_getc, sgi, sgi->wrbuf);
+ if (node && p->stream->endf)
+ {
+ off_t end_offset = sgi->moffset + sgi->off;
+ p->stream->endf(sgi->stream, &end_offset);
+ }
return node;
}
--- /dev/null
+/* $Id: stream.c,v 1.1 2006-08-22 13:39:28 adam Exp $
+ Copyright (C) 1995-2006
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#include <stdio.h>
+#include <assert.h>
+
+#include <fcntl.h>
+#ifdef WIN32
+#include <io.h>
+#include <process.h>
+#endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "index.h"
+
+/** \brief private state of a memory-backed record stream */
+struct zebra_mem_control {
+ /** end offset last stored via the endf handler (0 until set) */
+ off_t offset_end;
+ /** current read position within record_int_buf */
+ off_t record_int_pos;
+ /** record data; the pointer is stored as given, not copied */
+ const char *record_int_buf;
+ /** number of bytes available in record_int_buf */
+ int record_int_len;
+};
+
+/** \brief private state of a file-descriptor-backed record stream */
+struct zebra_ext_control {
+ /** end offset last stored via the endf handler (0 until set) */
+ off_t offset_end;
+ /** file offset of the record start; added to / subtracted from
+ stream offsets by the seek and tell handlers */
+ off_t record_offset;
+ /** file descriptor; closed by the destroy handler unless it is -1 */
+ int fd;
+};
+
+/** \brief seek handler for memory streams
+ *
+ * Stores offset as the new read position (no range check is made)
+ * and returns that position.
+ */
+static off_t zebra_mem_seek(struct ZebraRecStream *s, off_t offset)
+{
+    struct zebra_mem_control *mem = s->fh;
+
+    mem->record_int_pos = offset;
+    return mem->record_int_pos;
+}
+
+/** \brief tell handler for memory streams: returns the current
+ *  read position */
+static off_t zebra_mem_tell(struct ZebraRecStream *s)
+{
+    struct zebra_mem_control *mem = s->fh;
+
+    return mem->record_int_pos;
+}
+
+/** \brief read handler for memory streams
+ *
+ * Copies up to count bytes from the current position into buf and
+ * advances the position by the number of bytes copied.
+ *
+ * \return number of bytes copied; 0 when the position is at or past
+ *         the end of the buffer, or is negative
+ */
+static int zebra_mem_read(struct ZebraRecStream *s, char *buf, size_t count)
+{
+    struct zebra_mem_control *fc = (struct zebra_mem_control *) s->fh;
+    off_t remaining;
+    int l;
+
+    /* the seek handler does not validate offsets; a negative position
+       would make memcpy read before the start of the buffer */
+    if (fc->record_int_pos < 0)
+        return 0;
+    remaining = fc->record_int_len - fc->record_int_pos;
+    if (remaining <= 0)
+        return 0;
+    /* compare as size_t so a count above INT_MAX cannot turn negative;
+       remaining is at most record_int_len and therefore fits in int */
+    l = (count < (size_t) remaining) ? (int) count : (int) remaining;
+    memcpy (buf, fc->record_int_buf + fc->record_int_pos, l);
+    fc->record_int_pos += l;
+    return l;
+}
+
+/** \brief end-offset handler for memory streams
+ *
+ * When offset is non-NULL the value it points to is stored as the
+ * end offset. The stored end offset is returned in either case.
+ */
+static off_t zebra_mem_end(struct ZebraRecStream *s, off_t *offset)
+{
+    struct zebra_mem_control *mem = s->fh;
+
+    if (!offset)
+        return mem->offset_end;
+    mem->offset_end = *offset;
+    return mem->offset_end;
+}
+
+/** \brief destroy handler for memory streams: frees the control
+ *  block only; the record buffer itself is not freed here */
+static void zebra_mem_destroy(struct ZebraRecStream *s)
+{
+    struct zebra_mem_control *mem = s->fh;
+
+    xfree(mem);
+}
+
+/** \brief read handler for file streams: reads up to count bytes from
+ *  the descriptor into buf and returns the result of read() */
+static int zebra_ext_read(struct ZebraRecStream *s, char *buf, size_t count)
+{
+    struct zebra_ext_control *ext = s->fh;
+    int nread = read(ext->fd, buf, count);
+
+    return nread;
+}
+
+/** \brief seek handler for file streams
+ *
+ * offset is relative to the record start; the record's file offset is
+ * added before seeking. Returns the result of lseek().
+ */
+static off_t zebra_ext_seek(struct ZebraRecStream *s, off_t offset)
+{
+    struct zebra_ext_control *ext = s->fh;
+    off_t file_pos = offset + ext->record_offset;
+
+    return lseek(ext->fd, file_pos, SEEK_SET);
+}
+
+/** \brief tell handler for file streams: returns the current file
+ *  position minus the record's file offset */
+static off_t zebra_ext_tell(struct ZebraRecStream *s)
+{
+    struct zebra_ext_control *ext = s->fh;
+    off_t file_pos = lseek(ext->fd, 0, SEEK_CUR);
+
+    return file_pos - ext->record_offset;
+}
+
+/** \brief destroy handler for file streams: closes the descriptor
+ *  (unless it is -1) and frees the control block */
+static void zebra_ext_destroy(struct ZebraRecStream *s)
+{
+    struct zebra_ext_control *ext = s->fh;
+    int fd = ext->fd;
+
+    if (fd != -1)
+        close(fd);
+    xfree(ext);
+}
+
+/** \brief end-offset handler for file streams
+ *
+ * When offset is non-NULL the value it points to is stored as the
+ * end offset. The stored end offset is returned in either case.
+ */
+static off_t zebra_ext_end(struct ZebraRecStream *s, off_t *offset)
+{
+    struct zebra_ext_control *ext = s->fh;
+
+    if (!offset)
+        return ext->offset_end;
+    ext->offset_end = *offset;
+    return ext->offset_end;
+}
+
+
+/** \brief set up stream to read from a memory buffer
+ *
+ * \param stream stream structure to fill in
+ * \param buf    record data; the pointer is kept, the data is not copied
+ * \param sz     number of bytes in buf
+ *
+ * Allocates a control block that the stream's destroy handler frees.
+ */
+void zebra_create_stream_mem(struct ZebraRecStream *stream,
+                             const char *buf, size_t sz)
+{
+    struct zebra_mem_control *mem = xmalloc(sizeof(*mem));
+
+    /* reading starts at the beginning; no end offset recorded yet */
+    mem->offset_end = 0;
+    mem->record_int_pos = 0;
+    mem->record_int_buf = buf;
+    mem->record_int_len = sz;
+
+    stream->destroy = zebra_mem_destroy;
+    stream->endf = zebra_mem_end;
+    stream->tellf = zebra_mem_tell;
+    stream->seekf = zebra_mem_seek;
+    stream->readf = zebra_mem_read;
+    stream->fh = mem;
+}
+
+/** \brief set up stream to read from an open file descriptor
+ *
+ * \param stream       stream structure to fill in
+ * \param fd           descriptor to read from; the stream's destroy
+ *                     handler closes it unless it is -1
+ * \param start_offset file offset at which the record begins
+ *
+ * Allocates a control block that the destroy handler frees, and
+ * positions the descriptor at the start of the record.
+ */
+void zebra_create_stream_fd(struct ZebraRecStream *stream,
+                            int fd, off_t start_offset)
+{
+    struct zebra_ext_control *ext = xmalloc(sizeof(*ext));
+
+    ext->offset_end = 0;
+    ext->record_offset = start_offset;
+    ext->fd = fd;
+
+    stream->destroy = zebra_ext_destroy;
+    stream->endf = zebra_ext_end;
+    stream->tellf = zebra_ext_tell;
+    stream->seekf = zebra_ext_seek;
+    stream->readf = zebra_ext_read;
+    stream->fh = ext;
+
+    /* stream offset 0 corresponds to the record start in the file */
+    zebra_ext_seek(stream, 0);
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
-/* $Id: xmlread.c,v 1.2 2006-08-14 10:40:15 adam Exp $
+/* $Id: xmlread.c,v 1.3 2006-08-22 13:39:28 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
}
-static int cb_external_entity (XML_Parser pparser,
- const char *context,
- const char *base,
- const char *systemId,
- const char *publicId)
+static int cb_external_entity(XML_Parser pparser,
+ const char *context,
+ const char *base,
+ const char *systemId,
+ const char *publicId)
{
struct user_info *ui = (struct user_info*) XML_GetUserData(pparser);
FILE *inf;
if (prefix)
yaz_log(ui->loglevel, "cb_ns_end %s", prefix);
}
-data1_node *zebra_read_xml (data1_handle dh,
- int (*rf)(void *, char *, size_t), void *fh,
- NMEM m)
+
+data1_node *zebra_read_xml(data1_handle dh,
+ struct ZebraRecStream *stream,
+ NMEM m)
{
XML_Parser parser;
struct user_info uinfo;
yaz_log (YLOG_WARN, "XML_GetBuffer fail");
break;
}
- r = (*rf)(fh, buf, XML_CHUNK);
+ r = stream->readf(stream, buf, XML_CHUNK);
if (r < 0)
{
/* error */
XML_Expat_Version expat_version;
};
-static data1_node *grs_read_xml (struct grs_read_info *p)
+static data1_node *grs_read_xml(struct grs_read_info *p)
{
- return zebra_read_xml (p->dh, p->readf, p->fh, p->mem);
+ return zebra_read_xml(p->dh, p->stream, p->mem);
}
static void *filter_init(Res res, RecType recType)
# Zebra makefile for MS NMAKE
-# $Id: makefile,v 1.54 2006-08-14 18:12:35 adam Exp $
+# $Id: makefile,v 1.55 2006-08-22 13:39:28 adam Exp $
###########################################################
############### Parameters
$(OBJDIR)\dopen.obj \
$(OBJDIR)\drdwr.obj \
$(OBJDIR)\extract.obj \
+ $(OBJDIR)\flock.obj \
$(OBJDIR)\imalloc.obj \
$(OBJDIR)\inline.obj \
$(OBJDIR)\insert.obj \
$(OBJDIR)\kcontrol.obj \
$(OBJDIR)\kinput.obj \
$(OBJDIR)\limit.obj \
- $(OBJDIR)\flock.obj \
$(OBJDIR)\lookgrep.obj \
$(OBJDIR)\lookup.obj \
$(OBJDIR)\lookupec.obj \
$(OBJDIR)\snippet.obj \
$(OBJDIR)\sortidx.obj \
$(OBJDIR)\states.obj \
+ $(OBJDIR)\stream.obj \
$(OBJDIR)\symtab.obj \
+ $(OBJDIR)\trunc.obj \
$(OBJDIR)\update_path.obj \
$(OBJDIR)\update_file.obj \
- $(OBJDIR)\trunc.obj \
$(OBJDIR)\xmlread.obj \
$(OBJDIR)\xpath.obj \
$(OBJDIR)\zebra-lock.obj \