X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=e3255287e911323efb4939fc1bcd955a5d70411c;hb=ae2d0647bd3a555df77397b6c2f9c129fda0733a;hp=939218d38a5bf568c2a9501b237a3f7941fa7aa3;hpb=271a10a59d479e149b9cd47258b9bf74c6690526;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 939218d..e325528 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,5 +1,5 @@ -/* $Id: extract.c,v 1.127 2002-10-23 14:28:20 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 +/* $Id: extract.c,v 1.144 2003-04-15 16:46:18 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps This file is part of the Zebra server. @@ -361,8 +361,9 @@ static char *fileMatchStr (ZebraHandle zh, spec_src = rGroup->groupName; else if (!strcmp (special, "database")) spec_src = rGroup->databaseName; - else if (!strcmp (special, "filename")) + else if (!strcmp (special, "filename")) { spec_src = fname; + } else if (!strcmp (special, "type")) spec_src = rGroup->recordType; else @@ -418,7 +419,8 @@ static int recordExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, struct recordGroup *rGroup, int deleteFlag, struct file_read_info *fi, - RecType recType, char *subType, void *clientData) + RecType recType, char *subType, void *clientData, + int force_update) { RecordAttr *recordAttr; int r; @@ -439,8 +441,6 @@ static int recordExtract (ZebraHandle zh, zh->reg->keys.prevAttrSet = -1; zh->reg->keys.prevSeqNo = 0; zh->reg->sortKeys.buf_used = 0; - zh->reg->sortKeys.buf_max = 0; - zh->reg->sortKeys.buf = 0; recordOffset = fi->file_moffset; extractCtrl.offset = fi->file_moffset; @@ -581,7 +581,7 @@ static int recordExtract (ZebraHandle zh, recordAttr = rec_init_attr (zh->reg->zei, rec); - if (recordAttr->runNumber == + if (!force_update && recordAttr->runNumber == zebraExplain_runNumberIncrement (zh->reg->zei, 0)) { yaz_log (LOG_LOG, "run number = %d", recordAttr->runNumber); @@ -883,15 +883,13 @@ int 
fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, { file_begin (fi); r = recordExtract (zh, sysno, fname, rGroup, deleteFlag, fi, - recType, subType, clientData); + recType, subType, clientData, 1); } while (r && !sysno && fi->file_more); file_read_stop (fi); if (fd != -1) close (fd); return r; } - - int extract_rec_in_mem (ZebraHandle zh, const char *recordType, const char *buf, size_t buf_size, const char *databaseName, int delete_flag, @@ -899,14 +897,56 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, int store_keys, int store_data, const char *match_criteria) { + struct recordGroup rGroup; + rGroup.groupName = NULL; + rGroup.databaseName = (char *)databaseName; + rGroup.path = NULL; + rGroup.recordId = NULL; + rGroup.recordType = (char *)recordType; + rGroup.flagStoreData = store_data; + rGroup.flagStoreKeys = store_keys; + rGroup.flagRw = 1; + rGroup.databaseNamePath = 0; + rGroup.explainDatabase = 0; + rGroup.fileVerboseLimit = 100000; + rGroup.followLinks = -1; + return (bufferExtractRecord (zh, + buf, buf_size, + &rGroup, + delete_flag, + test_mode, + recordType, + sysno, + match_criteria, + "", + 0,1)); +} +/* + If sysno is provided, then it's used to identify the record. 
+ If not, and match_criteria is provided, then sysno is guessed + If not, and a record is provided, then sysno is got from there + + */ +int bufferExtractRecord (ZebraHandle zh, + const char *buf, size_t buf_size, + struct recordGroup *rGroup, + int delete_flag, + int test_mode, + const char *recordType, + int *sysno, + const char *match_criteria, + const char *fname, + int force_update, + int allow_update) + +{ RecordAttr *recordAttr; struct recExtractCtrl extractCtrl; int i, r; char *matchStr = 0; - RecType recType; + RecType recType = NULL; char subType[1024]; void *clientData; - const char *fname = ""; Record rec; long recordOffset = 0; struct zebra_fetch_control fc; @@ -925,26 +965,45 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, extractCtrl.endf = zebra_record_int_end; extractCtrl.fh = &fc; - /* announce database */ - if (zebraExplain_curDatabase (zh->reg->zei, databaseName)) - { - if (zebraExplain_newDatabase (zh->reg->zei, databaseName, 0)) - return 0; - } - if (!(recType = - recType_byName (zh->reg->recTypes, recordType, subType, - &clientData))) - { - logf (LOG_WARN, "No such record type: %s", recordType); - return 0; - } - zh->reg->keys.buf_used = 0; zh->reg->keys.prevAttrUse = -1; zh->reg->keys.prevAttrSet = -1; zh->reg->keys.prevSeqNo = 0; zh->reg->sortKeys.buf_used = 0; + /* announce database */ + + if (!(rGroup->databaseName)) { + logf (LOG_WARN, "Invalid record group, no database name given"); + return 0; + } + + if (zebraExplain_curDatabase (zh->reg->zei, rGroup->databaseName)) + { + if (zebraExplain_newDatabase (zh->reg->zei, rGroup->databaseName, 0)) + return 0; + } + + if (*recordType) { + logf (LOG_DEBUG, "Record type explicitly specified: %s", recordType); + recType = recType_byName (zh->reg->recTypes, recordType, subType, + &clientData); + } else { + if (!(rGroup->recordType)) { + logf (LOG_WARN, "No such record type defined"); + return 0; + } + logf (LOG_DEBUG, "Get record type from rgroup: %s",rGroup->recordType); + 
recType = recType_byName (zh->reg->recTypes, rGroup->recordType, subType, + &clientData); + recordType = rGroup->recordType; + } + + if (!recType) { + logf (LOG_WARN, "No such record type: %s", rGroup->recordType); + return 0; + } + extractCtrl.subType = subType; extractCtrl.init = extract_init; extractCtrl.tokenAdd = extract_token_add; @@ -988,6 +1047,27 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, return 1; } /* match criteria */ + matchStr = NULL; + + if (! *sysno && match_criteria) { + char *rinfo; + if (*match_criteria) { + matchStr = (char *)match_criteria; + } else { + if (rGroup->recordId && *rGroup->recordId) { + matchStr = fileMatchStr (zh, &zh->reg->keys, rGroup, fname, + rGroup->recordId); + } + } + if (matchStr) { + rinfo = dict_lookup (zh->reg->matchDict, matchStr); + if (rinfo) + memcpy (sysno, rinfo+1, sizeof(*sysno)); + } else { + logf (LOG_WARN, "Bad match criteria (recordID)"); + return 0; + } + } if (! *sysno) { @@ -1014,28 +1094,47 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, } extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); - } + + zh->records_inserted++; + } else { /* record already exists */ struct recKeys delkeys; + struct sortKeys sortKeys; + + if (!allow_update) { + logf (LOG_LOG, "skipped %s %s %ld", + recordType, fname, (long) recordOffset); + logRecord(zh); + return -1; + } rec = rec_get (zh->reg->records, *sysno); assert (rec); recordAttr = rec_init_attr (zh->reg->zei, rec); - if (recordAttr->runNumber == - zebraExplain_runNumberIncrement (zh->reg->zei, 0)) - { - logf (LOG_LOG, "skipped %s %s %ld", recordType, - fname, (long) recordOffset); - rec_rm (&rec); - return 1; + if (!force_update) { + if (recordAttr->runNumber == + zebraExplain_runNumberIncrement (zh->reg->zei, 0)) + { + logf (LOG_LOG, "skipped %s %s %ld", recordType, + fname, (long) recordOffset); + extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys); + 
rec_rm (&rec); + logRecord(zh); + return 1; + } } + delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; - extract_flushSortKeys (zh, *sysno, 0, &zh->reg->sortKeys); + + sortKeys.buf_used = rec->size[recInfo_sortKeys]; + sortKeys.buf = rec->info[recInfo_sortKeys]; + + extract_flushSortKeys (zh, *sysno, 0, &sortKeys); extract_flushRecordKeys (zh, *sysno, 0, &delkeys); if (delete_flag) { @@ -1050,13 +1149,13 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, { logf (LOG_LOG, "delete %s %s %ld", recordType, fname, (long) recordOffset); -#if 0 + zh->records_deleted++; if (matchStr) - dict_delete (matchDict, matchStr); -#endif + dict_delete (zh->reg->matchDict, matchStr); rec_del (zh->reg->records, &rec); } rec_rm (&rec); + logRecord(zh); return 1; } else @@ -1074,6 +1173,7 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, fname, (long) recordOffset); extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); + zh->records_updated++; } } } @@ -1089,7 +1189,7 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, /* update delete keys */ xfree (rec->info[recInfo_delKeys]); - if (zh->reg->keys.buf_used > 0 && store_keys == 1) + if (zh->reg->keys.buf_used > 0 && rGroup->flagStoreKeys == 1) { rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; rec->info[recInfo_delKeys] = zh->reg->keys.buf; @@ -1102,6 +1202,14 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, rec->size[recInfo_delKeys] = 0; } + /* update sort keys */ + xfree (rec->info[recInfo_sortKeys]); + + rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; + rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; + zh->reg->sortKeys.buf = NULL; + zh->reg->sortKeys.buf_max = 0; + /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, - recordAttr->recordSize); @@ -1121,7 +1229,7 @@ int extract_rec_in_mem (ZebraHandle zh, const char 
*recordType, /* update store data */ xfree (rec->info[recInfo_storeData]); - if (store_data == 1) + if (rGroup->flagStoreData == 1) { rec->size[recInfo_storeData] = recordAttr->recordSize; rec->info[recInfo_storeData] = (char *) @@ -1152,17 +1260,19 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, /* update database name */ xfree (rec->info[recInfo_databaseName]); rec->info[recInfo_databaseName] = - rec_strdup (databaseName, &rec->size[recInfo_databaseName]); + rec_strdup (rGroup->databaseName, &rec->size[recInfo_databaseName]); /* update offset */ recordAttr->recordOffset = recordOffset; /* commit this record */ rec_put (zh->reg->records, &rec); - + logRecord(zh); return 0; } + + int explain_extract (void *handle, Record rec, data1_node *n) { ZebraHandle zh = (ZebraHandle) handle; @@ -1503,22 +1613,18 @@ void extract_add_index_string (RecWord *p, const char *string, static void extract_add_sort_string (RecWord *p, const char *string, int length) { -#if 1 ZebraHandle zh = p->extractCtrl->handle; struct sortKeys *sk = &zh->reg->sortKeys; - size_t off = 0; - int slen; + int off = 0; while (off < sk->buf_used) { - int set, use, l; + int set, use, slen; - l = key_SU_decode(&set, sk->buf + off); - off += l; - l = key_SU_decode(&use, sk->buf + off); - off += l; - l = key_SU_decode(&slen, sk->buf + off); - off += l + slen; + off += key_SU_decode(&set, sk->buf + off); + off += key_SU_decode(&use, sk->buf + off); + off += key_SU_decode(&slen, sk->buf + off); + off += slen; if (p->attrSet == set && p->attrUse == use) return; } @@ -1536,30 +1642,9 @@ static void extract_add_sort_string (RecWord *p, const char *string, } off += key_SU_encode(p->attrSet, sk->buf + off); off += key_SU_encode(p->attrUse, sk->buf + off); - slen = strlen(string); - off += key_SU_encode(slen, sk->buf + off); - memcpy (sk->buf + off, string, slen); - sk->buf_used = off + slen; -#else - struct sortKey *sk; - ZebraHandle zh = p->extractCtrl->handle; - - - for (sk = zh->reg->sortKeys; 
sk; sk = sk->next) - if (sk->attrSet == p->attrSet && sk->attrUse == p->attrUse) - return; - - sk = (struct sortKey *) xmalloc (sizeof(*sk)); - sk->next = zh->reg->sortKeys; - zh->reg->sortKeys = sk; - - sk->string = (char *) xmalloc (length); - sk->length = length; - memcpy (sk->string, string, length); - - sk->attrSet = p->attrSet; - sk->attrUse = p->attrUse; -#endif + off += key_SU_encode(length, sk->buf + off); + memcpy (sk->buf + off, string, length); + sk->buf_used = off + length; } void extract_add_string (RecWord *p, const char *string, int length) @@ -1665,6 +1750,12 @@ static void extract_add_complete_field (RecWord *p) void extract_token_add (RecWord *p) { WRBUF wrbuf; + +#if 0 + yaz_log (LOG_LOG, "reg_type=%c attrSet=%d attrUse=%d seqno=%d s=%.*s", + p->reg_type, p->attrSet, p->attrUse, p->seqno, p->length, + p->string); +#endif if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0, p->string, p->length))) { @@ -1687,13 +1778,13 @@ void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, int cmd, struct sortKeys *sk) { SortIdx sortIdx = zh->reg->sortIdx; - size_t off = 0; + int off = 0; sortIdx_sysno (sortIdx, sysno); while (off < sk->buf_used) { - int set, use, slen, l; + int set, use, slen; off += key_SU_decode(&set, sk->buf + off); off += key_SU_decode(&use, sk->buf + off); @@ -1743,6 +1834,7 @@ char *encode_key_int (int d, char *bp) } return bp; } +#define OLDENCODE 1 #ifdef OLDENCODE /* this is the old encode_key_write @@ -1798,6 +1890,7 @@ void encode_key_write (char *k, struct encode_info *i, FILE *outf) while ((*bp++ = *k++)) ; i->keylen= bp - i->buf -1; + assert(i->keylen+1+sizeof(struct it_key) < ENCODE_BUFLEN); } else { @@ -1845,25 +1938,6 @@ void encode_key_write (char *k, struct encode_info *i, FILE *outf) i->prevseq=key.seqno; i->prevcmd=*k; } -#ifdef SKIPTHIS_OLDCODE - bp = encode_key_int ( (key.sysno - i->sysno) * 2 + *k, bp); - if (i->sysno != key.sysno) - { - i->sysno = key.sysno; - i->seqno = 0; - } - else if (!i->seqno && 
!key.seqno && i->cmd == *k) - return; - bp = encode_key_int (key.seqno - i->seqno, bp); - i->seqno = key.seqno; - i->cmd = *k; - if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) - { - logf (LOG_FATAL|LOG_ERRNO, "fwrite"); - exit (1); - } - i->keylen=0; /* ok, it's written, forget it */ -#endif } void encode_key_flush (struct encode_info *i, FILE *outf)