2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.16 1996-02-02 13:43:50 adam
8 * The public functions simply use char instead of Dict_char to represent
9 * search strings. Dict_char is used internally only.
11 * Revision 1.15 1996/02/01 20:39:59 adam
12 * Bug fix: insert didn't work on 8-bit characters due to unsigned char
13 * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is
16 * Revision 1.14 1995/12/07 11:48:56 adam
17 * Insert operation obeys DICT_type = 1 (slack in page).
18 * Function dict_open exists if page size or magic aren't right.
20 * Revision 1.13 1995/11/28 09:06:37 adam
21 * Fixed potential dangling pointer.
23 * Revision 1.12 1995/09/06 10:34:44 adam
24 * Memcpy in clean_page edited to satisfy checkergcc.
26 * Revision 1.11 1995/09/04 12:33:31 adam
27 * Various cleanup. YAZ util used instead.
29 * Revision 1.10 1994/10/05 12:16:48 adam
30 * Pagesize is a resource now.
32 * Revision 1.9 1994/09/16 15:39:13 adam
33 * Initial code of lookup - not tested yet.
35 * Revision 1.8 1994/09/16 12:35:01 adam
36 * New version of split_page which use clean_page for splitting.
38 * Revision 1.7 1994/09/12 08:06:42 adam
39 * Futher development of insert.c
41 * Revision 1.6 1994/09/06 13:05:15 adam
42 * Further development of insertion. Some special cases are
43 * not properly handled yet! assert(0) are put here. The
44 * binary search in each page definitely reduce usr CPU.
46 * Revision 1.5 1994/09/01 17:49:39 adam
47 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
49 * Revision 1.4 1994/09/01 17:44:09 adam
50 * depend include change.
52 * Revision 1.3 1994/08/18 12:40:56 adam
53 * Some development of dictionary. Not finished at all!
55 * Revision 1.2 1994/08/17 13:32:19 adam
56 * Use cache in dict - not in bfile.
58 * Revision 1.1 1994/08/16 16:26:48 adam
72 static int dict_ins (Dict dict, const Dict_char *str,
73 Dict_ptr back_ptr, int userlen, void *userinfo);
74 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
75 Dict_ptr subptr, char *userinfo);
78 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
81 Dict_ptr ptr = dict->head.free_list;
82 if (dict->head.free_list == dict->head.last)
84 dict->head.free_list++;
85 dict->head.last = dict->head.free_list;
86 dict_bf_newp (dict->dbf, ptr, &p);
90 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
91 dict->head.free_list = DICT_nextptr(p);
92 if (dict->head.free_list == 0)
93 dict->head.free_list = dict->head.last;
97 DICT_backptr(p) = back_ptr;
100 DICT_size(p) = DICT_infoffset;
106 static int split_page (Dict dict, Dict_ptr ptr, void *p)
112 short *indxp, *best_indxp = NULL;
113 Dict_char best_char = 0;
114 Dict_char prev_char = 0;
115 int best_no = -1, no_current = 1;
117 /* determine splitting char... */
118 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
119 for (i = DICT_nodir (p); --i >= 0; --indxp)
121 if (*indxp > 0) /* tail string here! */
125 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
127 { /* first entry met */
128 best_char = prev_char = dc;
131 else if (prev_char == dc)
132 { /* same char prefix. update */
133 if (++no_current > best_no)
134 { /* best entry so far */
135 best_no = no_current;
141 { /* new char prefix. restore */
147 if (best_no < 0) /* we didn't find any tail string entry at all! */
150 j = best_indxp - (short*) p;
151 subptr = new_page (dict, ptr, &subp);
152 /* scan entries to see if there is a string with */
153 /* length 1. info_here indicates if such entry exist */
155 for (i=0; i<best_no; i++, j++)
162 info = (char*) p + ((short*) p)[j];
164 memcpy (&dc, info, sizeof(dc));
165 assert (dc == best_char);
166 slen = dict_strlen((Dict_char*) info);
172 info_here = info+(slen+1)*sizeof(Dict_char);
176 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
177 dict_ins (dict, (Dict_char*) (info+sizeof(Dict_char)),
178 subptr, *info1, info1+1);
179 dict_bf_readp (dict->dbf, ptr, &p);
182 /* now clean the page ... */
183 clean_page (dict, ptr, p, &best_char, subptr, info_here);
187 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
188 Dict_ptr subptr, char *userinfo)
190 char *np = xmalloc (dict->head.page_size);
192 short *indxp1, *indxp2;
195 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
196 indxp2 = (short*) ((char*) np+DICT_pagesize(dict));
197 info2 = (char*) np + DICT_infoffset;
198 for (i = DICT_nodir (p); --i >= 0; --indxp1)
200 if (*indxp1 > 0) /* tail string here! */
202 /* string (Dict_char *) DICT_EOS terminated */
203 /* unsigned char length of information */
204 /* char * information */
206 info1 = (char*) p + *indxp1;
207 if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
211 *--indxp2 = -(info2 - np);
212 memcpy (info2, &subptr, sizeof(Dict_ptr));
213 info2 += sizeof(Dict_ptr);
214 memcpy (info2, out, sizeof(Dict_char));
215 info2 += sizeof(Dict_char);
218 memcpy (info2, userinfo, *userinfo+1);
219 info2 += *userinfo + 1;
227 *--indxp2 = info2 - np;
228 slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char);
229 memcpy (info2, info1, slen);
235 /* Dict_ptr subptr */
236 /* Dict_char sub char */
237 /* unsigned char length of information */
238 /* char * information */
240 assert (*indxp1 < 0);
241 *--indxp2 = -(info2 - np);
242 info1 = (char*) p - *indxp1;
243 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
244 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
245 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
248 memcpy (info2, info1, slen);
253 memcpy ((char*)p+DICT_infoffset,
254 (char*)np+DICT_infoffset,
255 info2 - ((char*)np+DICT_infoffset));
256 memcpy ((char*)p + ((char*)indxp2 - (char*)np),
258 ((char*) np+DICT_pagesize(dict)) - (char*)indxp2);
260 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
261 DICT_pagesize(dict)-DICT_infoffset);
263 DICT_size(p) = info2 - np;
267 dict_bf_touch (dict->dbf, ptr);
272 /* return 0 if new */
273 /* return 1 if before but change of info */
274 /* return 2 if same as before */
276 static int dict_ins (Dict dict, const Dict_char *str,
277 Dict_ptr back_ptr, int userlen, void *userinfo)
279 int hi, lo, mid, slen, cmp = 1;
280 Dict_ptr ptr = back_ptr;
286 ptr = new_page (dict, back_ptr, &p);
288 dict_bf_readp (dict->dbf, ptr, &p);
294 hi = DICT_nodir(p)-1;
295 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
301 /* string (Dict_char *) DICT_EOS terminated */
302 /* unsigned char length of information */
303 /* char * information */
304 info = (char*)p + indxp[-mid];
305 cmp = dict_strcmp((Dict_char*) info, str);
308 info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char);
309 /* consider change of userinfo length... */
310 if (*info == userlen)
312 if (memcmp (info+1, userinfo, userlen))
314 dict_bf_touch (dict->dbf, ptr);
315 memcpy (info+1, userinfo, userlen);
320 else if (*info > userlen)
324 dict_bf_touch (dict->dbf, ptr);
325 memcpy (info+1, userinfo, userlen);
336 /* Dict_ptr subptr */
337 /* Dict_char sub char */
338 /* unsigned char length of information */
339 /* char * information */
340 info = (char*)p - indxp[-mid];
341 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
345 memcpy (&subptr, info, sizeof(Dict_ptr));
346 if (*++str == DICT_EOS)
350 xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
353 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
356 dict_bf_touch (dict->dbf, ptr);
357 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
363 else if (xlen > userlen)
366 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
367 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
369 dict_bf_touch (dict->dbf, ptr);
372 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
374 DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short))
376 if (DICT_type(p) == 1)
378 clean_page (dict, ptr, p, NULL, 0, NULL);
379 return dict_ins (dict, str-1, ptr,
382 if (split_page (dict, ptr, p))
384 logf (LOG_FATAL, "Unable to split page %d\n", ptr);
387 return dict_ins (dict, str-1, ptr, userlen, userinfo);
391 info = (char*)p + DICT_size(p);
392 memcpy (info, &subptr, sizeof(subptr));
393 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
394 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
395 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
397 indxp[-mid] = -DICT_size(p);
398 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
401 dict_bf_touch (dict->dbf, ptr);
411 subptr = new_page (dict, ptr, NULL);
412 memcpy (info, &subptr, sizeof(subptr));
413 dict_bf_touch (dict->dbf, ptr);
415 return dict_ins (dict, str, subptr, userlen, userinfo);
425 if (lo>hi && cmp < 0)
427 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
428 if (DICT_size(p)+slen+userlen >=
429 DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
433 clean_page (dict, ptr, p, NULL, 0, NULL);
434 return dict_ins (dict, str, ptr, userlen, userinfo);
436 split_page (dict, ptr, p);
437 return dict_ins (dict, str, ptr, userlen, userinfo);
443 indxp1 = (short*)((char*) p + DICT_pagesize(dict)
444 - DICT_nodir(p)*sizeof(short));
445 for (; indxp1 != indxp; indxp1++)
446 indxp1[0] = indxp1[1];
448 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
449 for (i = DICT_nodir (p); --i >= 0; --indxp1)
453 info = (char*)p - *indxp1;
454 assert (info[sizeof(Dict_ptr)] > ' ');
461 info = (char*)p + DICT_size(p);
462 memcpy (info, str, slen);
465 memcpy (info, userinfo, userlen);
468 *indxp = DICT_size(p);
469 DICT_size(p) = info- (char*) p;
470 dict_bf_touch (dict->dbf, ptr);
476 int dict_insert (Dict dict, const char *str, int userlen, void *userinfo)
478 assert (dict->head.last > 0);
479 if (dict->head.last == 1)
480 return dict_ins (dict, (const Dict_char *) str, 0, userlen, userinfo);
482 return dict_ins (dict, (const Dict_char *) str, 1, userlen, userinfo);