2 * Copyright (C) 1994-1999, Index Data
4 * Sebastian Hammer, Adam Dickmeiss, Heikki Levanto
13 #include "../isamc/isamd-p.h"
/*
 * Accumulator for register statistics.  inv_stat_handle receives a pointer
 * to one of these (cast from its client argument) and updates it once per
 * dictionary term; zebra_register_statistics zeroes it first and prints
 * the totals afterwards.
 */
15 struct inv_stat_info {
/* occurrence totals per ISAM block type/category; slot SINGLETON_TYPE (8)
   is the last valid index */
17 int no_isam_entries[9];
/* histogram: isam_occurrences[i] counts the terms whose occurrence count
   landed in bucket i of the isam_bounds table */
21 int isam_occurrences[20];
/* isamb only: counters indexed [category][level], where level is the value
   filled in by isamb_pp_open_x */
23 int isamb_levels[10][5];
/* Index of the extra no_isam_entries slot used for isamd singletons.
   no_isam_entries is declared with 9 elements, so 8 is its last index;
   raising this value requires enlarging that array as well. */
29 #define SINGLETON_TYPE 8 /* the type to use for singletons that */
30 /* have no block and no block type */
/*
 * Collect statistics for a single dictionary term.
 *
 *   name   - the term; only its strlen() is used here
 *   info   - one length byte (must equal sizeof(ISAMS_P)) followed by the
 *            ISAMS_P value locating the term's posting list
 *   client - cast to struct inv_stat_info *, the accumulator being updated
 *
 * For each ISAM backend that is non-NULL in zh->reg the posting list is
 * opened, its entries are counted (and, where keys are read, folded into
 * the running checksum), and the count is added to the matching
 * no_isam_entries slot.  Finally the term is counted in the occurrence
 * histogram.
 * NOTE(review): presumably this is the handler given to dict_scan by
 * zebra_register_statistics -- confirm against the call site.
 */
32 static int inv_stat_handle (char *name, const char *info, int pos,
37 struct inv_stat_info *stat_info = (struct inv_stat_info*) client;
/* every call accounts for one dictionary term and the bytes of its name */
40 stat_info->no_dict_entries++;
41 stat_info->no_dict_bytes += strlen(name);
/* info[0] is the payload length, info+1 the ISAMS_P itself */
43 assert (*info == sizeof(ISAMS_P));
44 memcpy (&isam_p, info+1, sizeof(ISAMS_P));
/* --- isams: read every key, update checksum, totals go to slot 0 --- */
46 if (stat_info->zh->reg->isams)
52 pp = isams_pp_open (stat_info->zh->reg->isams, isam_p);
53 occur = isams_pp_num (pp);
54 while (isams_pp_read(pp, &key))
56 stat_info->cksum = stat_info->cksum * 65509 +
57 key.sysno + 11 * key.seqno;
/* occurx is presumably the number of keys actually read -- it must agree
   with the count the backend reported */
60 assert (occurx == occur);
61 stat_info->no_isam_entries[0] += occur;
/* --- isam: only the key count is taken, no keys are read here --- */
64 if (stat_info->zh->reg->isam)
68 ispt = is_position (stat_info->zh->reg->isam, isam_p);
69 occur = is_numkeys (ispt);
70 stat_info->no_isam_entries[is_type(isam_p)] += occur;
/* --- isamc: read every key, update checksum, slot chosen by isc_type --- */
73 if (stat_info->zh->reg->isamc)
79 pp = isc_pp_open (stat_info->zh->reg->isamc, isam_p);
80 occur = isc_pp_num (pp);
81 while (isc_pp_read(pp, &key))
83 stat_info->cksum = stat_info->cksum * 65509 +
84 key.sysno + 11 * key.seqno;
87 assert (occurx == occur);
88 stat_info->no_isam_entries[isc_type(isam_p)] += occur;
/* --- isamd: as above, with extra logging controlled by method->debug --- */
91 if (stat_info->zh->reg->isamd)
97 pp = isamd_pp_open (stat_info->zh->reg->isamd, isam_p);
99 occur = isamd_pp_num (pp);
100 while (isamd_pp_read(pp, &key))
102 stat_info->cksum = stat_info->cksum * 65509 +
103 key.sysno + 11 * key.seqno;
/* debug level above 8: log each key as it is read */
105 if ( pp->is->method->debug >8 )
106 logf (LOG_LOG,"sysno=%d seqno=%d (%x/%x) oc=%d/%d ofs=%d ",
107 key.sysno, key.seqno,
108 key.sysno, key.seqno,
109 occur,occurx, pp->offset);
/* debug level above 7: log a per-item summary of reported vs. counted keys */
111 if ( pp->is->method->debug >7 )
112 logf(LOG_LOG,"item %d=%d:%d says %d keys, counted %d",
113 isam_p, isamd_type(isam_p), isamd_block(isam_p),
116 logf(LOG_LOG,"Count error!!! read %d, counted %d", occur, occurx);
/* a mismatch between reported and counted keys is fatal */
117 assert (occurx == occur);
/* singletons have no block type of their own, so they get the dedicated
   SINGLETON_TYPE slot; everything else is filed under its isamd type */
118 if ( is_singleton(isam_p) )
119 stat_info->no_isam_entries[SINGLETON_TYPE] += occur;
121 stat_info->no_isam_entries[isamd_type(isam_p)] += occur;
/* --- isamb: category is the low two bits of the ISAMS_P value --- */
124 if (stat_info->zh->reg->isamb)
128 int cat = isam_p & 3;
133 pp = isamb_pp_open_x(stat_info->zh->reg->isamb, isam_p, &level);
135 while (isamb_pp_read(pp, &key))
137 stat_info->cksum = stat_info->cksum * 65509 +
138 key.sysno + 11 * key.seqno;
/* closing hands back the size and block count of this posting list */
141 isamb_pp_close_x (pp, &size, &blocks);
142 stat_info->isamb_blocks[cat] += blocks;
143 stat_info->isamb_sizes[cat] += size;
/* NOTE(review): isamb_levels is declared [10][5]; level comes from
   isamb_pp_open_x and is not range-checked in the code shown -- confirm it
   can never reach 5 */
146 stat_info->isamb_levels[cat][level] ++;
147 stat_info->no_isam_entries[cat] += occur;
/* histogram: stop at the first bucket whose upper bound is not exceeded by
   occur, or at the 0 bound that terminates the table, and count the term
   there (i is presumably advanced in the loop body -- confirm) */
150 while (occur > stat_info->isam_bounds[i] && stat_info->isam_bounds[i])
152 ++(stat_info->isam_occurrences[i]);
/*
 * Print index statistics for the register behind zh to stdout.
 *
 * Zeroes a local struct inv_stat_info, sets up the occurrence-histogram
 * bounds, scans the dictionary with that struct as client data, and then
 * prints per-backend block tables (isamc, isamd, isamb), the checksum,
 * the number of distinct words, the total of all occurrence slots and the
 * occurrence histogram.
 */
156 void zebra_register_statistics (ZebraHandle zh)
164 int after = 1000000000;
165 struct inv_stat_info stat_info;
166 char term_dict[2*IT_MAX_WORD+2];
/* statistics are gathered inside a read section on the handle */
168 if (zebra_begin_read (zh))
/* clear the per-type totals, including the SINGLETON_TYPE slot */
176 for (i = 0; i<=SINGLETON_TYPE; i++)
177 stat_info.no_isam_entries[i] = 0;
178 stat_info.no_dict_entries = 0;
179 stat_info.no_dict_bytes = 0;
/* upper bounds of the occurrence-histogram buckets; the 0 at index 18
   terminates the table.
   NOTE(review): the step from 200 straight to 5000 is much larger than
   its neighbours -- confirm this is intended */
180 stat_info.isam_bounds[0] = 1;
181 stat_info.isam_bounds[1] = 2;
182 stat_info.isam_bounds[2] = 3;
183 stat_info.isam_bounds[3] = 6;
184 stat_info.isam_bounds[4] = 10;
185 stat_info.isam_bounds[5] = 20;
186 stat_info.isam_bounds[6] = 30;
187 stat_info.isam_bounds[7] = 50;
188 stat_info.isam_bounds[8] = 100;
189 stat_info.isam_bounds[9] = 200;
190 stat_info.isam_bounds[10] = 5000;
191 stat_info.isam_bounds[11] = 10000;
192 stat_info.isam_bounds[12] = 20000;
193 stat_info.isam_bounds[13] = 50000;
194 stat_info.isam_bounds[14] = 100000;
195 stat_info.isam_bounds[15] = 200000;
196 stat_info.isam_bounds[16] = 500000;
197 stat_info.isam_bounds[17] = 1000000;
198 stat_info.isam_bounds[18] = 0;
/* clear all 20 histogram counters */
202 for (i = 0; i<20; i++)
203 stat_info.isam_occurrences[i] = 0;
/* clear the isamb per-category counters (10 categories x 5 levels) */
205 for (i = 0; i<10; i++)
208 for (j = 0; j<5; j++)
209 stat_info.isamb_levels[i][j] = 0;
210 stat_info.isamb_sizes[i] = 0;
211 stat_info.isamb_blocks[i] = 0;
/* walk the dictionary; &stat_info is handed through as client data */
214 dict_scan (zh->reg->dict, term_dict, &before, &after, &stat_info,
219 fprintf (stdout, " Blocks Occur Size KB Bytes/Entry\n");
/* isamc table: one row per block type until isc_block_used reports a
   negative value */
220 for (i = 0; isc_block_used (zh->reg->isamc, i) >= 0; i++)
222 fprintf (stdout, " %8d %8d", isc_block_used (zh->reg->isamc, i),
223 stat_info.no_isam_entries[i]);
/* size in KB (rounded up) and bytes per entry; skipped for types with no
   entries, which also avoids dividing by zero */
225 if (stat_info.no_isam_entries[i])
226 fprintf (stdout, " %8d %f",
227 (int) ((1023.0 + (double)
228 isc_block_used(zh->reg->isamc, i) *
229 isc_block_size(zh->reg->isamc,i))/1024),
230 ((double) isc_block_used(zh->reg->isamc, i) *
231 isc_block_size(zh->reg->isamc,i))/
232 stat_info.no_isam_entries[i]);
233 fprintf (stdout, "\n");
238 fprintf (stdout, " Blocks Occur KB Bytes/Entry\n");
/* isamd table: with debug enabled each line is mirrored to the log */
239 if (zh->reg->isamd->method->debug >0)
240 logf(LOG_LOG," Blocks Occur KB Bytes/Entry");
241 for (i = 0; i<=SINGLETON_TYPE; i++)
243 blocks= isamd_block_used(zh->reg->isamd,i);
244 size= isamd_block_size(zh->reg->isamd,i);
245 count=stat_info.no_isam_entries[i];
246 if (i==SINGLETON_TYPE)
/* only types that actually hold entries get a row */
248 if (stat_info.no_isam_entries[i])
/* row label is 'z' for the singleton slot, otherwise 'A' + type; KB is
   rounded up by adding 1023 before the division */
250 fprintf (stdout, "%c %7d %7d %7d %5.2f\n",
251 (i==SINGLETON_TYPE)?('z'):('A'+i),
254 (int) ((1023.0 + (double) blocks * size)/1024),
255 ((double) blocks * size)/count);
256 if (zh->reg->isamd->method->debug >0)
257 logf(LOG_LOG, "%c %7d %7d %7d %5.2f",
258 (i==SINGLETON_TYPE)?('z'):('A'+i),
261 (int) ((1023.0 + (double) blocks * size)/1024),
262 ((double) blocks * size)/count);
/* dictionary size summary, only for isamd registers with debug enabled */
266 if ( (zh->reg->isamd) && (zh->reg->isamd->method->debug>0))
267 fprintf (stdout, "\n%d words using %d bytes\n",
268 stat_info.no_dict_entries, stat_info.no_dict_bytes);
/* isamb report: four categories, matching the (isam_p & 3) category
   computed in inv_stat_handle */
272 for (i = 0; i<4; i++)
275 int bsize = isamb_block_info(zh->reg->isamb, i);
278 fprintf (stdout, "Category %d\n", i);
279 fprintf (stdout, "Block size %d\n", bsize);
280 fprintf (stdout, "Blocks: %d\n", stat_info.isamb_blocks[i]);
281 fprintf (stdout, "Size: %d\n", stat_info.isamb_sizes[i]);
282 fprintf (stdout, "Entries: %d\n", stat_info.no_isam_entries[i]);
283 fprintf (stdout, "Total %d\n", stat_info.isamb_blocks[i]*
/* list only the levels that were actually seen */
285 for (j = 0; j<5; j++)
286 if (stat_info.isamb_levels[i][j])
287 fprintf (stdout, "Level%d %d\n", j,
288 stat_info.isamb_levels[i][j]);
289 fprintf (stdout, "\n");
/* NOTE(review): %08lX expects an unsigned long argument -- confirm the
   declared type of cksum */
292 fprintf (stdout, "Checksum %08lX\n", stat_info.cksum);
294 fprintf (stdout, "Distinct words %d\n", stat_info.no_dict_entries);
/* total over all occurrence slots; the literal 9 is the declared size of
   no_isam_entries (SINGLETON_TYPE + 1) */
296 for (i = 0; i<9; i++)
297 occur += stat_info.no_isam_entries[i];
298 fprintf (stdout, "Word pos %d\n", occur);
299 fprintf (stdout, " Occurrences Words\n");
/* one row per bounded bucket, stopping at the 0 terminator ... */
301 for (i = 0; stat_info.isam_bounds[i]; i++)
303 int here = stat_info.isam_bounds[i];
304 fprintf (stdout, "%7d-%-7d %7d\n",
305 prev, here, stat_info.isam_occurrences[i]);
/* ... then the open-ended bucket above the last bound */
308 fprintf (stdout, "%7d- %7d\n",
309 prev, stat_info.isam_occurrences[i]);
310 xmalloc_trav("unfreed"); /*! while hunting memory leaks */
317 * $Log: invstat.c,v $
318 * Revision 1.29 2002-06-19 10:29:17 adam
319 * align block sizes for isam sys. Better plot for test
321 * Revision 1.28 2002/04/30 19:31:09 adam
322 * isamb delete; more statistics
324 * Revision 1.27 2002/04/30 08:28:37 adam
325 * isamb fixes for pp_read. Statistics
327 * Revision 1.26 2002/04/29 18:03:46 adam
328 * More isamb statistics
330 * Revision 1.25 2002/04/26 08:44:47 adam
331 * Index statistics working again
333 * Revision 1.24 2002/04/05 08:46:26 adam
334 * Zebra with full functionality
336 * Revision 1.23 2002/04/04 14:14:13 adam
337 * Multiple registers (alpha early)
339 * Revision 1.22 2002/02/20 17:30:01 adam
340 * Work on new API. Locking system re-implemented
342 * Revision 1.21 2000/07/13 10:14:20 heikki
343 * Removed compiler warnings when making zebra
345 * Revision 1.20 1999/12/01 13:30:30 adam
346 * Updated configure for Zmbol/Zebra dependent settings.
348 * Revision 1.19 1999/11/30 13:48:03 adam
349 * Improved installation. Updated for inclusion of YAZ header files.
351 * Revision 1.18 1999/10/06 11:46:36 heikki
352 * Improved statistics on isam-d
354 * Revision 1.17 1999/08/20 08:28:37 heikki
357 * Revision 1.16 1999/08/18 08:38:22 heikki
358 * Memory leak hunting
360 * Revision 1.15 1999/08/18 08:34:53 heikki
363 * Revision 1.14 1999/07/14 10:59:26 adam
364 * Changed functions isc_getmethod, isams_getmethod.
365 * Improved fatal error handling (such as missing EXPLAIN schema).
367 * Revision 1.13 1999/07/08 14:23:27 heikki
368 * Fixed a bug in isamh_pp_read and cleaned up a bit
370 * Revision 1.12 1999/07/06 12:28:04 adam
371 * Updated record index structure. Format includes version ID. Compression
372 * algorithm ID is stored for each record block.
374 * Revision 1.11 1999/05/15 14:36:38 adam
375 * Updated dictionary. Implemented "compression" of dictionary.
377 * Revision 1.10 1999/05/12 13:08:06 adam
378 * First version of ISAMS.
380 * Revision 1.9 1999/02/12 13:29:23 adam
381 * Implemented position-flag for registers.
383 * Revision 1.8 1999/02/02 14:50:53 adam
384 * Updated WIN32 code specific sections. Changed header.
386 * Revision 1.7 1998/03/13 15:30:50 adam
387 * New functions isc_block_used and isc_block_size. Fixed 'leak'
388 * in isc_alloc_block.
390 * Revision 1.6 1998/03/06 13:54:02 adam
391 * Fixed two nasty bugs in isc_merge.
393 * Revision 1.5 1997/09/17 12:19:13 adam
394 * Zebra version corresponds to YAZ version 1.4.
395 * Changed Zebra server so that it doesn't depend on global common_resource.
397 * Revision 1.4 1996/11/08 11:10:21 adam
398 * Buffers used during file match got bigger.
399 * Compressed ISAM support everywhere.
400 * Bug fixes regarding masking characters in queries.
401 * Redesigned Regexp-2 queries.
403 * Revision 1.3 1996/06/04 10:18:58 adam
404 * Minor changes - removed include of ctype.h.
406 * Revision 1.2 1996/05/22 08:25:56 adam
409 * Revision 1.1 1996/05/14 14:04:34 adam
410 * In zebraidx, the 'stat' command is improved. Statistics about ISAM/DICT