1 /* $Id: physical.c,v 1.18.2.2 2006-08-14 10:39:03 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
26 * This module handles the representation of tables in the bfiles.
/*
 * is_freestore_alloc -- obtain a disk block number of the given block type.
 * Pops the head of the per-type freelist when one exists (a free block
 * stores the number of the next free block in its first sizeof(tmp) bytes),
 * otherwise extends the file by taking the next block number from 'top'.
 * NOTE(review): this listing is elided -- braces, the 'tmp' declaration and
 * the return statement are not visible here; presumably returns 'tmp'.
 */
37 static int is_freestore_alloc(ISAM is, int type)
41 if (is->types[type].freelist >= 0)
/* Reuse the freelist head; read its successor out of the block itself. */
43 tmp = is->types[type].freelist;
44 if (bf_read(is->types[type].bf, tmp, 0, sizeof(tmp),
45 &is->types[type].freelist) <=0)
47 logf (LOG_FATAL, "Failed to allocate block");
/* Freelist empty: allocate a brand-new block at the end of the file. */
52 tmp = is->types[type].top++;
54 logf (LOG_DEBUG, "Allocating block #%d", tmp);
/*
 * is_freestore_free -- return 'block' to the per-type freelist.
 * The previous freelist head is saved in 'tmp', 'block' becomes the new
 * head, and the old head's number is written into the first bytes of the
 * freed block so the freelist chain can be walked by is_freestore_alloc.
 */
58 static void is_freestore_free(ISAM is, int type, int block)
62 logf (LOG_DEBUG, "Releasing block #%d", block);
63 tmp = is->types[type].freelist;
64 is->types[type].freelist = block;
/* Persist the old head inside the freed block (chain link on disk). */
65 if (bf_write(is->types[type].bf, block, 0, sizeof(tmp), &tmp) < 0)
67 logf (LOG_FATAL, "Failed to deallocate block.");
72 /* this code must be modified to handle an index */
/*
 * is_p_read_partial -- read the first chunk of a disk block into memory.
 * Allocates one large mbuf, reads up to one blocksize (capped at the mbuf
 * capacity) from the block's disk position, then parses the on-disk header:
 * record count, next-block position, and -- for the table's first block
 * only -- the table-wide record total.  Leaves the block CLEAN when every
 * record fit in the mbuf, PARTIAL otherwise (is_p_read_full fetches the
 * rest).  NOTE(review): lines are elided from this listing; local
 * declarations ('buf', 'toread'), braces and return statements are not
 * visible here.
 */
73 int is_p_read_partial(is_mtable *tab, is_mblock *block)
78 assert(block->state == IS_MBSTATE_UNREAD);
79 block->data = buf = xmalloc_mbuf(IS_MBUF_TYPE_LARGE);
80 toread = tab->is->types[tab->pos_type].blocksize;
/* Cap the read at the mbuf's capacity; mark PARTIAL if it can't all fit. */
81 if (toread > is_mbuf_size[buf->type])
83 toread = is_mbuf_size[buf->type];
84 block->state = IS_MBSTATE_PARTIAL;
87 block->state = IS_MBSTATE_CLEAN;
88 if (bf_read(tab->is->types[tab->pos_type].bf, block->diskpos, 0, toread,
91 logf (LOG_FATAL, "bfread failed.");
94 /* extract header info */
96 memcpy(&block->num_records, buf->data, sizeof(block->num_records));
97 assert(block->num_records > 0);
98 buf->offset += sizeof(block->num_records);
99 memcpy(&block->nextpos, buf->data + buf->offset,
100 sizeof(block->nextpos));
101 buf->offset += sizeof(block->nextpos);
102 if (block == tab->data) /* first block */
/* Only block #0 carries the table-wide record total in its header. */
104 memcpy(&tab->num_records, buf->data + buf->offset,
105 sizeof(tab->num_records));
106 buf->offset +=sizeof(tab->num_records);
108 logf(LOG_DEBUG, "R: Block #%d: num %d nextpos %d total %d",
109 block->diskpos, block->num_records, block->nextpos,
110 block == tab->data ? tab->num_records : -1);
/* How many whole keys landed in this mbuf after the header bytes. */
111 buf->num = (toread - buf->offset) / is_keysize(tab->is);
112 if (buf->num >= block->num_records)
114 buf->num = block->num_records;
115 block->state = IS_MBSTATE_CLEAN;
/* bread = bytes of this disk block consumed so far (header + keys). */
118 block->bread = buf->offset + buf->num * is_keysize(tab->is);
/*
 * is_p_read_full -- ensure a block's records are entirely in memory.
 * Performs the partial (header) read first if the block is UNREAD, then,
 * while the block is PARTIAL, keeps appending large mbufs and reading the
 * remaining keys from disk (resuming at block->bread) until all
 * num_records are loaded, at which point the block becomes CLEAN.
 * NOTE(review): lines are elided from this listing; 'buf'/'dread'
 * declarations, mbuf advancing and return statements are not visible here.
 */
122 int is_p_read_full(is_mtable *tab, is_mblock *block)
127 if (block->state == IS_MBSTATE_UNREAD && is_p_read_partial(tab, block) < 0)
129 logf (LOG_FATAL, "partial read failed.");
132 if (block->state == IS_MBSTATE_PARTIAL)
/* Records already delivered by the partial read. */
135 dread = block->data->num;
136 while (dread < block->num_records)
138 buf->next = xmalloc_mbuf(IS_MBUF_TYPE_LARGE);
/* Whole keys that fit in one mbuf, clipped to what is still missing. */
141 toread = is_mbuf_size[buf->type] / is_keysize(tab->is);
142 if (toread > block->num_records - dread)
143 toread = block->num_records - dread;
/* Resume reading at byte offset block->bread within the disk block. */
145 if (bf_read(tab->is->types[tab->pos_type].bf, block->diskpos, block->bread, toread *
146 is_keysize(tab->is), buf->data) < 0)
148 logf (LOG_FATAL, "bfread failed.");
154 block->bread += toread * is_keysize(tab->is);
156 block->state = IS_MBSTATE_CLEAN;
158 logf (LOG_DEBUG, "R: Block #%d contains %d records.", block->diskpos, block->num_records);
163 * write dirty blocks to bfile.
164 * Allocate blocks as necessary.
/*
 * is_p_sync -- flush all dirty in-memory blocks of the table to the bfile.
 * First pass per block: assign a disk position (via is_freestore_alloc)
 * to this block and its successor if they have none, and record the
 * successor's position in nextpos.  Then the block header (record count,
 * nextpos, plus the table total for the first block) and all key data
 * from the mbuf chain are serialized into the type's staging buffer
 * 'dbuf' and written out with one bf_write.
 * NOTE(review): lines are elided from this listing; declarations of
 * 'p', 'b', 'sum', 'v' and several braces are not visible here.
 */
166 void is_p_sync(is_mtable *tab)
171 isam_blocktype *type;
173 type = &tab->is->types[tab->pos_type];
174 for (p = tab->data; p; p = p->next)
/* States below DIRTY need no write-back; skip them. */
176 if (p->state < IS_MBSTATE_DIRTY)
178 /* make sure that blocks are allocated. */
180 p->diskpos = is_freestore_alloc(tab->is, tab->pos_type);
/* Successor needs a position too, so this block can point at it. */
183 if (p->next->diskpos < 0)
184 p->nextpos = p->next->diskpos = is_freestore_alloc(tab->is,
187 p->nextpos = p->next->diskpos;
/* Serialize the block header into the staging buffer. */
192 memcpy(type->dbuf, &p->num_records, sizeof(p->num_records));
193 sum += sizeof(p->num_records);
194 memcpy(type->dbuf + sum, &p->nextpos, sizeof(p->nextpos));
195 sum += sizeof(p->nextpos);
196 if (p == tab->data) /* first block */
/* Only the first block stores the table-wide record total. */
198 memcpy(type->dbuf + sum, &tab->num_records,
199 sizeof(tab->num_records));
200 sum += sizeof(tab->num_records);
202 logf (LOG_DEBUG, "W: Block #%d contains %d records.", p->diskpos,
204 assert(p->num_records > 0);
/* Append the key data from every mbuf in this block's chain. */
205 for (b = p->data; b; b = b->next)
207 logf(LOG_DEBUG, " buf: offset %d, keys %d, type %d, ref %d",
208 b->offset, b->num, b->type, b->refcount);
209 if ((v = b->num * is_keysize(tab->is)) > 0)
210 memcpy(type->dbuf + sum, b->data + b->offset, v);
213 assert(sum <= type->blocksize);
215 if (bf_write(type->bf, p->diskpos, 0, sum, type->dbuf) < 0)
217 logf (LOG_FATAL, "Failed to write block.");
224 * Free all disk blocks associated with table.
/*
 * is_p_unmap -- walk the table's block chain and return every block's
 * disk position to the freestore.  NOTE(review): elided lines presumably
 * guard against blocks that never got a disk position (diskpos < 0) and
 * reset per-block state -- not visible in this listing.
 */
226 void is_p_unmap(is_mtable *tab)
230 for (p = tab->data; p; p = p->next)
234 is_freestore_free(tab->is, tab->pos_type, p->diskpos);
/*
 * mbuf_takehead -- detach up to *num records from the head of the mbuf
 * chain *mb and return them as a new chain.  Whole mbufs are consumed
 * while they fit ('toget'); when a partial mbuf remains, a small mbuf
 * header is created that aliases the same data buffer at an advanced
 * offset, splitting the mbuf without copying key data.
 * NOTE(review): heavily elided in this listing -- the loop body, refcount
 * handling, the update of *num and the return are not visible; the
 * aliasing split presumably bumps a refcount on the shared buffer.
 */
240 static is_mbuf *mbuf_takehead(is_mbuf **mb, int *num, int keysize)
242 is_mbuf *p = 0, **pp = &p, *inew;
/* Consume whole mbufs from the head while they fit in the request. */
247 while (*mb && toget >= (*mb)->num)
/* Partial fit: split the head mbuf by aliasing its data buffer. */
255 if (toget > 0 && *mb)
257 inew = xmalloc_mbuf(IS_MBUF_TYPE_SMALL);
258 inew->next = (*mb)->next;
260 inew->data = (*mb)->data;
/* The remainder starts 'toget' keys further into the shared buffer. */
262 inew->offset = (*mb)->offset + toget * keysize;
263 inew->num = (*mb)->num - toget;
275 * Split up individual blocks which have grown too large.
276 * is_p_align and is_p_remap are alternative functions which trade off
277 * speed in updating versus optimum usage of disk blocks.
/*
 * is_p_align -- local re-balancing pass over the table's block chain.
 * Two cases are handled per block:
 *   1. A dirty block that has become empty is unlinked; the first block
 *      is special (its successor is pulled forward and fully read, since
 *      block #0 carries the table header), and the empty block's disk
 *      position is returned to the freestore.
 *   2. A dirty block holding more keys than the per-type maximum is split:
 *      a target records-per-block count is derived from nice_keys_block,
 *      and mbuf_takehead carves the oversized block's mbuf chain into new
 *      chained blocks without copying key data.
 * NOTE(review): many lines are elided in this listing; loop braces,
 * the 'last' bookkeeping and parts of the split loop are not visible.
 */
279 void is_p_align(is_mtable *tab)
281 is_mblock *mblock, *inew, *last = 0, *next;
282 is_mbuf *mbufs, *mbp;
283 int blocks, recsblock;
285 logf (LOG_DEBUG, "Realigning table.");
286 for (mblock = tab->data; mblock; mblock = next)
/* Case 1: dirty block with no records left -- unlink and free it. */
289 if (mblock->state == IS_MBSTATE_DIRTY && mblock->num_records == 0)
293 last->next = mblock->next;
294 last->state = IS_MBSTATE_DIRTY;
/* Removing the FIRST block: promote its successor to head position. */
299 next = tab->data->next;
302 if (next->state < IS_MBSTATE_CLEAN)
304 if (is_p_read_full(tab, next) < 0)
306 logf(LOG_FATAL, "Error during re-alignment");
/* Materialize the successor's follower from nextpos if not yet linked. */
309 if (next->nextpos && !next->next)
311 next->next = xmalloc_mblock();
312 next->next->diskpos = next->nextpos;
313 next->next->state = IS_MBSTATE_UNREAD;
314 next->next->data = 0;
317 next->state = IS_MBSTATE_DIRTY; /* force re-process */
/* Return the removed block's disk position, if it ever had one. */
321 if (mblock->diskpos >= 0)
322 is_freestore_free(tab->is, tab->pos_type, mblock->diskpos);
323 xrelease_mblock(mblock);
/* Case 2: dirty block holding more keys than the per-type maximum. */
325 else if (mblock->state == IS_MBSTATE_DIRTY && mblock->num_records >
326 (mblock == tab->data ?
327 tab->is->types[tab->pos_type].max_keys_block0 :
328 tab->is->types[tab->pos_type].max_keys_block))
/* Target block count from the preferred keys-per-block, rounded up. */
330 blocks = tab->num_records /
331 tab->is->types[tab->pos_type].nice_keys_block;
332 if (tab->num_records %
333 tab->is->types[tab->pos_type].nice_keys_block)
335 recsblock = tab->num_records / blocks;
338 mbufs = mblock->data;
/* Carve the oversized mbuf chain into new blocks, no data copies. */
339 while ((mbp = mbuf_takehead(&mbufs, &recsblock,
340 is_keysize(tab->is))) && recsblock)
344 inew = xmalloc_mblock();
346 inew->state = IS_MBSTATE_DIRTY;
347 inew->next = mblock->next;
351 mblock->num_records = recsblock;
353 mblock = mblock->next;
363 * Reorganize data in blocks for minimum block usage and quick access.
364 * Free surplus blocks.
365 * is_p_align and is_p_remap are alternative functions which trade off
366 * speed in updating versus optimum usage of disk blocks.
/*
 * is_p_remap -- global repack of the table.  All blocks are read fully
 * and their mbuf chains concatenated into one list; the list is then
 * redistributed over ceil(num_records / nice_keys_block) blocks of
 * roughly equal size using mbuf_takehead.  Existing mblock structures
 * are reused in place; any blocks left over after redistribution have
 * their disk positions returned to the freestore and are released.
 * NOTE(review): elided lines include the 'bufpp' initialization, brace
 * placement, and the reuse-vs-allocate branch inside the rebuild loop.
 * NOTE(review): calls is_m_read_full (not is_p_read_full) -- presumably
 * the memory-layer reader defined elsewhere in this module's companion
 * file; confirm the distinction is intentional.
 */
368 void is_p_remap(is_mtable *tab)
370 is_mbuf *mbufs, **bufpp, *mbp;
371 is_mblock *blockp, **blockpp;
372 int recsblock, blocks;
374 logf (LOG_DEBUG, "Remapping table.");
375 /* collect all data */
377 for (blockp = tab->data; blockp; blockp = blockp->next)
379 if (blockp->state < IS_MBSTATE_CLEAN && is_m_read_full(tab, blockp) < 0)
381 logf (LOG_FATAL, "Read-full failed in remap.");
/* Splice this block's mbuf chain onto the single collection list. */
384 *bufpp = blockp->data;
386 bufpp = &(*bufpp)->next;
/* Target block count from the preferred keys-per-block, rounded up. */
389 blocks = tab->num_records / tab->is->types[tab->pos_type].nice_keys_block;
390 if (tab->num_records % tab->is->types[tab->pos_type].nice_keys_block)
394 recsblock = tab->num_records / blocks + 1;
395 if (recsblock > tab->is->types[tab->pos_type].nice_keys_block)
/* Rebuild the chain, reusing existing mblocks where possible. */
397 blockpp = &tab->data;
398 while ((mbp = mbuf_takehead(&mbufs, &recsblock, is_keysize(tab->is))) &&
403 *blockpp = xmalloc_mblock();
/* -1 marks "no disk position yet"; is_p_sync allocates one on flush. */
404 (*blockpp)->diskpos = -1;
406 (*blockpp)->data = mbp;
407 (*blockpp)->num_records = recsblock;
408 (*blockpp)->state = IS_MBSTATE_DIRTY;
409 blockpp = &(*blockpp)->next;
/* Surplus tail blocks: free their disk positions, then release them. */
415 for (blockp = *blockpp; blockp; blockp = blockp->next)
416 if (blockp->diskpos >= 0)
417 is_freestore_free(tab->is, tab->pos_type, blockp->diskpos);
418 xfree_mblocks(*blockpp);