/*
- * Copyright (C) 1994-1999, Index Data
+ * Copyright (C) 1994-2000, Index Data
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: delete.c,v $
- * Revision 1.6 1999-05-15 14:36:37 adam
+ * Revision 1.7 2000-12-05 09:59:10 adam
+ * Work on dict_delete_subtree.
+ *
+ * Revision 1.6 1999/05/15 14:36:37 adam
* Updated dictionary. Implemented "compression" of dictionary.
*
* Revision 1.5 1999/02/02 14:50:17 adam
#include <dict.h>
-static int dict_del (Dict dict, const Dict_char *str, Dict_ptr ptr)
+static void dict_del_subtree (Dict dict, Dict_ptr ptr,
+ void *client,
+ int (*f)(const char *, void *))
+{
+ int more = 1;
+ void *p = 0;
+
+ if (!ptr)
+ return;
+
+ while (more)
+ {
+ short *indxp;
+ int i, hi;
+
+ dict_bf_readp (dict->dbf, ptr, &p);
+ hi = DICT_nodir(p)-1;
+ indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
+ more = 0;
+ for (i = 0; i <= hi; i++)
+ {
+ if (indxp[-i] > 0)
+ {
+ /* string (Dict_char *) DICT_EOS terminated */
+ /* unsigned char length of information */
+ /* char * information */
+ char *info = (char*)p + indxp[-i];
+ if (f)
+ (*f)(info + (dict_strlen((Dict_char*) info)+1)
+ *sizeof(Dict_char), client);
+ }
+ else
+ {
+
+ Dict_ptr subptr;
+
+ /* Dict_ptr subptr */
+ /* Dict_char sub char */
+ /* unsigned char length of information */
+ /* char * information */
+ char *info = (char*)p - indxp[-i];
+ memcpy (&subptr, info, sizeof(Dict_ptr));
+
+ if (info[sizeof(Dict_ptr)+sizeof(Dict_char)])
+ {
+ if (f)
+ (*f)(info+sizeof(Dict_ptr)+sizeof(Dict_char), client);
+ }
+ if (subptr)
+ {
+ dict_del_subtree (dict, subptr, client, f);
+ more = 1;
+ break;
+ }
+ }
+ }
+ }
+ DICT_backptr(p) = dict->head.freelist;
+ dict->head.freelist = ptr;
+ dict_bf_touch (dict->dbf, ptr);
+}
+
+static int dict_del_string (Dict dict, const Dict_char *str, Dict_ptr ptr,
+ int sub_flag, void *client,
+ int (*f)(const char *, void *))
{
int mid, lo, hi;
int cmp;
short *indxp;
char *info;
+ if (!ptr)
+ return 0;
dict_bf_readp (dict->dbf, ptr, &p);
mid = lo = 0;
hi = DICT_nodir(p)-1;
/* unsigned char length of information */
/* char * information */
info = (char*)p + indxp[-mid];
- cmp = dict_strcmp((Dict_char*) info, str);
- if (!cmp)
- {
- hi = DICT_nodir(p)-1;
- while (mid < hi)
- {
- indxp[-mid] = indxp[-mid-1];
- mid++;
- }
- DICT_type(p) = 1;
- (DICT_nodir(p))--;
- dict_bf_touch (dict->dbf, ptr);
- return 1;
- }
+
+ cmp = dict_strcmp((Dict_char*) info, str);
+ if (sub_flag)
+ {
+ /* determine if prefix match */
+ if (!dict_strncmp (str, (Dict_char*) info, dict_strlen(str)))
+ {
+ if (f)
+ (*f)(info + (dict_strlen((Dict_char*) info)+1)
+ *sizeof(Dict_char), client);
+
+ hi = DICT_nodir(p)-1;
+ while (mid < hi)
+ {
+ indxp[-mid] = indxp[-mid-1];
+ mid++;
+ }
+ DICT_type(p) = 1;
+ (DICT_nodir(p))--;
+ dict_bf_touch (dict->dbf, ptr);
+ --hi;
+ mid = lo = 0;
+ /* start again (may not be the most efficient way to go)*/
+ continue;
+ }
+ }
+ else
+ {
+ /* normal delete: delete if equal */
+ if (!cmp)
+ {
+ hi = DICT_nodir(p)-1;
+ while (mid < hi)
+ {
+ indxp[-mid] = indxp[-mid-1];
+ mid++;
+ }
+ DICT_type(p) = 1;
+ (DICT_nodir(p))--;
+ dict_bf_touch (dict->dbf, ptr);
+ return 1;
+ }
+ }
}
else
{
memcpy (&subptr, info, sizeof(Dict_ptr));
if (*++str == DICT_EOS)
{
+ if (sub_flag && subptr)
+ {
+ Dict null_ptr = 0;
+ memcpy (info, &null_ptr, sizeof(Dict_ptr));
+ }
if (info[sizeof(Dict_ptr)+sizeof(Dict_char)])
{
info[sizeof(Dict_ptr)+sizeof(Dict_char)] = 0;
DICT_type(p) = 1;
dict_bf_touch (dict->dbf, ptr);
+
+ if (f)
+ (*f)(info+sizeof(Dict_ptr)+sizeof(Dict_char),
+ client);
+ if (sub_flag && subptr)
+ dict_del_subtree (dict, subptr, client, f);
return 1;
}
+ if (sub_flag && subptr)
+ {
+ DICT_type(p) = 1;
+ dict_bf_touch (dict->dbf, ptr);
+ dict_del_subtree (dict, subptr, client, f);
+ }
return 0;
}
else
int dict_delete (Dict dict, const char *p)
{
- if (!dict->head.root)
- return 0;
- return dict_del (dict, (const Dict_char*) p, dict->head.root);
+ return dict_del_string (dict, (const Dict_char*) p, dict->head.root, 0,
+ 0, 0);
+}
+
+int dict_delete_subtree (Dict dict, const char *p, void *client,
+ int (*f)(const char *info, void *client))
+{
+ return dict_del_string (dict, (const Dict_char*) p, dict->head.root, 1,
+ client, f);
}
/*
- * Copyright (C) 1994-1999, Index Data
+ * Copyright (C) 1994-2000, Index Data
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: dicttest.c,v $
- * Revision 1.24 2000-09-05 14:04:05 adam
+ * Revision 1.25 2000-12-05 09:59:10 adam
+ * Work on dict_delete_subtree.
+ *
+ * Revision 1.24 2000/09/05 14:04:05 adam
* Updates for prefix 'yaz_' for YAZ log functions.
*
* Revision 1.23 2000/07/07 12:49:20 adam
static int look_hits;
-static int grep_handle (char *name, const char *info, void *client)
+static int grep_handler (char *name, const char *info, void *client)
{
look_hits++;
printf ("%s\n", name);
return 0;
}
+static int scan_handler (char *name, const char *info, int pos, void *client)
+{
+ printf ("%s\n", name);
+ return 0;
+}
+
int main (int argc, char **argv)
{
Res my_resource = 0;
const char *name = NULL;
const char *inputfile = NULL;
const char *config = NULL;
+ const char *delete_term = NULL;
+ int scan_the_thing = 0;
int do_delete = 0;
int range = -1;
int srange = 0;
if (argc < 2)
{
fprintf (stderr, "usage:\n "
- " %s [-d] [-r n] [-p n] [-u] [-g pat] [-s n] [-v n] [-i f]"
- " [-w] [-c n] config file\n\n",
+ " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] "
+ "[-v n] [-i f] [-w] [-c n] config file\n\n",
prog);
fprintf (stderr, " -d delete instead of insert\n");
+ fprintf (stderr, " -D t delete subtree instead of insert\n");
fprintf (stderr, " -r n set regular match range\n");
fprintf (stderr, " -p n set regular match start range\n");
fprintf (stderr, " -u report if keys change during insert\n");
fprintf (stderr, " -i f read file with words\n");
fprintf (stderr, " -w insert/delete instead of lookup\n");
fprintf (stderr, " -c n cache size (number of pages)\n");
+ fprintf (stderr, " -S scan the dictionary\n");
exit (1);
}
- while ((ret = options ("dr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
+ while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
{
if (ret == 0)
{
exit (1);
}
}
+ else if (ret == 'D')
+ {
+ delete_term = arg;
+ }
else if (ret == 'd')
do_delete = 1;
else if (ret == 'g')
rw = 1;
else if (ret == 'i')
inputfile = arg;
+ else if (ret == 'S')
+ scan_the_thing = 1;
else if (ret == 's')
{
infosize = atoi(arg);
ipf_ptr[i++] = '\0';
if (rw)
{
- if (do_delete)
+ if (do_delete)
switch (dict_delete (dict, ipf_ptr))
{
case 0:
{
look_hits = 0;
dict_lookup_grep (dict, ipf_ptr, range, NULL,
- &max_pos, srange, grep_handle);
+ &max_pos, srange, grep_handler);
if (look_hits)
no_of_hits++;
else
}
fclose (ipf);
}
+ if (rw && delete_term)
+ {
+ logf (LOG_LOG, "dict_delete_subtree %s", delete_term);
+ dict_delete_subtree (dict, delete_term, 0, 0);
+ }
if (grep_pattern)
{
if (range < 0)
range = 0;
logf (LOG_LOG, "Grepping '%s'", grep_pattern);
dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos,
- srange, grep_handle);
+ srange, grep_handler);
}
if (rw)
{
logf (LOG_LOG, "No of hits.... %d", no_of_hits);
logf (LOG_LOG, "No of misses.. %d", no_of_misses);
}
+ if (scan_the_thing)
+ {
+ char term_dict[1024];
+
+ int before = 1000000;
+ int after = 1000000;
+ logf (LOG_LOG, "dict_scan");
+ term_dict[0] = 1;
+ term_dict[1] = 0;
+ dict_scan (dict, term_dict, &before, &after, 0, scan_handler);
+ }
dict_close (dict);
bfs_destroy (bfs);
res_close (my_resource);
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: open.c,v $
- * Revision 1.16 1999-05-26 07:49:13 adam
+ * Revision 1.17 2000-12-05 09:59:10 adam
+ * Work on dict_delete_subtree.
+ *
+ * Revision 1.16 1999/05/26 07:49:13 adam
* C++ compilation.
*
* Revision 1.15 1999/05/15 14:36:37 adam
return strcmp ((const char *) s1, (const char *) s2);
}
+int dict_strncmp (const Dict_char *s1, const Dict_char *s2, size_t n)
+{
+ return strncmp ((const char *) s1, (const char *) s2, n);
+}
+
int dict_strlen (const Dict_char *s)
{
return strlen((const char *) s);
/*
- * Copyright (C) 1994-1999, Index Data
+ * Copyright (C) 1994-2000, Index Data
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: dict.h,v $
- * Revision 1.30 1999-11-30 13:48:03 adam
+ * Revision 1.31 2000-12-05 09:59:10 adam
+ * Work on dict_delete_subtree.
+ *
+ * Revision 1.30 1999/11/30 13:48:03 adam
* Improved installation. Updated for inclusion of YAZ header files.
*
* Revision 1.29 1999/05/15 14:36:37 adam
int dict_close (Dict dict);
int dict_insert (Dict dict, const char *p, int userlen, void *userinfo);
int dict_delete (Dict dict, const char *p);
+int dict_delete_subtree (Dict dict, const char *p, void *client,
+ int (*f)(const char *info, void *client));
char *dict_lookup (Dict dict, const char *p);
int dict_lookup_ec (Dict dict, char *p, int range,
int (*f)(char *name));
int (*f)(char *name, const char *info,
void *client));
int dict_strcmp (const Dict_char *s1, const Dict_char *s2);
+int dict_strncmp (const Dict_char *s1, const Dict_char *s2, size_t n);
int dict_strlen (const Dict_char *s);
int dict_scan (Dict dict, char *str,
int *before, int *after, void *client,