# ParaZ. Copyright (C) 2000-2004, Index Data ApS
# All rights reserved.
-# $Id: Makefile,v 1.2 2006-11-21 18:46:43 quinn Exp $
+# $Id: Makefile,v 1.3 2006-11-24 20:29:07 quinn Exp $
SHELL=/bin/sh
YAZCFLAGS=`$(YAZCONF) --cflags`
PROG=pazpar2
-PROGO=pazpar2.o eventl.o util.o command.o http.o http_command.o
+PROGO=pazpar2.o eventl.o util.o command.o http.o http_command.o termlists.o \
+ reclists.o relevance.o
all: $(PROG)
/*
- * $Id: http.c,v 1.1 2006-11-21 18:46:43 quinn Exp $
+ * $Id: http.c,v 1.2 2006-11-24 20:29:07 quinn Exp $
*/
#include <stdio.h>
#include <strings.h>
#include <ctype.h>
#include <fcntl.h>
+#include <netdb.h>
+#include <errno.h>
#include <yaz/yaz-util.h>
#include <yaz/comstack.h>
#include "http.h"
#include "http_command.h"
+static void proxy_io(IOCHAN i, int event);
+
extern IOCHAN channel_list;
+static struct sockaddr_in *proxy_addr = 0; // If this is set, we proxy normal HTTP requests
+static char proxy_url[256] = "";
+static struct http_buf *http_buf_freelist = 0;
+
+static struct http_buf *http_buf_create()
+{
+ struct http_buf *r;
+
+ if (http_buf_freelist)
+ {
+ r = http_buf_freelist;
+ http_buf_freelist = http_buf_freelist->next;
+ }
+ else
+ r = xmalloc(sizeof(struct http_buf));
+ r->offset = 0;
+ r->len = 0;
+ r->next = 0;
+ return r;
+}
+
+static void http_buf_destroy(struct http_buf *b)
+{
+ b->next = http_buf_freelist;
+ http_buf_freelist = b;
+}
+
+static void http_buf_destroy_queue(struct http_buf *b)
+{
+ struct http_buf *p;
+ while (b)
+ {
+ p = b->next;
+ http_buf_destroy(b);
+ b = p;
+ }
+}
+
+#ifdef GAGA
+// Calculate length of chain
+static int http_buf_len(struct http_buf *b)
+{
+ int sum = 0;
+ for (; b; b = b->next)
+ sum += b->len;
+ return sum;
+}
+#endif
+
+static struct http_buf *http_buf_bybuf(char *b, int len)
+{
+ struct http_buf *res = 0;
+ struct http_buf **p = &res;
+
+ while (len)
+ {
+ *p = http_buf_create();
+ int tocopy = len;
+ if (tocopy > HTTP_BUF_SIZE)
+ tocopy = HTTP_BUF_SIZE;
+ memcpy((*p)->buf, b, tocopy);
+ (*p)->len = tocopy;
+ len -= tocopy;
+ b += tocopy;
+ p = &(*p)->next;
+ }
+ return res;
+}
+
+// Add a (chain of) buffers to the end of an existing queue.
+static void http_buf_enqueue(struct http_buf **queue, struct http_buf *b)
+{
+ while (*queue)
+ queue = &(*queue)->next;
+ *queue = b;
+}
+
+static struct http_buf *http_buf_bywrbuf(WRBUF wrbuf)
+{
+ return http_buf_bybuf(wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
+}
+
+// Non-destructively collapse chain of buffers into a string (max *len)
+// Return
+static int http_buf_peek(struct http_buf *b, char *buf, int len)
+{
+ int rd = 0;
+ while (b && rd < len)
+ {
+ int toread = len - rd;
+ if (toread > b->len)
+ toread = b->len;
+ memcpy(buf + rd, b->buf + b->offset, toread);
+ rd += toread;
+ b = b->next;
+ }
+ buf[rd] = '\0';
+ return rd;
+}
+
+// Ddestructively munch up to len from head of queue.
+static int http_buf_read(struct http_buf **b, char *buf, int len)
+{
+ int rd = 0;
+ while ((*b) && rd < len)
+ {
+ int toread = len - rd;
+ if (toread > (*b)->len)
+ toread = (*b)->len;
+ memcpy(buf + rd, (*b)->buf + (*b)->offset, toread);
+ rd += toread;
+ if (toread < (*b)->len)
+ {
+ (*b)->len -= toread;
+ (*b)->offset += toread;
+ break;
+ }
+ else
+ {
+ struct http_buf *n = (*b)->next;
+ http_buf_destroy(*b);
+ *b = n;
+ }
+ }
+ buf[rd] = '\0';
+ return rd;
+}
+
void http_addheader(struct http_response *r, const char *name, const char *value)
{
struct http_channel *c = r->channel;
r->headers = h;
}
-char *argbyname(struct http_request *r, char *name)
+char *http_argbyname(struct http_request *r, char *name)
{
struct http_argument *p;
+ if (!name)
+ return 0;
for (p = r->arguments; p; p = p->next)
if (!strcmp(p->name, name))
return p->value;
return 0;
}
-char *headerbyname(struct http_request *r, char *name)
+char *http_headerbyname(struct http_request *r, char *name)
{
struct http_header *p;
for (p = r->headers; p; p = p->next)
// Check if we have a complete request. Return 0 or length (including trailing newline)
// FIXME: Does not deal gracefully with requests carrying payload
// but this is kind of OK since we will reject anything other than an empty GET
-static int request_check(const char *buf)
+static int request_check(struct http_buf *queue)
{
+ char tmp[4096];
int len = 0;
+ char *buf = tmp;
+ http_buf_peek(queue, tmp, 4096);
while (*buf) // Check if we have a sequence of lines terminated by an empty line
{
char *b = strstr(buf, "\r\n");
return 0;
}
-struct http_request *http_parse_request(struct http_channel *c, char *buf)
+struct http_request *http_parse_request(struct http_channel *c, struct http_buf **queue,
+ int len)
{
struct http_request *r = nmem_malloc(c->nmem, sizeof(*r));
char *p, *p2;
+ char tmp[4096];
+ char *buf = tmp;
+
+ if (len > 4096)
+ return 0;
+ if (http_buf_read(queue, buf, len) < len)
+ return 0;
r->channel = c;
+ r->arguments = 0;
+ r->headers = 0;
// Parse first line
- if (!strncmp(buf, "GET ", 4))
- r->method = Method_GET;
- else
+ for (p = buf, p2 = r->method; *p && *p != ' ' && p - buf < 19; p++)
+ *(p2++) = *p;
+ if (*p != ' ')
{
yaz_log(YLOG_WARN, "Unexpected HTTP method in request");
return 0;
}
+ *p2 = '\0';
+
if (!(buf = strchr(buf, ' ')))
{
yaz_log(YLOG_WARN, "Syntax error in request (1)");
if (!(p = strstr(buf, "\r\n")))
return 0;
*(p++) = '\0';
+ p++;
strcpy(r->http_version, buf);
buf = p;
}
strcpy(c->version, r->http_version);
- r->headers = 0; // We might want to parse these someday
+ r->headers = 0;
+ while (*buf)
+ {
+ if (!(p = strstr(buf, "\r\n")))
+ return 0;
+ if (p == buf)
+ break;
+ else
+ {
+ struct http_header *h = nmem_malloc(c->nmem, sizeof(*h));
+ if (!(p2 = strchr(buf, ':')))
+ return 0;
+ *(p2++) = '\0';
+ h->name = nmem_strdup(c->nmem, buf);
+ while (isspace(*p2))
+ p2++;
+ if (p2 >= p) // Empty header?
+ {
+ buf = p + 2;
+ continue;
+ }
+ *p = '\0';
+ h->value = nmem_strdup(c->nmem, p2);
+ h->next = r->headers;
+ r->headers = h;
+ buf = p + 2;
+ }
+ }
return r;
}
-static char *http_serialize_response(struct http_channel *c, struct http_response *r)
+static struct http_buf *http_serialize_response(struct http_channel *c,
+ struct http_response *r)
{
wrbuf_rewind(c->wrbuf);
struct http_header *h;
if (r->payload)
wrbuf_puts(c->wrbuf, r->payload);
- wrbuf_putc(c->wrbuf, '\0');
- return wrbuf_buf(c->wrbuf);
+ return http_buf_bywrbuf(c->wrbuf);
+}
+
+// Serialize a HTTP request
+static struct http_buf *http_serialize_request(struct http_request *r)
+{
+ struct http_channel *c = r->channel;
+ wrbuf_rewind(c->wrbuf);
+ struct http_header *h;
+ struct http_argument *a;
+
+ wrbuf_printf(c->wrbuf, "%s %s", r->method, r->path);
+
+ if (r->arguments)
+ {
+ wrbuf_putc(c->wrbuf, '?');
+ for (a = r->arguments; a; a = a->next) {
+ if (a != r->arguments)
+ wrbuf_putc(c->wrbuf, '&');
+ wrbuf_printf(c->wrbuf, "%s=%s", a->name, a->value);
+ }
+ }
+
+ wrbuf_printf(c->wrbuf, " HTTP/%s\r\n", r->http_version);
+
+ for (h = r->headers; h; h = h->next)
+ wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value);
+
+ wrbuf_puts(c->wrbuf, "\r\n");
+
+ return http_buf_bywrbuf(c->wrbuf);
}
+
// Cleanup
static void http_destroy(IOCHAN i)
{
struct http_channel *s = iochan_getdata(i);
yaz_log(YLOG_DEBUG, "Destroying http channel");
+ if (s->proxy)
+ {
+ yaz_log(YLOG_DEBUG, "Destroying Proxy channel");
+ if (s->proxy->iochan)
+ {
+ close(iochan_getfd(s->proxy->iochan));
+ iochan_destroy(s->proxy->iochan);
+ }
+ http_buf_destroy_queue(s->proxy->oqueue);
+ xfree(s->proxy);
+ }
+ http_buf_destroy_queue(s->iqueue);
+ http_buf_destroy_queue(s->oqueue);
nmem_destroy(s->nmem);
wrbuf_free(s->wrbuf, 1);
xfree(s);
iochan_destroy(i);
}
+static int http_weshouldproxy(struct http_request *rq)
+{
+ if (proxy_addr && !strstr(rq->path, "search.pz2"))
+ return 1;
+ return 0;
+}
+
+static int http_proxy(struct http_request *rq)
+{
+ struct http_channel *c = rq->channel;
+ struct http_proxy *p = c->proxy;
+ struct http_header *hp;
+ struct http_buf *requestbuf;
+
+ yaz_log(YLOG_DEBUG, "Proxy request");
+
+ if (!p) // This is a new connection. Create a proxy channel
+ {
+ int sock;
+ struct protoent *pe;
+ int one = 1;
+ int flags;
+
+ yaz_log(YLOG_DEBUG, "Creating a new proxy channel");
+ if (!(pe = getprotobyname("tcp"))) {
+ abort();
+ }
+ if ((sock = socket(PF_INET, SOCK_STREAM, pe->p_proto)) < 0)
+ {
+ yaz_log(YLOG_WARN|YLOG_ERRNO, "socket");
+ return -1;
+ }
+ if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char*)
+ &one, sizeof(one)) < 0)
+ abort();
+ if ((flags = fcntl(sock, F_GETFL, 0)) < 0)
+ yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl");
+ if (fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0)
+ yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2");
+ if (connect(sock, (struct sockaddr *) proxy_addr, sizeof(*proxy_addr)) < 0)
+ if (errno != EINPROGRESS)
+ {
+ yaz_log(YLOG_WARN|YLOG_ERRNO, "Proxy connect");
+ return -1;
+ }
+
+ p = xmalloc(sizeof(struct http_proxy));
+ p->oqueue = 0;
+ p->channel = c;
+ c->proxy = p;
+ // We will add EVENT_OUTPUT below
+ p->iochan = iochan_create(sock, proxy_io, EVENT_INPUT);
+ iochan_setdata(p->iochan, p);
+ p->iochan->next = channel_list;
+ channel_list = p->iochan;
+ }
+
+ // Modify Host: header
+ for (hp = rq->headers; hp; hp = hp->next)
+ if (!strcmp(hp->name, "Host"))
+ break;
+ if (!hp)
+ {
+ yaz_log(YLOG_WARN, "Failed to find Host header in proxy");
+ return -1;
+ }
+ hp->value = nmem_strdup(c->nmem, proxy_url);
+ requestbuf = http_serialize_request(rq);
+ http_buf_enqueue(&p->oqueue, requestbuf);
+ iochan_setflag(p->iochan, EVENT_OUTPUT);
+ return 0;
+}
+
static void http_io(IOCHAN i, int event)
{
struct http_channel *hc = iochan_getdata(i);
switch (event)
{
- int res;
+ int res, reqlen;
+ struct http_buf *htbuf;
case EVENT_INPUT:
yaz_log(YLOG_DEBUG, "HTTP Input event");
- res = read(iochan_getfd(i), hc->ibuf + hc->read, IBUF_SIZE - (hc->read + 1));
- if (res <= 0)
+ htbuf = http_buf_create();
+ res = read(iochan_getfd(i), htbuf->buf, HTTP_BUF_SIZE -1);
+ if (res <= 0 && errno != EAGAIN)
{
yaz_log(YLOG_WARN|YLOG_ERRNO, "HTTP read");
+ http_buf_destroy(htbuf);
http_destroy(i);
return;
}
- yaz_log(YLOG_DEBUG, "HTTP read %d octets", res);
- hc->read += res;
- hc->ibuf[hc->read] = '\0';
+ if (res > 0)
+ {
+ htbuf->buf[res] = '\0';
+ htbuf->len = res;
+ http_buf_enqueue(&hc->iqueue, htbuf);
+ }
- if ((res = request_check(hc->ibuf)) <= 2)
+ if ((reqlen = request_check(hc->iqueue)) <= 2)
{
yaz_log(YLOG_DEBUG, "We don't have a complete HTTP request yet");
return;
}
- yaz_log(YLOG_DEBUG, "We think we have a complete HTTP request (len %d): \n%s", res, hc->ibuf);
+ yaz_log(YLOG_DEBUG, "We think we have a complete HTTP request (len %d)", reqlen);
+
nmem_reset(hc->nmem);
- if (!(request = http_parse_request(hc, hc->ibuf)))
+ if (!(request = http_parse_request(hc, &hc->iqueue, reqlen)))
{
yaz_log(YLOG_WARN, "Failed to parse request");
http_destroy(i);
return;
}
- response = http_command(request);
- if (!response)
+ yaz_log(YLOG_LOG, "Request: %s %s v %s", request->method, request->path,
+ request->http_version);
+ if (http_weshouldproxy(request))
+ http_proxy(request);
+ else
{
- http_destroy(i);
- return;
+ struct http_buf *hb;
+ // Execute our business logic!
+ response = http_command(request);
+ if (!response)
+ {
+ http_destroy(i);
+ return;
+ }
+ if (!(hb = http_serialize_response(hc, response)))
+ {
+ http_destroy(i);
+ return;
+ }
+ http_buf_enqueue(&hc->oqueue, hb);
+ yaz_log(YLOG_DEBUG, "Response ready");
+ iochan_setflags(i, EVENT_OUTPUT); // Turns off input selecting
}
- // FIXME -- do something to cause the response to be sent to the client
- if (!(hc->obuf = http_serialize_response(hc, response)))
+ if (hc->iqueue)
{
- http_destroy(i);
- return;
+ yaz_log(YLOG_DEBUG, "We think we have more input to read. Forcing event");
+ iochan_setevent(i, EVENT_INPUT);
}
- yaz_log(YLOG_DEBUG, "Response ready:\n%s", hc->obuf);
- hc->writ = 0;
- hc->read = 0;
- iochan_setflags(i, EVENT_OUTPUT); // Turns off input selecting
+
break;
case EVENT_OUTPUT:
yaz_log(YLOG_DEBUG, "HTTP output event");
- res = write(iochan_getfd(hc->iochan), hc->obuf + hc->writ,
- strlen(hc->obuf + hc->writ));
- if (res <= 0)
+ if (hc->oqueue)
{
- yaz_log(YLOG_WARN|YLOG_ERRNO, "write");
- http_destroy(i);
- return;
- }
- hc->writ += res;
- if (!hc->obuf[hc->writ]) {
- yaz_log(YLOG_DEBUG, "Writing finished");
- if (!strcmp(hc->version, "1.0"))
+ struct http_buf *wb = hc->oqueue;
+ res = write(iochan_getfd(hc->iochan), wb->buf + wb->offset, wb->len);
+ if (res <= 0)
{
- yaz_log(YLOG_DEBUG, "Closing 1.0 connection");
+ yaz_log(YLOG_WARN|YLOG_ERRNO, "write");
http_destroy(i);
+ return;
+ }
+ yaz_log(YLOG_DEBUG, "HTTP Wrote %d octets", res);
+ if (res == wb->len)
+ {
+ hc->oqueue = hc->oqueue->next;
+ http_buf_destroy(wb);
}
else
- iochan_setflags(i, EVENT_INPUT); // Turns off output flag
+ {
+ wb->len -= res;
+ wb->offset += res;
+ }
+ if (!hc->oqueue) {
+ yaz_log(YLOG_DEBUG, "Writing finished");
+ if (!strcmp(hc->version, "1.0"))
+ {
+ yaz_log(YLOG_DEBUG, "Closing 1.0 connection");
+ http_destroy(i);
+ return;
+ }
+ else
+ iochan_setflags(i, EVENT_INPUT); // Turns off output flag
+ }
}
+
+ if (!hc->oqueue && hc->proxy && !hc->proxy->iochan)
+ http_destroy(i); // Server closed; we're done
break;
default:
yaz_log(YLOG_WARN, "Unexpected event on connection");
}
}
+// Handles I/O on a client connection to a backend web server (proxy mode)
+static void proxy_io(IOCHAN pi, int event)
+{
+ struct http_proxy *pc = iochan_getdata(pi);
+ struct http_channel *hc = pc->channel;
+
+ switch (event)
+ {
+ int res;
+ struct http_buf *htbuf;
+
+ case EVENT_INPUT:
+ yaz_log(YLOG_DEBUG, "Proxy input event");
+ htbuf = http_buf_create();
+ res = read(iochan_getfd(pi), htbuf->buf, HTTP_BUF_SIZE -1);
+ yaz_log(YLOG_DEBUG, "Proxy read %d bytes.", res);
+ if (res == 0 || (res < 0 && errno != EINPROGRESS))
+ {
+ if (hc->oqueue)
+ {
+ yaz_log(YLOG_WARN, "Proxy read came up short");
+ // Close channel and alert client HTTP channel that we're gone
+ http_buf_destroy(htbuf);
+ close(iochan_getfd(pi));
+ iochan_destroy(pi);
+ pc->iochan = 0;
+ }
+ else
+ {
+ http_destroy(hc->iochan);
+ return;
+ }
+ }
+ else
+ {
+ htbuf->buf[res] = '\0';
+ htbuf->len = res;
+ http_buf_enqueue(&hc->oqueue, htbuf);
+ }
+ iochan_setflag(hc->iochan, EVENT_OUTPUT);
+ break;
+ case EVENT_OUTPUT:
+ yaz_log(YLOG_DEBUG, "Proxy output event");
+ if (!(htbuf = pc->oqueue))
+ {
+ iochan_clearflag(pi, EVENT_OUTPUT);
+ return;
+ }
+ res = write(iochan_getfd(pi), htbuf->buf + htbuf->offset, htbuf->len);
+ if (res <= 0)
+ {
+ yaz_log(YLOG_WARN|YLOG_ERRNO, "write");
+ http_destroy(hc->iochan);
+ return;
+ }
+ if (res == htbuf->len)
+ {
+ struct http_buf *np = htbuf->next;
+ http_buf_destroy(htbuf);
+ pc->oqueue = np;
+ }
+ else
+ {
+ htbuf->len -= res;
+ htbuf->offset += res;
+ }
+
+ if (!pc->oqueue) {
+ iochan_setflags(pi, EVENT_INPUT); // Turns off output flag
+ }
+ break;
+ default:
+ yaz_log(YLOG_WARN, "Unexpected event on connection");
+ http_destroy(hc->iochan);
+ }
+}
+
/* Accept a new command connection */
static void http_accept(IOCHAN i, int event)
{
c = iochan_create(s, http_io, EVENT_INPUT | EVENT_EXCEPT);
ch = xmalloc(sizeof(*ch));
- ch->read = 0;
+ ch->proxy = 0;
ch->nmem = nmem_create();
ch->wrbuf = wrbuf_alloc();
ch->iochan = c;
+ ch->iqueue = ch->oqueue = 0;
iochan_setdata(c, ch);
c->next = channel_list;
channel_list = c;
}
-
/* Create a http-channel listener */
void http_init(int port)
{
channel_list = c;
}
+void http_set_proxyaddr(char *host)
+{
+ char *p;
+ int port;
+ struct hostent *he;
+
+ strcpy(proxy_url, host);
+ p = strchr(host, ':');
+ yaz_log(YLOG_DEBUG, "Proxying for %s", host);
+ if (p) {
+ port = atoi(p + 1);
+ *p = '\0';
+ }
+ else
+ port = 80;
+ if (!(he = gethostbyname(host)))
+ {
+ fprintf(stderr, "Failed to lookup '%s'\n", host);
+ exit(1);
+ }
+ proxy_addr = xmalloc(sizeof(struct sockaddr_in));
+ proxy_addr->sin_family = he->h_addrtype;
+ memcpy(&proxy_addr->sin_addr.s_addr, he->h_addr_list[0], he->h_length);
+ proxy_addr->sin_port = htons(port);
+}
/*
* Local variables:
#ifndef HTTP_H
#define HTTP_H
+// Generic I/O buffer
+struct http_buf
+{
+#define HTTP_BUF_SIZE 4096
+ char buf[4096];
+ int offset;
+ int len;
+ struct http_buf *next;
+};
+
struct http_channel
{
IOCHAN iochan;
-#define IBUF_SIZE 10240
- char ibuf[IBUF_SIZE];
+ struct http_buf *iqueue;
+ struct http_buf *oqueue;
char version[10];
- int read;
- char *obuf;
- int writ;
+ struct http_proxy *proxy;
NMEM nmem;
WRBUF wrbuf;
};
+struct http_proxy // attached to iochan for proxy connection
+{
+ IOCHAN iochan;
+ struct http_channel *channel;
+ struct http_buf *oqueue;
+};
+
struct http_header
{
char *name;
{
struct http_channel *channel;
char http_version[20];
- enum
- {
- Method_GET,
- Method_other
- } method;
+ char method[20];
char *path;
struct http_header *headers;
struct http_argument *arguments;
char *payload;
};
+void http_set_proxyaddr(char *url);
void http_init(int port);
void http_addheader(struct http_response *r, const char *name, const char *value);
-char *argbyname(struct http_request *r, char *name);
-char *headerbyname(struct http_request *r, char *name);
+char *http_argbyname(struct http_request *r, char *name);
+char *http_headerbyname(struct http_request *r, char *name);
struct http_response *http_create_response(struct http_channel *c);
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
#endif
/*
- * $Id: http_command.c,v 1.1 2006-11-21 18:46:43 quinn Exp $
+ * $Id: http_command.c,v 1.2 2006-11-24 20:29:07 quinn Exp $
*/
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <ctype.h>
+#include <sys/time.h>
#include <yaz/yaz-util.h>
struct http_session {
struct session *psession;
- char session_id[128];
+ int session_id;
int timestamp;
struct http_session *next;
};
struct http_session *http_session_create()
{
struct http_session *r = xmalloc(sizeof(*r));
- r->psession = 0;
- *r->session_id = '\0';
+ r->psession = new_session();
+ r->session_id = 0;
r->timestamp = 0;
r->next = session_list;
session_list = r;
rs->payload = nmem_strdup(c->nmem, tmp);
}
+int make_sessionid()
+{
+ struct timeval t;
+ int res;
+ static int seq = 0;
+
+ seq++;
+ if (gettimeofday(&t, 0) < 0)
+ abort();
+ res = t.tv_sec;
+ res = (res << 8) | (seq & 0xff);
+ return res;
+}
+
+static struct http_session *locate_session(struct http_request *rq, struct http_response *rs)
+{
+ struct http_session *p;
+ char *session = http_argbyname(rq, "session");
+ int id;
+
+ if (!session)
+ {
+ error(rs, "417", "Must supply session", 0);
+ return 0;
+ }
+ id = atoi(session);
+ for (p = session_list; p; p = p->next)
+ if (id == p->session_id)
+ return p;
+ error(rs, "417", "Session does not exist, or it has expired", 0);
+ return 0;
+}
+
+
static void cmd_init(struct http_request *rq, struct http_response *rs)
{
+ int sesid;
+ char buf[1024];
+ struct http_session *s = http_session_create();
+
+ // FIXME create a pazpar2 session
+ yaz_log(YLOG_DEBUG, "HTTP Session init");
+ sesid = make_sessionid();
+ s->session_id = sesid;
+ sprintf(buf, "<init><status>OK</status><session>%d</session></init>", sesid);
+ rs->payload = nmem_strdup(rq->channel->nmem, buf);
}
+static void cmd_termlist(struct http_request *rq, struct http_response *rs)
+{
+ struct http_session *s = locate_session(rq, rs);
+ struct http_channel *c = rq->channel;
+ struct termlist_score **p;
+ int len;
+ int i;
+
+ if (!s)
+ return;
+ wrbuf_rewind(c->wrbuf);
+
+ wrbuf_puts(c->wrbuf, "<termlist>");
+ p = termlist(s->psession, &len);
+ if (p)
+ for (i = 0; i < len; i++)
+ {
+ wrbuf_puts(c->wrbuf, "\n<term>");
+ wrbuf_printf(c->wrbuf, "<name>%s</name>", p[i]->term);
+ wrbuf_printf(c->wrbuf, "<frequency>%d</frequency>", p[i]->frequency);
+ wrbuf_puts(c->wrbuf, "</term>");
+ }
+ wrbuf_puts(c->wrbuf, "</termlist>");
+ rs->payload = nmem_strdup(rq->channel->nmem, wrbuf_buf(c->wrbuf));
+}
+
+
+static void cmd_bytarget(struct http_request *rq, struct http_response *rs)
+{
+ struct http_session *s = locate_session(rq, rs);
+ struct http_channel *c = rq->channel;
+ struct hitsbytarget *ht;
+ int count, i;
+
+ if (!s)
+ return;
+ if (!(ht = hitsbytarget(s->psession, &count)))
+ {
+ error(rs, "500", "Failed to retrieve hitcounts", 0);
+ return;
+ }
+ wrbuf_rewind(c->wrbuf);
+ wrbuf_puts(c->wrbuf, "<bytarget><status>OK</status>");
+
+ for (i = 0; i < count; i++)
+ {
+ wrbuf_puts(c->wrbuf, "\n<target>");
+ wrbuf_printf(c->wrbuf, "<id>%s</id>\n", ht[i].id);
+ wrbuf_printf(c->wrbuf, "<hits>%d</hits>\n", ht[i].hits);
+ wrbuf_printf(c->wrbuf, "<diagnostic>%d</diagnostic>\n", ht[i].diagnostic);
+ wrbuf_printf(c->wrbuf, "<records>%d</records>\n", ht[i].records);
+ wrbuf_printf(c->wrbuf, "<state>%s</state>\n", ht[i].state);
+ wrbuf_puts(c->wrbuf, "</target>");
+ }
+
+ wrbuf_puts(c->wrbuf, "</bytarget>");
+ rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf));
+}
+
+static void cmd_show(struct http_request *rq, struct http_response *rs)
+{
+ struct http_session *s = locate_session(rq, rs);
+ struct http_channel *c = rq->channel;
+ struct record **rl;
+ char *start = http_argbyname(rq, "start");
+ char *num = http_argbyname(rq, "num");
+ int startn = 0;
+ int numn = 20;
+ int i;
+
+ if (!s)
+ return;
+
+ if (start)
+ startn = atoi(start);
+ if (num)
+ numn = atoi(num);
+
+ rl = show(s->psession, startn, &numn);
+
+ wrbuf_rewind(c->wrbuf);
+ wrbuf_puts(c->wrbuf, "<show>\n<status>OK</status>\n");
+
+ for (i = 0; i < numn; i++)
+ {
+ int ccount;
+ struct record *p;
+
+ wrbuf_puts(c->wrbuf, "<hit>\n");
+ wrbuf_printf(c->wrbuf, "<merge_key>%s</merge_key>\n", rl[i]->merge_key);
+ for (ccount = 1, p = rl[i]->next_cluster; p; p = p->next_cluster, ccount++)
+ ;
+ if (ccount > 1)
+ wrbuf_printf(c->wrbuf, "<count>%d</count>\n", ccount);
+ wrbuf_puts(c->wrbuf, "</hit>\n");
+ }
+
+ wrbuf_puts(c->wrbuf, "</show>\n");
+ rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf));
+}
+
+static void cmd_search(struct http_request *rq, struct http_response *rs)
+{
+ struct http_session *s = locate_session(rq, rs);
+ char *query = http_argbyname(rq, "query");
+
+ if (!s)
+ return;
+ if (!query)
+ {
+ error(rs, "417", "Must supply query", 0);
+ return;
+ }
+ search(s->psession, query);
+ rs->payload = "<search><status>OK</status></search>";
+}
+
+
static void cmd_stat(struct http_request *rq, struct http_response *rs)
{
}
static void cmd_load(struct http_request *rq, struct http_response *rs)
{
+ struct http_session *s = locate_session(rq, rs);
+ char *fn = http_argbyname(rq, "name");
+
+ if (!s)
+ return;
+ if (!fn)
+ {
+ error(rs, "417", "Must suppply name", 0);
+ return;
+ }
+ if (load_targets(s->psession, fn) < 0)
+ error(rs, "417", "Failed to find targets", "Possibly wrong filename");
+ else
+ rs->payload = "<load><status>OK</status></load>";
}
struct {
{ "init", cmd_init },
{ "stat", cmd_stat },
{ "load", cmd_load },
+ { "bytarget", cmd_bytarget },
+ { "show", cmd_show },
+ { "search", cmd_search },
+ { "termlist", cmd_termlist },
{0,0}
};
struct http_response *http_command(struct http_request *rq)
{
- char *command = argbyname(rq, "command");
+ char *command = http_argbyname(rq, "command");
struct http_channel *c = rq->channel;
struct http_response *rs = http_create_response(c);
int i;
-/* $Id: pazpar2.c,v 1.3 2006-11-21 18:46:43 quinn Exp $ */
+/* $Id: pazpar2.c,v 1.4 2006-11-24 20:29:07 quinn Exp $ */
#include <stdlib.h>
#include <stdio.h>
#include "eventl.h"
#include "command.h"
#include "http.h"
+#include "termlists.h"
+#include "reclists.h"
+#include "relevance.h"
#define PAZPAR2_VERSION "0.1"
#define MAX_DATABASES 512
struct timeval base_time;
int toget;
int chunk;
+ void *ccl_filter;
} global_parameters =
{
30,
PAZPAR2_VERSION,
{0,0},
100,
- MAX_CHUNK
+ MAX_CHUNK,
+ 0
};
p++;
if (*p == '\t' && *(++p) == subfield) {
if (*(++p) == ' ')
- return ++p;
+ {
+ while (isspace(*p))
+ p++;
+ return p;
+ }
}
}
return 0;
return out;
}
+#ifdef RECHEAP
static void push_record(struct session *s, struct record *r)
{
int p;
static struct record *pop_record(struct session *s)
{
- struct record *res = s->recheap[0];
+ struct record *res;
int p = 0;
int lastnonleaf = (s->recheap_max - 1) >> 1;
if (s->recheap_max < 0)
return 0;
+ res = s->recheap[0];
+
s->recheap[p] = s->recheap[s->recheap_max--];
while (p <= lastnonleaf)
}
}
+#endif
+
+// FIXME needs to be generalized. Should flexibly generate X lists per search
+static void extract_subject(struct session *s, const char *rec)
+{
+ const char *field, *subfield;
+
+ while ((field = find_field(rec, "650")))
+ {
+ rec = field + 1; // Crude way to cause a loop through repeating fields
+ if ((subfield = find_subfield(field, 'a')))
+ {
+ char *e, *ef;
+ char buf[1024];
+ int len;
+
+ ef = index(subfield, '\n');
+ if ((e = index(subfield, '\t')) && e < ef)
+ ef = e;
+ while (ef > subfield && !isalpha(*(ef - 1)) && *(ef - 1) != ')')
+ ef--;
+ len = ef - subfield;
+ assert(len < 1023);
+ memcpy(buf, subfield, len);
+ buf[len] = '\0';
+ termlist_insert(s->termlist, buf);
+ }
+ }
+}
+
struct record *ingest_record(struct target *t, char *buf, int len)
{
struct session *s = t->session;
res = nmem_malloc(s->nmem, sizeof(struct record));
+ extract_subject(s, recbuf);
+
res->merge_key = extract_mergekey(s, recbuf);
if (!res->merge_key)
return 0;
yaz_log(YLOG_DEBUG, "Key: %s", res->merge_key);
- push_record(s, res);
+ reclist_insert(s->reclist, res);
return res;
}
}
if (live_channels)
{
+ const char *t[] = { "aa", "ab", 0 };
int maxrecs = live_channels * global_parameters.toget;
- if (!s->recheap_size)
- {
- s->recheap = xmalloc(maxrecs * sizeof(struct record *));
- s->recheap_size = maxrecs;
- }
- else if (s->recheap_size < maxrecs)
- {
- s->recheap = xrealloc(s->recheap, maxrecs * sizeof(struct record*));
- s->recheap_size = maxrecs;
- }
+ s->termlist = termlist_create(s->nmem, maxrecs, 15);
+ s->reclist = reclist_create(s->nmem, maxrecs);
+ relevance_create(s->nmem, t, 1000);
}
- s->recheap_max = -1;
- s->recheap_scratch = -1;
}
struct session *new_session()
yaz_log(YLOG_DEBUG, "New pazpar2 session");
+ session->termlist = 0;
+ session->reclist = 0;
session->requestid = -1;
session->targets = 0;
session->pqf_parser = yaz_pqf_create();
session->yaz_marc = yaz_marc_create();
yaz_marc_subfield_str(session->yaz_marc, "\t");
session->wrbuf = wrbuf_alloc();
- session->recheap = 0;
- session->recheap_size = 0;
return session;
}
return res;
}
+struct termlist_score **termlist(struct session *s, int *num)
+{
+ return termlist_highscore(s->termlist, num);
+}
+
struct record **show(struct session *s, int start, int *num)
{
struct record **recs = nmem_malloc(s->nmem, *num * sizeof(struct record *));
// FIXME -- skip initial records
+ reclist_rewind(s->reclist);
for (i = 0; i < *num; i++)
{
- recs[i] = read_recheap(s);
+ recs[i] = reclist_read_record(s->reclist);
if (!recs[i])
{
*num = i;
break;
}
}
- rewind_recheap(s);
return recs;
}
stat->num_connections = i;
}
+static void *load_cclfile(const char *fn)
+{
+ return 0;
+}
+
int main(int argc, char **argv)
{
int ret;
yaz_log_init(YLOG_DEFAULT_LEVEL|YLOG_DEBUG, "pazpar2", 0);
- while ((ret = options("c:h:", argv, argc, &arg)) != -2)
+ while ((ret = options("c:h:p:C:", argv, argc, &arg)) != -2)
{
switch (ret) {
case 0:
case 'c':
command_init(atoi(arg));
break;
+ case 'C':
+ global_parameters.ccl_filter = load_cclfile(arg);
+ break;
case 'h':
http_init(atoi(arg));
break;
+ case 'p':
+ http_set_proxyaddr(arg);
+ break;
default:
fprintf(stderr, "Usage: pazpar2 -d comport");
exit(1);
#define PAZPAR2_H
#include <yaz/pquery.h>
+#include "termlists.h"
struct record {
struct target *target;
char query[1024];
NMEM nmem;
WRBUF wrbuf;
- struct record **recheap;
- int recheap_size;
- int recheap_max;
- int recheap_scratch;
+ struct termlist *termlist;
+ struct reclist *reclist;
yaz_marc_t yaz_marc;
};
void statistics(struct session *s, struct statistics *stat);
void search(struct session *s, char *query);
struct record **show(struct session *s, int start, int *num);
+struct termlist_score **termlist(struct session *s, int *num);
#endif
--- /dev/null
+/*
+ * $Id: reclists.c,v 1.1 2006-11-24 20:29:07 quinn Exp $
+ */
+
+#include <assert.h>
+
+#include <yaz/yaz-util.h>
+
+#include "pazpar2.h"
+#include "reclists.h"
+
+struct reclist_bucket
+{
+ struct record *record;
+ struct reclist_bucket *next;
+};
+
+struct reclist
+{
+ struct reclist_bucket **hashtable;
+ int hashtable_size;
+ int hashmask;
+
+ struct record **flatlist;
+ int flatlist_size;
+ int num_records;
+ int pointer;
+
+ NMEM nmem;
+};
+
+struct record *reclist_read_record(struct reclist *l)
+{
+ if (l->pointer < l->num_records)
+ return l->flatlist[l->pointer++];
+ else
+ return 0;
+}
+
+void reclist_rewind(struct reclist *l)
+{
+ l->pointer = 0;
+}
+
+// Jenkins one-at-a-time hash (from wikipedia)
+static unsigned int hash(const unsigned char *key)
+{
+ unsigned int hash = 0;
+
+ while (*key)
+ {
+ hash += *(key++);
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+ return hash;
+}
+
+struct reclist *reclist_create(NMEM nmem, int numrecs)
+{
+ int hashsize = 1;
+ struct reclist *res;
+
+ assert(numrecs);
+ while (hashsize < numrecs)
+ hashsize <<= 1;
+ res = nmem_malloc(nmem, sizeof(struct reclist));
+ res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct reclist_bucket*));
+ bzero(res->hashtable, hashsize * sizeof(struct reclist_bucket*));
+ res->hashtable_size = hashsize;
+ res->nmem = nmem;
+ res->hashmask = hashsize - 1; // Creates a bitmask
+
+ res->num_records = 0;
+ res->flatlist = nmem_malloc(nmem, numrecs * sizeof(struct record*));
+ res->flatlist_size = numrecs;
+
+ return res;
+}
+
+void reclist_insert(struct reclist *l, struct record *record)
+{
+ unsigned int bucket;
+ struct reclist_bucket **p;
+
+ bucket = hash(record->merge_key) & l->hashmask;
+ for (p = &l->hashtable[bucket]; *p; p = &(*p)->next)
+ {
+ // We found a matching record. Merge them
+ if (!strcmp(record->merge_key, (*p)->record->merge_key))
+ {
+ struct record *existing = (*p)->record;
+ yaz_log(YLOG_LOG, "Found a matching record: %s", record->merge_key);
+ record->next_cluster = existing->next_cluster;
+ existing->next_cluster = record;
+ break;
+ }
+ }
+ if (!*p) // We made it to the end of the bucket without finding match
+ {
+ yaz_log(YLOG_DEBUG, "Added a new record: %s", record->merge_key);
+ struct reclist_bucket *new = nmem_malloc(l->nmem,
+ sizeof(struct reclist_bucket));
+ new->record = record;
+ record->next_cluster = 0;
+ new->next = 0;
+ *p = new;
+ l->flatlist[l->num_records++] = record;
+ }
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
--- /dev/null
+#ifndef RECLISTS_H
+#define RECLISTS_H
+
+struct reclist;
+
+struct reclist *reclist_create(NMEM, int numrecs);
+void reclist_insert(struct reclist *tl, struct record *record);
+struct record *reclist_read_record(struct reclist *l);
+void reclist_rewind(struct reclist *l);
+
+#endif
--- /dev/null
+/*
+ * $Id: relevance.c,v 1.1 2006-11-24 20:29:07 quinn Exp $
+ */
+
+#include <ctype.h>
+
+#include "relevance.h"
+#include "pazpar2.h"
+
+struct relevance
+{
+ struct relevance_record *records;
+ int num_records;
+ int *doc_frequency_vec;
+ int vec_len;
+ struct word_trie *wt;
+ NMEM nmem;
+};
+
+struct relevance_record
+{
+ struct record *record;
+ int *term_frequency_vec;
+};
+
+// We use this data structure to recognize terms in input records,
+// and map them to record term vectors for counting.
+struct word_trie
+{
+ struct
+ {
+ struct word_trie *child;
+ int termno;
+ } list[26];
+};
+
+static struct word_trie *create_word_trie_node(NMEM nmem)
+{
+ struct word_trie *res = nmem_malloc(nmem, sizeof(struct word_trie));
+ int i;
+ for (i = 0; i < 26; i++)
+ {
+ res->list[i].child = 0;
+ res->list[i].termno = -1;
+ }
+ return res;
+}
+
+static void word_trie_addterm(NMEM nmem, struct word_trie *n, const char *term, int num)
+{
+ while (*term) {
+ int c = tolower(*term);
+ if (c < 'a' || c > 'z')
+ term++;
+ else
+ {
+ c -= 'a';
+ if (!n->list[c].child)
+ {
+ struct word_trie *new = create_word_trie_node(nmem);
+ n->list[c].child = new;
+ }
+ if (!*(++term))
+ n->list[c].termno = num;
+ else
+ word_trie_addterm(nmem, n->list[c].child, term, num);
+ break;
+ }
+ }
+
+}
+
+static struct word_trie *build_word_trie(NMEM nmem, const char **terms)
+{
+ struct word_trie *res = create_word_trie_node(nmem);
+ const char **p;
+ int i;
+
+ for (i = 1, p = terms; *p; p++, i++)
+ word_trie_addterm(nmem, res, *p, i);
+ return res;
+}
+
+struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs)
+{
+ struct relevance *res = nmem_malloc(nmem, sizeof(struct relevance));
+ const char **p;
+ int i;
+
+ for (p = terms, i = 0; *p; p++, i++)
+ ;
+ res->vec_len = ++i;
+ res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
+ bzero(res->doc_frequency_vec, res->vec_len * sizeof(int));
+ res->nmem = nmem;
+ res->num_records = 0;
+ res->records = nmem_malloc(nmem, numrecs * sizeof(struct relevance_record *));
+ res->wt = build_word_trie(nmem, terms);
+ return res;
+}
+
+struct relevance_record *relevance_newrec(struct relevance *r, struct record *rec)
+{
+ struct relevance_record *res = nmem_malloc(r->nmem,
+ sizeof(struct relevance_record));
+ res->record = rec;
+ res->term_frequency_vec = nmem_malloc(r->nmem, r->vec_len * sizeof(int));
+ bzero(res->term_frequency_vec, r->vec_len * sizeof(int));
+ return res;
+}
+
+void relevance_countwords(struct relevance_record *rec, const char *words, int len)
+{
+}
+
+void relevance_donerecord(struct relevance_record *rec)
+{
+}
+
+// Prepare for a relevance-sorted read of up to num entries
+void relevance_prepare_read(struct relevance *r, int num)
+{
+}
+
+struct record *relevance_read(struct relevance *r)
+{
+ return 0;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
--- /dev/null
+#ifndef RELEVANCE_H
+#define RELEVANCE_H
+
+#include <yaz/yaz-util.h>
+
+#include "pazpar2.h"
+
+struct relevance;
+struct relevance_record;
+
+struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs);
+struct relevance_record *relevance_newrec(struct relevance *r, struct record *rec);
+void relevance_countwords(struct relevance_record *rec, const char *words, int len);
+void relevance_donerecord(struct relevance_record *rec);
+
+void relevance_prepare_read(struct relevance *r, int num);
+struct record *relevance_read(struct relevance *r);
+
+#endif
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
--- /dev/null
+/*
+ * $Id: termlists.c,v 1.1 2006-11-24 20:29:07 quinn Exp $
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <yaz/yaz-util.h>
+
+#include "termlists.h"
+
+// Discussion:
+// As terms are found in incoming records, they are added to (or updated in) a
+// Hash table. When term records are updated, a frequency value is updated. At
+// the same time, a highscore is maintained for the most frequent terms.
+
+struct termlist_bucket
+{
+ struct termlist_score term;
+ struct termlist_bucket *next;
+};
+
+struct termlist
+{
+ struct termlist_bucket **hashtable;
+ int hashtable_size;
+ int hashmask;
+
+ struct termlist_score **highscore;
+ int highscore_size;
+ int highscore_num;
+ int highscore_min;
+
+ NMEM nmem;
+};
+
+
+// Jenkins one-at-a-time hash (from wikipedia)
+static unsigned int hash(const unsigned char *key)
+{
+ unsigned int hash = 0;
+
+ while (*key)
+ {
+ hash += *(key++);
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+ return hash;
+}
+
+struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size)
+{
+ int hashsize = 1;
+ int halfnumterms;
+ struct termlist *res;
+
+ // Calculate a hash size smallest power of 2 larger than 50% of expected numterms
+ halfnumterms = numterms >> 1;
+ if (halfnumterms < 0)
+ halfnumterms = 1;
+ while (hashsize < halfnumterms)
+ hashsize <<= 1;
+ res = nmem_malloc(nmem, sizeof(struct termlist));
+ res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct termlist_bucket*));
+ bzero(res->hashtable, hashsize * sizeof(struct termlist_bucket*));
+ res->hashtable_size = hashsize;
+ res->nmem = nmem;
+ res->hashmask = hashsize - 1; // Creates a bitmask
+
+ res->highscore = nmem_malloc(nmem, highscore_size * sizeof(struct termlist_score *));
+ res->highscore_size = highscore_size;
+ res->highscore_num = 0;
+ res->highscore_min = 0;
+
+ return res;
+}
+
+static void update_highscore(struct termlist *tl, struct termlist_score *t)
+{
+ int i;
+ int smallest;
+ int me = -1;
+
+ if (t->frequency < tl->highscore_min)
+ return;
+
+ smallest = 0;
+ for (i = 0; i < tl->highscore_num; i++)
+ {
+ if (tl->highscore[i]->frequency < tl->highscore[smallest]->frequency)
+ smallest = i;
+ if (tl->highscore[i] == t)
+ me = i;
+ }
+ if (tl->highscore_num)
+ tl->highscore_min = tl->highscore[smallest]->frequency;
+ if (me >= 0)
+ return;
+ if (tl->highscore_num < tl->highscore_size)
+ {
+ tl->highscore[tl->highscore_num++] = t;
+ if (t->frequency < tl->highscore_min)
+ tl->highscore_min = t->frequency;
+ }
+ else
+ {
+ if (t->frequency > tl->highscore[smallest]->frequency)
+ {
+ tl->highscore[smallest] = t;
+ }
+ }
+}
+
+void termlist_insert(struct termlist *tl, const char *term)
+{
+ unsigned int bucket;
+ struct termlist_bucket **p;
+
+ bucket = hash(term) & tl->hashmask;
+ for (p = &tl->hashtable[bucket]; *p; p = &(*p)->next)
+ {
+ if (!strcmp(term, (*p)->term.term))
+ {
+ yaz_log(YLOG_LOG, "Found a matching term: %s", term);
+ (*p)->term.frequency++;
+ update_highscore(tl, &((*p)->term));
+ break;
+ }
+ }
+ if (!*p) // We made it to the end of the bucket without finding match
+ {
+ yaz_log(YLOG_DEBUG, "Added a new term: %s", term);
+ struct termlist_bucket *new = nmem_malloc(tl->nmem,
+ sizeof(struct termlist_bucket));
+ new->term.term = nmem_strdup(tl->nmem, term);
+ new->term.frequency = 1;
+ new->next = 0;
+ *p = new;
+ update_highscore(tl, &new->term);
+ }
+}
+
+static int compare(const void *s1, const void *s2)
+{
+ struct termlist_score **p1 = (struct termlist_score**) s1, **p2 = (struct termlist_score **) s2;
+ return (*p2)->frequency - (*p1)->frequency;
+}
+
+struct termlist_score **termlist_highscore(struct termlist *tl, int *len)
+{
+ qsort(tl->highscore, tl->highscore_num, sizeof(struct termlist_score*), compare);
+ *len = tl->highscore_num;
+ return tl->highscore;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
--- /dev/null
+#ifndef TERMLISTS_H
+#define TERMLISTS_H
+
+#include <yaz/nmem.h>
+
+struct termlist_score
+{
+ char *term;
+ int frequency;
+};
+
+struct termlist;
+
+struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size);
+void termlist_insert(struct termlist *tl, const char *term);
+struct termlist_score **termlist_highscore(struct termlist *tl, int *len);
+
+#endif
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */