article. Title and relevance sort supported by config file.
Sort switch added to demo
-/* $Id: config.c,v 1.11 2007-01-14 17:34:31 adam Exp $ */
+/* $Id: config.c,v 1.12 2007-01-15 04:34:28 quinn Exp $ */
#include <string.h>
xmlNode *n;
struct conf_service *r = nmem_malloc(nmem, sizeof(struct conf_service));
int md_node = 0;
+ int sk_node = 0;
r->num_sortkeys = r->num_metadata = 0;
- // Allocate array of conf metadata structs, if necessary
+ // Allocate array of conf metadata and sortkey structs, if necessary
for (n = node->children; n; n = n->next)
if (n->type == XML_ELEMENT_NODE && !strcmp(n->name, "metadata"))
{
{
if (!strcmp(type, "generic"))
md->type = Metadata_type_generic;
- else if (!strcmp(type, "integer"))
- md->type = Metadata_type_integer;
else if (!strcmp(type, "year"))
md->type = Metadata_type_year;
else
else
md->type = Metadata_type_generic;
- if (sortkey)
- {
- if (!strcmp(sortkey, "no"))
- md->sortkey = Metadata_sortkey_no;
- else if (!strcmp(sortkey, "numeric"))
- md->sortkey = Metadata_sortkey_numeric;
- else if (!strcmp(sortkey, "range"))
- md->sortkey = Metadata_sortkey_range;
- else if (!strcmp(sortkey, "skiparticle"))
- md->sortkey = Metadata_sortkey_skiparticle;
- else
- {
- yaz_log(YLOG_FATAL, "Unknown sortkey in metadata element: %s", sortkey);
- return 0;
- }
- }
- else
- md->sortkey = Metadata_sortkey_no;
-
if (merge)
{
if (!strcmp(merge, "no"))
else
md->merge = Metadata_merge_no;
+ if (sortkey && strcmp(sortkey, "no"))
+ {
+ struct conf_sortkey *sk = &r->sortkeys[sk_node];
+ if (md->merge == Metadata_merge_no)
+ {
+ yaz_log(YLOG_FATAL, "Can't specify sortkey on a non-merged field");
+ return 0;
+ }
+ if (!strcmp(sortkey, "numeric"))
+ sk->type = Metadata_sortkey_numeric;
+ else if (!strcmp(sortkey, "skiparticle"))
+ sk->type = Metadata_sortkey_skiparticle;
+ else
+ {
+ yaz_log(YLOG_FATAL, "Unknown sortkey in metadata element: %s", sortkey);
+ return 0;
+ }
+ sk->name = md->name;
+ md->sortkey_offset = sk_node;
+ sk_node++;
+ }
+ else
+ md->sortkey_offset = -1;
+
xmlFree(name);
xmlFree(brief);
xmlFree(sortkey);
#include <libxslt/transform.h>
#include <libxslt/xsltutils.h>
-enum conf_sortkey_types
+enum conf_sortkey_type
{
- Metadata_sortkey_no, // This is not to be used as a sortkey
+ Metadata_sortkey_relevance, // Sort on the cluster's relevance score
Metadata_sortkey_numeric, // Standard numerical sorting
- Metadata_sortkey_range, // Range sorting (pick lowest or highest)
Metadata_sortkey_skiparticle, // Skip leading article when sorting
- Metadata_sortkey_string
+ Metadata_sortkey_string // Flat string
};
// Describes known metadata elements and how they are to be manipulated
int termlist;// Is this field to be treated as a termlist for browsing?
int rank; // Rank factor. 0 means don't use this field for ranking, 1 is default
// values >1 give additional significance to a field
+ int sortkey_offset; // -1 if it's not a sortkey, otherwise index
+ // into service/record_cluster->sortkey array
enum
{
Metadata_type_generic, // Generic text field
- Metadata_type_integer, // Integer type
- Metadata_type_year // A year
+ Metadata_type_number, // A number
+ Metadata_type_year // A year (stored as a number range)
} type;
- enum conf_sortkey_types sortkey;
enum
{
Metadata_merge_no, // Don't merge
struct conf_sortkey
{
char *name;
- enum conf_sortkey_types type;
+ enum conf_sortkey_type type;
};
+// It is conceivable that there will eventually be several 'services' offered
+// from one server, with separate configuration -- possibly more than one service
+// associated with the same port. For now, however, only a single service is possible.
struct conf_service
{
int num_metadata;
/*
- * $Id: http_command.c,v 1.21 2007-01-12 23:41:35 quinn Exp $
+ * $Id: http_command.c,v 1.22 2007-01-15 04:34:28 quinn Exp $
*/
#include <stdio.h>
wrbuf_puts(w, md->data.text);
break;
case Metadata_type_year:
- wrbuf_printf(w, "%d", md->data.year.year1);
- if (md->data.year.year1 != md->data.year.year2)
- wrbuf_printf(w, "-%d", md->data.year.year2);
+ wrbuf_printf(w, "%d", md->data.number.min);
+ if (md->data.number.min != md->data.number.max)
+ wrbuf_printf(w, "-%d", md->data.number.max);
break;
default:
wrbuf_puts(w, "[can't represent]");
struct http_response *rs = c->response;
struct http_session *s = locate_session(rq, rs);
struct record_cluster **rl;
- NMEM nmem_show;
+ struct reclist_sortparms *sp;
char *start = http_argbyname(rq, "start");
char *num = http_argbyname(rq, "num");
+ char *sort = http_argbyname(rq, "sort");
int startn = 0;
int numn = 20;
int total;
startn = atoi(start);
if (num)
numn = atoi(num);
+ if (!sort)
+ sort = "relevance";
+ if (!(sp = reclist_parse_sortparms(c->nmem, sort)))
+ {
+ error(rs, "500", "Bad sort parameters", 0);
+ return;
+ }
- nmem_show = nmem_create();
- rl = show(s->psession, startn, &numn, &total, &total_hits, nmem_show);
+ rl = show(s->psession, sp, startn, &numn, &total, &total_hits, c->nmem);
wrbuf_rewind(c->wrbuf);
wrbuf_puts(c->wrbuf, "<show>\n<status>OK</status>\n");
wrbuf_puts(c->wrbuf, "</show>\n");
rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf));
http_send_response(c);
- nmem_destroy(nmem_show);
}
static void show_records_ready(void *data)
static void cmd_info(struct http_channel *c)
{
char yaz_version_str[20];
- struct http_request *rq = c->request;
struct http_response *rs = c->response;
wrbuf_rewind(c->wrbuf);
-/* $Id: pazpar2.c,v 1.29 2007-01-14 17:34:31 adam Exp $ */
+/* $Id: pazpar2.c,v 1.30 2007-01-15 04:34:28 quinn Exp $ */
#include <stdlib.h>
#include <stdio.h>
}
}
-char *normalize_mergekey(char *buf)
+char *normalize_mergekey(char *buf, int skiparticle)
{
char *p = buf, *pout = buf;
+ if (skiparticle)
+ {
+ char firstword[64];
+ char articles[] = "the den der die des an a "; // must end in space
+
+ while (*p && !isalnum(*p))
+ p++;
+ pout = firstword;
+ while (*p && *p != ' ' && pout - firstword < 62)
+ *(pout++) = tolower(*(p++));
+ *(pout++) = ' ';
+ *(pout++) = '\0';
+ if (!strstr(articles, firstword))
+ p = buf;
+ pout = buf;
+ }
+
while (*p)
{
while (*p && !isalnum(*p))
mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey);
xmlFree(mergekey);
- normalize_mergekey(mergekey_norm);
+ normalize_mergekey(mergekey_norm, 0);
cluster = reclist_insert(se->reclist, res, mergekey_norm, &se->total_merged);
if (!cluster)
if (!strcmp(n->name, "metadata"))
{
struct conf_metadata *md = 0;
+ struct conf_sortkey *sk = 0;
struct record_metadata **wheretoput, *newm;
int imeta;
int first, last;
if (!strcmp(type, service->metadata[imeta].name))
{
md = &service->metadata[imeta];
+ if (md->sortkey_offset >= 0)
+ sk = &service->sortkeys[md->sortkey_offset];
break;
}
if (!md)
{
if (!*wheretoput ||
strlen(newm->data.text) > strlen((*wheretoput)->data.text))
- *wheretoput = newm;
+ {
+ *wheretoput = newm;
+ if (sk)
+ {
+ char *s = nmem_strdup(se->nmem, newm->data.text);
+ if (!cluster->sortkeys[md->sortkey_offset])
+ cluster->sortkeys[md->sortkey_offset] =
+ nmem_malloc(se->nmem, sizeof(union data_types));
+ normalize_mergekey(s,
+ (sk->type == Metadata_sortkey_skiparticle));
+ cluster->sortkeys[md->sortkey_offset]->text = s;
+ yaz_log(YLOG_LOG, "SK Longest: %s", s);
+ }
+ }
}
else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no)
{
if (!*wheretoput)
{
*wheretoput = newm;
- (*wheretoput)->data.year.year1 = first;
- (*wheretoput)->data.year.year2 = last;
+ (*wheretoput)->data.number.min = first;
+ (*wheretoput)->data.number.max = last;
+ if (sk)
+ cluster->sortkeys[md->sortkey_offset] = &newm->data;
}
else
{
- if (first < (*wheretoput)->data.year.year1)
- (*wheretoput)->data.year.year1 = first;
- if (last > (*wheretoput)->data.year.year2)
- (*wheretoput)->data.year.year2 = last;
+ if (first < (*wheretoput)->data.number.min)
+ (*wheretoput)->data.number.min = first;
+ if (last > (*wheretoput)->data.number.max)
+ (*wheretoput)->data.number.max = last;
+ if (sk)
+ {
+ union data_types *sdata = cluster->sortkeys[md->sortkey_offset];
+ sdata->number.min = first;
+ sdata->number.max = last;
+ }
+ }
+ if (sk)
+ {
+ union data_types *sdata = cluster->sortkeys[md->sortkey_offset];
+ yaz_log(YLOG_LOG, "SK range: %d-%d", sdata->number.min, sdata->number.max);
}
}
else
return 0;
}
-struct record_cluster **show(struct session *s, int start, int *num, int *total,
- int *sumhits, NMEM nmem_show)
+struct record_cluster **show(struct session *s, struct reclist_sortparms *sp, int start,
+ int *num, int *total, int *sumhits, NMEM nmem_show)
{
struct record_cluster **recs = nmem_malloc(nmem_show, *num
* sizeof(struct record_cluster *));
+ struct reclist_sortparms *spp;
int i;
#if USE_TIMING
yaz_timing_t t = yaz_timing_create();
#endif
- relevance_prepare_read(s->relevance, s->reclist);
+
+ for (spp = sp; spp; spp = spp->next)
+ if (spp->type == Metadata_sortkey_relevance)
+ {
+ relevance_prepare_read(s->relevance, s->reclist);
+ break;
+ }
+ reclist_sort(s->reclist, sp);
*total = s->reclist->num_records;
*sumhits = s->total_hits;
#include "termlists.h"
#include "relevance.h"
+#include "reclists.h"
#include "eventl.h"
#include "config.h"
struct client;
+// Value of a single metadata field or sort key. Which member is meaningful
+// depends on the type configured for the field.
+union data_types {
+ char *text; // Text value
+ struct {
+ int min; // Low end of a numeric range (e.g. first year)
+ int max; // High end of the range; same as min for a single value
+ } number;
+};
+
struct record_metadata {
- union {
- char *text;
- struct {
- int year1;
- int year2;
- } year;
- } data;
+ union data_types data;
struct record_metadata *next; // next item of this name
};
struct record {
struct client *client;
struct record_metadata **metadata; // Array mirrors list of metadata fields in config
+ union data_types **sortkeys; // Array mirrors list of sortkey fields in config
struct record *next; // Next in cluster of merged records
};
struct record_cluster
{
struct record_metadata **metadata; // Array mirrors list of metadata fields in config
+ union data_types **sortkeys;
char *merge_key;
int relevance;
int *term_frequency_vec;
int load_targets(struct session *s, const char *fn);
void statistics(struct session *s, struct statistics *stat);
char *search(struct session *s, char *query);
-struct record_cluster **show(struct session *s, int start, int *num, int *total,
- int *sumhits, NMEM nmem_show);
+struct record_cluster **show(struct session *s, struct reclist_sortparms *sp, int start,
+ int *num, int *total, int *sumhits, NMEM nmem_show);
struct record_cluster *show_single(struct session *s, int id);
struct termlist_score **termlist(struct session *s, const char *name, int *num);
void session_set_watch(struct session *s, int what, session_watchfun fun, void *data);
/*
- * $Id: reclists.c,v 1.6 2007-01-10 10:04:23 adam Exp $
+ * $Id: reclists.c,v 1.7 2007-01-15 04:34:28 quinn Exp $
*/
#include <assert.h>
extern struct parameters global_parameters;
+// Not threadsafe
+static struct reclist_sortparms *sortparms = 0;
+
struct reclist_bucket
{
struct record_cluster *record;
struct reclist_bucket *next;
};
+// Parse a sort specification into a linked list of sort criteria.
+//
+// parms is a comma-separated list of keys in priority order, for example
+// "title:1,relevance". Each key is either "relevance" or the name of a
+// sortkey declared in the service configuration, optionally followed by
+// ":1" to request increasing order (no suffix, or any other suffix, means
+// decreasing order).
+//
+// The list nodes are allocated from nmem. Returns 0 if parms is null, empty,
+// too long, or names a sortkey that is not configured.
+struct reclist_sortparms *reclist_parse_sortparms(NMEM nmem, const char *parms)
+{
+    struct reclist_sortparms *res = 0;
+    struct reclist_sortparms **rp = &res;
+    struct conf_service *service = config->servers->service;
+
+    if (!parms || strlen(parms) > 256)
+        return 0;
+    while (*parms)
+    {
+        char parm[256];
+        char *pp;
+        const char *cpp;
+        size_t len;
+        int increasing = 0;
+        int i;
+        int offset = -1;
+        enum conf_sortkey_type type = Metadata_sortkey_relevance;
+        struct reclist_sortparms *new;
+
+        // Isolate the next comma-delimited key
+        if (!(cpp = strchr(parms, ',')))
+            cpp = parms + strlen(parms);
+        len = (size_t) (cpp - parms);
+        // The key plus its terminator must fit in parm; the overall length
+        // check above would otherwise let a 256-character key overflow it.
+        if (len >= sizeof(parm))
+            return 0;
+        strncpy(parm, parms, len);
+        parm[len] = '\0';
+
+        // Optional ":1" suffix selects increasing order
+        if ((pp = strchr(parm, ':')))
+        {
+            increasing = pp[1] == '1' ? 1 : 0;
+            *pp = '\0';
+        }
+        if (!strcmp(parm, "relevance"))
+        {
+            type = Metadata_sortkey_relevance;
+            offset = -1;
+        }
+        else
+        {
+            for (i = 0; i < service->num_sortkeys; i++)
+            {
+                struct conf_sortkey *sk = &service->sortkeys[i];
+                if (!strcmp(sk->name, parm))
+                {
+                    type = sk->type;
+                    // Article skipping is applied when the key is stored,
+                    // so at sort time it is compared as a plain string.
+                    if (type == Metadata_sortkey_skiparticle)
+                        type = Metadata_sortkey_string;
+                    break;
+                }
+            }
+            if (i >= service->num_sortkeys)
+            {
+                yaz_log(YLOG_FATAL, "Bad sortkey: %s", parm);
+                return 0;
+            }
+            offset = i;
+        }
+        // Append the new criterion to the tail of the list
+        new = *rp = nmem_malloc(nmem, sizeof(struct reclist_sortparms));
+        new->next = 0;
+        new->offset = offset;
+        new->type = type;
+        new->increasing = increasing;
+        rp = &new->next;
+        // Step past the comma, if any
+        if (*(parms = cpp))
+            parms++;
+    }
+    return res;
+}
+
+// qsort() comparator for arrays of struct record_cluster pointers.
+//
+// Walks the file-level 'sortparms' list (set by reclist_sort) and returns the
+// result of the first criterion that distinguishes the two clusters. The
+// default direction is descending; a criterion marked 'increasing' flips the
+// sign. Returns 0 when all criteria compare equal.
+static int reclist_cmp(const void *p1, const void *p2)
+{
+    struct record_cluster *r1 = (*(struct record_cluster**) p1);
+    struct record_cluster *r2 = (*(struct record_cluster**) p2);
+    struct reclist_sortparms *s;
+
+    for (s = sortparms; s; s = s->next)
+    {
+        int res;
+        switch (s->type)
+        {
+            case Metadata_sortkey_relevance:
+                // Three-way compare instead of subtraction: same sign,
+                // no risk of signed overflow.
+                res = (r2->relevance > r1->relevance) -
+                      (r2->relevance < r1->relevance);
+                break;
+            case Metadata_sortkey_string:
+            {
+                // A cluster's sortkey slot stays null until a record
+                // supplies the field; treat a missing key as an empty
+                // string rather than dereferencing a null pointer.
+                union data_types *k1 = r1->sortkeys[s->offset];
+                union data_types *k2 = r2->sortkeys[s->offset];
+                const char *t1 = (k1 && k1->text) ? k1->text : "";
+                const char *t2 = (k2 && k2->text) ? k2->text : "";
+                res = strcmp(t2, t1);
+                break;
+            }
+            case Metadata_sortkey_numeric:
+                // Numeric comparison is not implemented; all values tie
+                res = 0;
+                break;
+            default:
+                yaz_log(YLOG_FATAL, "Bad sort type: %d", s->type);
+                exit(1);
+        }
+        if (res)
+        {
+            if (s->increasing)
+                res *= -1;
+            return res;
+        }
+    }
+    return 0;
+}
+
+// Sort the flat list of record clusters in l according to parms (first node
+// has highest priority), then call reclist_rewind() on the list.
+// The sort recipe is passed to the qsort() comparator through the file-level
+// 'sortparms' pointer, so this function is neither thread-safe nor reentrant.
+void reclist_sort(struct reclist *l, struct reclist_sortparms *parms)
+{
+ sortparms = parms;
+ qsort(l->flatlist, l->num_records, sizeof(struct record_cluster*), reclist_cmp);
+ reclist_rewind(l);
+}
+
struct record_cluster *reclist_read_record(struct reclist *l)
{
if (l->pointer < l->num_records)
newc->relevance = 0;
newc->term_frequency_vec = 0;
newc->recid = (*total)++;
- newc->metadata = 0;
newc->metadata = nmem_malloc(l->nmem,
sizeof(struct record_metadata*) * service->num_metadata);
memset(newc->metadata, 0, sizeof(struct record_metadata*) * service->num_metadata);
+ newc->sortkeys = nmem_malloc(l->nmem,
+ sizeof(struct record_metadata*) * service->num_sortkeys);
+ memset(newc->sortkeys, 0, sizeof(union data_types*) * service->num_sortkeys);
*p = new;
l->flatlist[l->num_records++] = newc;
#ifndef RECLISTS_H
#define RECLISTS_H
+#include "config.h"
+
struct reclist
{
struct reclist_bucket **hashtable;
NMEM nmem;
};
+// This is a recipe for sorting. First node in list has highest priority
+struct reclist_sortparms
+{
+ int offset; // Index into the cluster's sortkeys array; -1 for relevance
+ enum conf_sortkey_type type; // How the values are compared
+ int increasing; // Nonzero: ascending order; zero: descending order
+ struct reclist_sortparms *next; // Next (lower-priority) criterion, or 0
+};
+
struct reclist *reclist_create(NMEM, int numrecs);
struct record_cluster *reclist_insert(struct reclist *tl, struct record *record,
char *merge_key, int *total);
+void reclist_sort(struct reclist *l, struct reclist_sortparms *parms);
struct record_cluster *reclist_read_record(struct reclist *l);
void reclist_rewind(struct reclist *l);
+struct reclist_sortparms *reclist_parse_sortparms(NMEM nmem, const char *parms);
#endif
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
/*
- * $Id: relevance.c,v 1.7 2007-01-10 10:04:23 adam Exp $
+ * $Id: relevance.c,v 1.8 2007-01-15 04:34:28 quinn Exp $
*/
#include <ctype.h>
r->doc_frequency_vec[0]++;
}
+#ifdef GAGA
#ifdef FLOAT_REL
static int comp(const void *p1, const void *p2)
{
return (*r2)->relevance - (*r1)->relevance;
}
#endif
+#endif
-// Prepare for a relevance-sorted read of up to num entries
+// Prepare for a relevance-sorted read
void relevance_prepare_read(struct relevance *rel, struct reclist *reclist)
{
int i;
}
rec->relevance = (int) (relevance * 100000);
}
+#ifdef GAGA
qsort(reclist->flatlist, reclist->num_records, sizeof(struct record*), comp);
+#endif
reclist->pointer = 0;
xfree(idfvec);
}
top: 30px;
}
+/* Placement of the sort selector within the search box.
+   Non-zero lengths need a unit, or the declaration is ignored. */
+div.search span.sort {
+position: absolute;
+right: 500px;
+top: 50px;
+}
+
div.search div {
position: absolute;
left: 225px;
<?php
-/* $Id: page_segments.phpi,v 1.9 2007-01-12 20:11:51 quinn Exp $
+/* $Id: page_segments.phpi,v 1.10 2007-01-15 04:34:29 quinn Exp $
* ---------------------------------------------------
* Page segments
*/
<input type="hidden" name="action_type" id="action_type"
value="search"/>
</form>
+ <p>
+ <span class="sort" id="sortselect"></span>
</div>
</div>
<div class="main">
-/* $Id: search.js,v 1.22 2007-01-12 20:11:51 quinn Exp $
+/* $Id: search.js,v 1.23 2007-01-15 04:34:29 quinn Exp $
* ---------------------------------------------------
* Javascript container
*/
var page_window = 5; // Number of pages prior to and after the current page
var facet_list;
var cur_facet = 0;
+var cur_sort = "relevance";
+var searched = 0;
function initialize ()
{
facet_list = get_available_facets();
start_session();
session_check();
+ set_sort();
}
setTimeout(ping_session, 50000);
}
-function targets_loaded()
-{
- if (xloadTargets.readyState != 4)
- return;
- var xml = xloadTargets.responseXML;
- var error = xml.getElementsByTagName("error");
- if (error[0])
- {
- var msg = error[0].childNodes[0].nodeValue;
- alert(msg);
- return;
- }
-
- assign_text(document.getElementById("targetstatus"), 'Targets loaded');
-}
-
-function load_targets()
-{
- var fn = document.getElementById("targetfilename").value;
- clearTimeout(termtimer);
- clearTimeout(searchtimer);
- clearTimeout(stattimer);
- clearTimeout(showtimer);
- clear_cell(document.getElementById("stat"));
- if (!fn)
- {
- alert("Please enter a target definition file name");
- return;
- }
- var url="search.pz2?" +
- "command=load" +
- "&session=" + session +
- "&name=" + fn;
- assign_text(document.getElementById("targetstatus"), 'Loading targets...');
- xloadTargets = GetXmlHttpObject();
- xloadTargets.onreadystatechange=targets_loaded;
- xloadTargets.open("GET", url);
- xloadTargets.send(null);
-}
-
-
function update_action (new_action) {
document.search.action_type.value = new_action;
}
append_text(cell, text);
}
+function set_sort_opt(n, opt, str)
+{
+ var txt = document.createTextNode(str);
+ if (opt == cur_sort)
+ n.appendChild(txt);
+ else
+ {
+ var a = document.createElement('a');
+ a.appendChild(txt);
+ a.setAttribute('href', "");
+ a.setAttribute('onclick', "set_sort('" + opt + "'); return false");
+ n.appendChild(a);
+ }
+}
+
+function set_sort(sort)
+{
+ if (sort && sort != cur_sort)
+ {
+ cur_sort = sort;
+ if (searched)
+ check_search();
+ }
+
+ var t = document.getElementById("sortselect");
+ clear_cell(t);
+ t.appendChild(document.createTextNode("Sort results by: "));
+ set_sort_opt(t, 'relevance', 'Relevance');
+ t.appendChild(document.createTextNode(" or "));
+ set_sort_opt(t, 'title:1', 'Title');
+}
function show_records()
{
}
else
{
-
var total = Number(xml.getElementsByTagName('total')[0].childNodes[0].nodeValue);
var merged = Number(xml.getElementsByTagName('merged')[0].childNodes[0].nodeValue);
var start = Number(xml.getElementsByTagName('start')[0].childNodes[0].nodeValue);
' to ' + (start + num) + ' of ' +
merged + ' (total hits: ' +
total + ')');
+ searched = 1;
interval.className = 'results';
record_container.className = 'records';
"&start=" + document.search.startrec.value +
"&num=" + recstoshow +
"&session=" + session +
+ "&sort=" + cur_sort +
"&block=1";
xshow = GetXmlHttpObject();
xshow.onreadystatechange=show_records;
stattimer = 0;
clearTimeout(showtimer);
showtimer = 0;
- if (!targets_loaded)
- {
- alert("Please load targets first");
- return;
- }
var query = escape(document.getElementById('query').value);
var url = "search.pz2?" +
"command=search" +
document.search.startrec.value = 0;
}
-
function session_encode ()
{
var i;
-/* $Id: search.js,v 1.9 2007-01-08 18:32:35 quinn Exp $
+/* $Id: search.js,v 1.10 2007-01-15 04:34:29 quinn Exp $
* ---------------------------------------------------
* Javascript container
*/
"&start=" + document.search.startrec.value +
"&num=" + recstoshow +
"&session=" + session +
+ "&sort=title:1" +
"&block=1";
xshow = GetXmlHttpObject();
xshow.onreadystatechange=show_records;