X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=doc%2Fpazpar2_conf.xml;h=4e0498045f270929025dd6d93d18e24c94e9c004;hb=e954f104fba0c6ef9142f09042e0b7e7f73d7388;hp=6deafa28ee008f4b10f1df061b31cbe67025cb1c;hpb=bb1af0d3e894c72d2392108890cb20db173f9f13;p=pazpar2-moved-to-github.git

diff --git a/doc/pazpar2_conf.xml b/doc/pazpar2_conf.xml
index 6deafa2..4e04980 100644
--- a/doc/pazpar2_conf.xml
+++ b/doc/pazpar2_conf.xml
@@ -8,7 +8,7 @@
      <!ENTITY % common SYSTEM "common/common.ent">
      %common;
 ]>
-<!-- $Id: pazpar2_conf.xml,v 1.2 2007-01-12 15:31:30 adam Exp $ -->
+<!-- $Id: pazpar2_conf.xml,v 1.7 2007-01-26 18:53:55 quinn Exp $ -->
 <refentry id="pazpar2_conf">
  <refentryinfo>
   <productname>Pazpar2</productname>
@@ -31,22 +31,323 @@
  </refsynopsisdiv>
  
  <refsect1><title>DESCRIPTION</title>
-  <para></para>
+   <para>
+     The pazpar2 configuration file, together with any referenced XSLT files,
+     governs pazpar2's behavior as a client, and controls the normalization and
+     extraction of data elements from incoming result records, for the
+     purposes of merging, sorting, facet analysis, and display.
+    </para>
+
+    <para>
+      The file is specified using the option -f on the pazpar2 command line.
+      There is not presently a way to reload the configuration file without
+      restarting pazpar2, although this will most likely be added some time
+      in the future.
+    </para>
  </refsect1>
+
+ <refsect1><title>FORMAT</title>
+   <para>
+     The configuration file is XML-structured. It must be valid XML. All
+     elements specific to pazpar2 should belong to the namespace
+     "http://www.indexdata.com/pazpar2/1.0" (this is assumed in the
+     following examples). The root element is named 'pazpar2'. Under the
+     root element are a number of elements which group categories of
+     information. The categories are described below.
+    </para>
+
+    <refsect2 id="config-server"><title>server</title>
+      <para>
+        This section governs overall behavior of the client. The data
+	elements are described below.
+      </para>
+      <variablelist> <!-- level 1 -->
+        <varlistentry>
+	  <term>listen</term>
+	  <listitem>
+	    <para>
+	      Configures the webservice -- this controls how you can connect
+	      to pazpar2 from your browser or server-side code. The
+	      attributes 'host' and 'port' control the binding of the
+	      server. The 'host' attribute can be used to bind the server to
+	      a secondary IP address of your system, enabling you to run
+	      pazpar2 on port 80 alongside a conventional web server. You
+	      can override this setting on the command line using the option -h.
+	    </para>
+	  </listitem>
+	</varlistentry>
+
+	<varlistentry>
+	  <term>proxy</term>
+	  <listitem>
+	    <para>
+	      If this item is given, pazpar2 will forward all incoming HTTP
+	      requests that do not contain the filename 'search.pz2' to the
+	      host and port specified using the 'host' and 'port'
+	      attributes. This functionality is crucial if you wish to use
+	      pazpar2 in conjunction with browser-based code (JS, Flash,
+	      applets, etc.) which operates in a security sandbox. Such code
+	      can only connect to the same server from which the enclosing
+	      HTML page originated. Pazpar2's proxy functionality enables you
+	      to host all of the main pages (plus images, CSS, etc) of your
+	      application on a conventional webserver, while efficiently
+	      processing webservice requests for metasearch status, results,
+	      etc.
+	    </para>
+	  </listitem>
+	</varlistentry>
+
+	<varlistentry>
+	  <term>service</term>
+	  <listitem>
+	    <para>
+	      This nested element controls the behavior of pazpar2 with
+	      respect to your data model. In pazpar2, incoming records are
+	      normalized, using XSLT, into an internal representation (see
+	      the <link
+	      linkend="config_retrievalprofile">retrievalprofile</link> section).
+	      The 'service' section controls the further processing and
+	      extraction of data from the internal representation, primarily
+	      through the 'metadata' sub-element.
+	    </para>
+
+	    <variablelist> <!-- Level 2 -->
+	      <varlistentry><term>metadata</term>
+	        <listitem>
+		  <para>
+		    One of these elements is required for every data element in
+		    the internal representation of the record (see
+		    <xref linkend="data_model"/>). It governs
+		    subsequent processing as pertains to sorting, relevance
+		    ranking, merging, and display of data elements. It supports
+		    the following attributes:
+		  </para>
+
+		  <variablelist> <!-- level 3 -->
+		    <varlistentry><term>name</term>
+		      <listitem>
+			<para>
+			  This is the name of the data element. It is matched
+			  against the 'type' attribute of the 'metadata' element
+			  in the normalized record. A warning is produced if
+			  metadata elements with an unknown name are found in the
+			  normalized record. This name is also used to represent
+			  data elements in the records returned by the
+			  webservice API, and to name sort lists and browse
+			  facets.
+			</para>
+		      </listitem>
+		    </varlistentry>
+
+		    <varlistentry><term>type</term>
+		     <listitem>
+			<para>
+			  The type of data element. This value governs any
+			  normalization or special processing that might take
+			  place on an element. Possible values are 'generic'
+			  (basic string) and 'year' (a range is computed if
+			  multiple years are found in the record). Note: This
+			  list is likely to increase in the future.
+			</para>
+		      </listitem>
+		    </varlistentry>
+
+		    <varlistentry><term>brief</term>
+		      <listitem>
+			<para>
+			  If this is set to 'yes', then the data element is
+			  included in brief records in the webservice API. Note
+			  that this only makes sense for metadata elements that
+			  are merged (see below). The default value is 'no'.
+			</para>
+		      </listitem>
+		    </varlistentry>
+
+		    <varlistentry><term>sortkey</term>
+		      <listitem>
+			<para>
+			  Specifies that this data element is to be used for
+			  sorting. The possible values are 'numeric' (numeric
+			  value), 'skiparticle' (string; skip common, leading
+			  articles), and 'no' (no sorting). The default value is
+			  'no'.
+			</para>
+		      </listitem>
+		    </varlistentry>
+
+		    <varlistentry><term>rank</term>
+		      <listitem>
+			<para>
+			  Specifies that this element is to be used to help rank
+			  records against the user's query (when ranking is
+			  requested). The value is an integer, used as a
+			  multiplier against the basic TF*IDF score. A value of
+			  1 is the base, higher values give additional weight to
+			  elements of this type. The default is '0', which
+			  excludes this element from the rank calculation.
+			</para>
+		      </listitem>
+		    </varlistentry>
+
+		    <varlistentry><term>termlist</term>
+		      <listitem>
+			<para>
+			  Specifies that this element is to be used as a
+			  termlist, or browse facet. Values are tabulated from
+			  incoming records, and a highscore of values (with
+			  their associated frequency) is made available to the
+			  client through the webservice API. The possible values
+			  are 'yes' and 'no' (default).
+			</para>
+		      </listitem>
+		    </varlistentry>
+
+		    <varlistentry><term>merge</term>
+		      <listitem>
+			<para>
+			  This governs whether, and how, elements are extracted
+			  from individual records and merged into cluster
+			  records. The possible values are: 'unique' (include
+			  all unique elements), 'longest' (include only the
+			  longest element (strlen)), 'range' (calculate a range
+			  of values across all matching records), 'all' (include
+			  all elements), or 'no' (don't merge; this is the
+			  default).
+			</para>
+		      </listitem>
+		    </varlistentry>
+		  </variablelist> <!-- attributes to metadata -->
+
+		</listitem>
+	      </varlistentry>
+	    </variablelist>     <!-- Data elements in service directive -->
+	  </listitem>
+	</varlistentry>
+      </variablelist>           <!-- Data elements in server directive -->
+    </refsect2>
+
+    <refsect2 id="config-queryprofile"><title>queryprofile</title>
+      <para>
+        At the moment, this directive is ignored; there is one global
+	CCL-mapping file which governs the mapping of queries to Z39.50
+	type-1. This file is located in etc/default.bib. This will change
+	shortly.
+      </para>
+    </refsect2>
+
+    <refsect2 id="config_retrievalprofile"><title>retrievalprofile</title>
+      <para>
+	Note: In the present version, there is a single retrieval
+	profile. However, in a future release, it will be possible to
+	associate unique retrieval profiles with different targets, or to
+	generate retrieval profiles using XSLT from the ZeeRex description of
+	a target.
+      </para>
+      
+      <para>
+        The following data elements are recognized for the retrievalprofile
+	directive:
+      </para>
+      
+      <variablelist>
+        <varlistentry><term>requestsyntax</term>
+	  <listitem>
+	    <para>
+	      This element specifies the request syntax to be used in queries. It only
+	      makes sense for Z39.50-type targets.
+	    </para>
+	  </listitem>
+	</varlistentry>
+
+	<varlistentry><term>nativesyntax</term>
+	  <listitem>
+	    <para>
+	      This element specifies the native syntax and encoding of the
+	      result records. The default is XML. The following attributes
+	      are defined:
+	    </para>
+	    <variablelist>
+	      <varlistentry><term>name</term>
+	        <listitem>
+		  <para>
+		    The name of the syntax. Currently recognized values are
+		    'iso2709' (MARC), and 'xml'.
+		  </para>
+		</listitem>
+	      </varlistentry>
+
+	      <varlistentry><term>format</term>
+	        <listitem>
+		  <para>
+		    The format, or schema, to be expected. Default is
+		    'marc21'.
+		  </para>
+		</listitem>
+	      </varlistentry>
+
+	      <varlistentry><term>encoding</term>
+	        <listitem>
+		  <para>
+		    The encoding of the response record. Typical values for
+		    MARC records are 'marc8' (general MARC-8), 'marc8s'
+		    (MARC-8, but maps to precomposed UTF-8 characters, more
+		    suitable for use in web browsers), 'latin1'.
+		  </para>
+		</listitem>
+	      </varlistentry>
+
+	      <varlistentry><term>mapto</term>
+	        <listitem>
+		  <para>
+		    Specifies the flavor of MARCXML to map results to.
+		    Default is 'marcxml'. 'marcxchange' is also possible, and
+		    useful for Danish DANMARC records.
+		  </para>
+		</listitem>
+	      </varlistentry>
+	    </variablelist> <!-- parameters to nativesyntax directive -->
+	  </listitem>
+	</varlistentry>
+      </variablelist> <!-- sub-elements in retrievalprofile -->
+    </refsect2>
+
+  </refsect1>
  
- <refsect1><title>OPTIONS</title>
-  <para></para>
- </refsect1>
+ <refsect1><title>EXAMPLE</title>
+  <para>Below is a working example configuration:
+  <screen><![CDATA[
+<?xml version="1.0" encoding="UTF-8"?>
+<pazpar2 xmlns="http://www.indexdata.com/pazpar2/1.0">
 
- <refsect1><title>EXAMPLES</title>
-  <para></para>
- </refsect1> 
+<server>
+  <listen port="9004"/>
+  <proxy host="us1.indexdata.com"/>
 
- <refsect1><title>FILES</title>
-  <para></para>
- </refsect1>
-</refentry>
+  <service>
+    <metadata name="title" brief="yes" sortkey="skiparticle" merge="longest" rank="6"/>
+    <metadata name="isbn" merge="unique"/>
+    <metadata name="date" brief="yes" sortkey="numeric" type="year" merge="range"
+	    termlist="yes"/>
+    <metadata name="author" brief="yes" termlist="yes" merge="longest" rank="2"/>
+    <metadata name="subject" merge="unique" termlist="yes" rank="3"/>
+    <metadata name="url" merge="unique"/>
+  </service>
+</server>
+
+<queryprofile/>  <!-- Like a CCL profile++ . Can optionally refer to XSLT to 
+       convert ZeeRex into queryprofile. Multiple profiles can exist.  -->
 
+<retrievalprofile>
+  <requestsyntax>marc21</requestsyntax>
+  <nativesyntax name="iso2709" format="marc21" encoding="marc8s" mapto="marcxml"/>
+  <map type="xslt" stylesheet="marc21.xsl"/>
+</retrievalprofile>
+
+</pazpar2>
+]]></screen>
+   </para>
+ </refsect1> 
+</refentry>
 <!-- Keep this comment at the end of the file
 Local variables:
 mode: sgml